spell.c source code [neovim/src/nvim/spell.c]

1	// This is an open source non-commercial project. Dear PVS-Studio, please check
2	// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
3
4	// spell.c: code for spell checking
5	//
6	// See spellfile.c for the Vim spell file format.
7	//
8	// The spell checking mechanism uses a tree (aka trie). Each node in the tree
9	// has a list of bytes that can appear (siblings). For each byte there is a
10	// pointer to the node with the byte that follows in the word (child).
11	//
12	// A NUL byte is used where the word may end. The bytes are sorted, so that
13	// binary searching can be used and the NUL bytes are at the start. The
14	// number of possible bytes is stored before the list of bytes.
15	//
16	// The tree uses two arrays: "byts" stores the characters, "idxs" stores
17	// either the next index or flags. The tree starts at index 0. For example,
18	// to lookup "vi" this sequence is followed:
19	// i = 0
20	// len = byts[i]
21	// n = where "v" appears in byts[i + 1] to byts[i + len]
22	// i = idxs[n]
23	// len = byts[i]
24	// n = where "i" appears in byts[i + 1] to byts[i + len]
25	// i = idxs[n]
26	// len = byts[i]
27	// find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
28	//
29	// There are two word trees: one with case-folded words and one with words in
30	// original case. The second one is only used for keep-case words and is
31	// usually small.
32	//
33	// There is one additional tree for when not all prefixes are applied when
34	// generating the .spl file. This tree stores all the possible prefixes, as
35	// if they were words. At each word (prefix) end the prefix nr is stored, the
36	// following word must support this prefix nr. And the condition nr is
37	// stored, used to lookup the condition that the word must match with.
38	//
39	// Thanks to Olaf Seibert for providing an example implementation of this tree
40	// and the compression mechanism.
41	// LZ trie ideas:
42	// http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
43	// More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
44	//
45	// Matching involves checking the caps type: Onecap ALLCAP KeepCap.
46	//
47	// Why doesn't Vim use aspell/ispell/myspell/etc.?
48	// See ":help develop-spell".
49
50	// Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
51	// Only use it for small word lists!
52
53	// Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after
54	// adding a word. Only use it for small word lists!
55
56	// Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
57	// specific word.
58
59	// Use this to adjust the score after finding suggestions, based on the
60	// suggested word sounding like the bad word. This is much faster than doing
61	// it for every possible suggestion.
62	// Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
63	// vs "ht") and goes down in the list.
64	// Used when 'spellsuggest' is set to "best".
// Blend the word score and the sound score into one score; the word score
// weighs three times as much as the sound score.
// Arguments are parenthesized so that expressions such as "a + b" can be
// passed safely.
#define RESCORE(word_score, sound_score) \
  ((3 * (word_score) + (sound_score)) / 4)

// Do the opposite: based on a maximum end score and a known sound score,
// compute the maximum word score that can be used.
#define MAXSCORE(word_score, sound_score) \
  ((4 * (word_score) - (sound_score)) / 3)
70
71	#include <assert.h>
72	#include <inttypes.h>
73	#include <limits.h>
74	#include <stdbool.h>
75	#include <string.h>
76	#include <stdlib.h>
77	#include <wctype.h>
78
/* for offsetof() */
80	#include <stddef.h>
81
82	#include "nvim/vim.h"
83	#include "nvim/ascii.h"
84	#include "nvim/spell.h"
85	#include "nvim/buffer.h"
86	#include "nvim/change.h"
87	#include "nvim/charset.h"
88	#include "nvim/cursor.h"
89	#include "nvim/edit.h"
90	#include "nvim/eval.h"
91	#include "nvim/ex_cmds.h"
92	#include "nvim/ex_cmds2.h"
93	#include "nvim/ex_docmd.h"
94	#include "nvim/fileio.h"
95	#include "nvim/func_attr.h"
96	#include "nvim/getchar.h"
97	#include "nvim/hashtab.h"
98	#include "nvim/mark.h"
99	#include "nvim/mbyte.h"
100	#include "nvim/memline.h"
101	#include "nvim/memory.h"
102	#include "nvim/message.h"
103	#include "nvim/misc1.h"
104	#include "nvim/garray.h"
105	#include "nvim/normal.h"
106	#include "nvim/option.h"
107	#include "nvim/os_unix.h"
108	#include "nvim/path.h"
109	#include "nvim/regexp.h"
110	#include "nvim/screen.h"
111	#include "nvim/search.h"
112	#include "nvim/spellfile.h"
113	#include "nvim/strings.h"
114	#include "nvim/syntax.h"
115	#include "nvim/undo.h"
116	#include "nvim/os/os.h"
117	#include "nvim/os/input.h"
118
119	// only used for su_badflags
#define WF_MIXCAP   0x20        // mix of upper and lower case: macaRONI

// All word flags that describe capitalisation, combined.
#define WF_CAPMASK  (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)

// Result values.  Lower number is accepted over higher one.
#define SP_BANNED   (-1)
#define SP_RARE     0
#define SP_OK       1
#define SP_LOCAL    2
#define SP_BAD      3
130
131	// First language that is loaded, start of the linked list of loaded
132	// languages.
133	slang_T *first_lang = NULL;
134
135	// file used for "zG" and "zW"
136	char_u *int_wordlist = NULL;
137
138	typedef struct wordcount_S {
139	uint16_t wc_count; // nr of times word was seen
140	char_u wc_word[`1`]; // word, actually longer
141	} wordcount_T;
142
143	#define WC_KEY_OFF offsetof(wordcount_T, wc_word)
144	#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
145	#define MAXWORDCOUNT 0xffff
146
147	// Information used when looking for suggestions.
148	typedef struct suginfo_S {
149	garray_T su_ga; // suggestions, contains "suggest_T"
150	int su_maxcount; // max. number of suggestions displayed
151	int su_maxscore; // maximum score for adding to su_ga
152	int su_sfmaxscore; // idem, for when doing soundfold words
153	garray_T su_sga; // like su_ga, sound-folded scoring
154	char_u su_badptr; // start of bad word in line*
155	int su_badlen; // length of detected bad word in line
156	int su_badflags; // caps flags for bad word
157	char_u su_badword[MAXWLEN]; // bad word truncated at su_badlen
158	char_u su_fbadword[MAXWLEN]; // su_badword case-folded
159	char_u su_sal_badword[MAXWLEN]; // su_badword soundfolded
160	hashtab_T su_banned; // table with banned words
161	slang_T su_sallang; // default language for sound folding*
162	} suginfo_T;
163
164	// One word suggestion. Used in "si_ga".
165	typedef struct {
166	char_u st_word; // suggested word, allocated string*
167	int st_wordlen; // STRLEN(st_word)
168	int st_orglen; // length of replaced text
169	int st_score; // lower is better
170	int st_altscore; // used when st_score compares equal
171	bool st_salscore; // st_score is for soundalike
172	bool st_had_bonus; // bonus already included in score
173	slang_T st_slang; // language used for sound folding*
174	} suggest_T;
175
176	#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
177
// True if a word appears in the list of banned words.
#define WAS_BANNED(su, word) \
  (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))

// Number of suggestions kept when cleaning up.  We need to keep more than
// what is displayed, because when rescore_suggestions() is called the score
// may change and wrong suggestions may be removed later.
#define SUG_CLEAN_COUNT(su) \
  ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

// Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
// of suggestions that are not going to be displayed.
#define SUG_MAX_COUNT(su)   (SUG_CLEAN_COUNT(su) + 50)

// score for various changes
#define SCORE_SPLIT     149     // split bad word
#define SCORE_SPLIT_NO  249     // split bad word with NOSPLITSUGS
#define SCORE_ICASE     52      // slightly different case
#define SCORE_REGION    200     // word is for different region
#define SCORE_RARE      180     // rare word
#define SCORE_SWAP      75      // swap two characters
#define SCORE_SWAP3     110     // swap two characters in three
#define SCORE_REP       65      // REP replacement
#define SCORE_SUBST     93      // substitute a character
#define SCORE_SIMILAR   33      // substitute a similar character
#define SCORE_SUBCOMP   33      // substitute a composing character
#define SCORE_DEL       94      // delete a character
#define SCORE_DELDUP    66      // delete a duplicated character
#define SCORE_DELCOMP   28      // delete a composing character
#define SCORE_INS       96      // insert a character
#define SCORE_INSDUP    67      // insert a duplicate character
#define SCORE_INSCOMP   30      // insert a composing character
#define SCORE_NONWORD   103     // change non-word to word char

#define SCORE_FILE      30      // suggestion from a file
#define SCORE_MAXINIT   350     // Initial maximum score: higher == slower.
                                // 350 allows for about three changes.

#define SCORE_COMMON1   30      // subtracted for words seen before
#define SCORE_COMMON2   40      // subtracted for words often seen
#define SCORE_COMMON3   50      // subtracted for words very often seen
#define SCORE_THRES2    10      // word count threshold for COMMON2
#define SCORE_THRES3    100     // word count threshold for COMMON3

// When trying changed soundfold words it becomes slow when trying more than
// two changes.  With less than two changes it's slightly faster but we miss a
// few good suggestions.  In rare cases we need to try three or four changes.
#define SCORE_SFMAX1    200     // maximum score for first try
#define SCORE_SFMAX2    300     // maximum score for second try
#define SCORE_SFMAX3    400     // maximum score for third try

// Parenthesized so the value stays intact inside larger expressions
// (e.g. "x / SCORE_BIG").
#define SCORE_BIG       (SCORE_INS * 3)   // big difference
#define SCORE_MAXMAX    999999  // accept any score
#define SCORE_LIMITMAX  350     // for spell_edit_score_limit()

// for spell_edit_score_limit() we need to know the minimum value of
// SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS
#define SCORE_EDIT_MIN  SCORE_SIMILAR
235
236	// Structure to store info for word matching.
237	typedef struct matchinf_S {
238	langp_T mi_lp; // info for language and region*
239
240	// pointers to original text to be checked
241	char_u mi_word; // start of word being checked*
242	char_u mi_end; // end of matching word so far*
243	char_u mi_fend; // next char to be added to mi_fword*
244	char_u mi_cend; // char after what was used for*
245	// mi_capflags
246
247	// case-folded text
248	char_u mi_fword[MAXWLEN + `1`]; // mi_word case-folded
249	int mi_fwordlen; // nr of valid bytes in mi_fword
250
251	// for when checking word after a prefix
252	int mi_prefarridx; // index in sl_pidxs with list of
253	// affixID/condition
254	int mi_prefcnt; // number of entries at mi_prefarridx
255	int mi_prefixlen; // byte length of prefix
256	int mi_cprefixlen; // byte length of prefix in original
257	// case
258
259	// for when checking a compound word
260	int mi_compoff; // start of following word offset
261	char_u mi_compflags[MAXWLEN]; // flags for compound words used
262	int mi_complen; // nr of compound words used
263	int mi_compextra; // nr of COMPOUNDROOT words
264
265	// others
266	int mi_result; // result so far: SP_BAD, SP_OK, etc.
267	int mi_capflags; // WF_ONECAP WF_ALLCAP WF_KEEPCAP
268	win_T mi_win; // buffer being checked*
269
270	// for NOBREAK
271	int mi_result2; // "mi_resul" without following word
272	char_u mi_end2; // "mi_end" without following word*
273	} matchinf_T;
274
275	// Structure used for the cookie argument of do_in_runtimepath().
276	typedef struct spelload_S {
277	char_u sl_lang[MAXWLEN + `1`]; // language name
278	slang_T sl_slang; // resulting slang_T struct*
279	int sl_nobreak; // NOBREAK language found
280	} spelload_T;
281
282	#define SY_MAXLEN 30
283	typedef struct syl_item_S {
284	char_u sy_chars[SY_MAXLEN]; // the sequence of chars
285	int sy_len;
286	} syl_item_T;
287
288	spelltab_T spelltab;
289	int did_set_spelltab;
290
291	// structure used to store soundfolded words that add_sound_suggest() has
292	// handled already.
293	typedef struct {
294	short sft_score; // lowest score used
295	char_u sft_word[`1`]; // soundfolded word, actually longer
296	} sftword_T;
297
298	typedef struct {
299	int badi;
300	int goodi;
301	int score;
302	} limitscore_T;
303
304
305	#ifdef INCLUDE_GENERATED_DECLARATIONS
306	# include "spell.c.generated.h"
307	#endif
308
// values for ts_isdiff
#define DIFF_NONE       0       // no different byte (yet)
#define DIFF_YES        1       // different byte found
#define DIFF_INSERT     2       // inserting character

// values for ts_flags (bit flags, may be combined)
#define TSF_PREFIXOK    1       // already checked that prefix is OK
#define TSF_DIDSPLIT    2       // tried split at this point
#define TSF_DIDDEL      4       // did a delete, "ts_delidx" has index

// special values ts_prefixdepth
#define PFD_NOPREFIX    0xff    // not using prefixes
#define PFD_PREFIXTREE  0xfe    // walking through the prefix tree
#define PFD_NOTSPECIAL  0xfd    // highest value that's not special

// mode values for find_word
#define FIND_FOLDWORD       0   // find word case-folded
#define FIND_KEEPWORD       1   // find keep-case word
#define FIND_PREFIX         2   // find word after prefix
#define FIND_COMPOUND       3   // find case-folded compound word
#define FIND_KEEPCOMPOUND   4   // find keep-case compound word
330
331	char *e_format = N_("E759: Format error in spell file");
332
333	// Remember what "z?" replaced.
334	static char_u *repl_from = NULL;
335	static char_u *repl_to = NULL;
336
337	// Main spell-checking function.
338	// "ptr" points to a character that could be the start of a word.
339	// "attrp" is set to the highlight index for a badly spelled word. For a*
340	// non-word or when it's OK it remains unchanged.
341	// This must only be called when 'spelllang' is not empty.
342	//
343	// "capcol" is used to check for a Capitalised word after the end of a
344	// sentence. If it's zero then perform the check. Return the column where to
345	// check next, or -1 when no sentence end was found. If it's NULL then don't
346	// worry.
347	//
348	// Returns the length of the word in bytes, also when it's OK, so that the
349	// caller can skip over the word.
350	size_t spell_check(
351	win_T wp, // current window*
352	char_u *ptr,
353	hlf_T *attrp,
354	int capcol, // column to check for Capital*
355	bool docount // count good words
356	)
357	{
358	matchinf_T mi; // Most things are put in "mi" so that it can
359	// be passed to functions quickly.
360	size_t nrlen = `0`; // found a number first
361	int c;
362	size_t wrongcaplen = `0`;
363	int lpi;
364	bool count_word = docount;
365
366	// A word never starts at a space or a control character. Return quickly
367	// then, skipping over the character.
368	if (*ptr <= `' '`) {
369	return `1`;
370	}
371
372	// Return here when loading language files failed.
373	if (GA_EMPTY(&wp->w_s->b_langp)) {
374	return `1`;
375	}
376
377	memset(&mi, `0`, sizeof(matchinf_T));
378
379	// A number is always OK. Also skip hexadecimal numbers 0xFF99 and
380	// 0X99FF. But always do check spelling to find "3GPP" and "11
381	// julifeest".
382	if (ptr >= `'0'` && ptr <= `'9'`) {
383	if (*ptr == `'0'` && (ptr[`1`] == `'b'` \|\| ptr[`1`] == `'B'`)) {
384	mi.mi_end = (char_u) skipbin((char**) ptr + `2`);
385	} else if (*ptr == `'0'` && (ptr[`1`] == `'x'` \|\| ptr[`1`] == `'X'`)) {
386	mi.mi_end = skiphex(ptr + `2`);
387	} else {
388	mi.mi_end = skipdigits(ptr);
389	}
390	nrlen = (size_t)(mi.mi_end - ptr);
391	}
392
393	// Find the normal end of the word (until the next non-word character).
394	mi.mi_word = ptr;
395	mi.mi_fend = ptr;
396	if (spell_iswordp(mi.mi_fend, wp)) {
397	do {
398	MB_PTR_ADV(mi.mi_fend);
399	} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
400
401	if (capcol != NULL && *capcol == `0` && wp->w_s->b_cap_prog != NULL) {
402	// Check word starting with capital letter.
403	c = PTR2CHAR(ptr);
404	if (!SPELL_ISUPPER(c)) {
405	wrongcaplen = (size_t)(mi.mi_fend - ptr);
406	}
407	}
408	}
409	if (capcol != NULL) {
410	*capcol = -`1`;
411	}
412
413	// We always use the characters up to the next non-word character,
414	// also for bad words.
415	mi.mi_end = mi.mi_fend;
416
417	// Check caps type later.
418	mi.mi_capflags = `0`;
419	mi.mi_cend = NULL;
420	mi.mi_win = wp;
421
422	// case-fold the word with one non-word character, so that we can check
423	// for the word end.
424	if (*mi.mi_fend != NUL) {
425	MB_PTR_ADV(mi.mi_fend);
426	}
427
428	(void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, MAXWLEN + `1`);
429	mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
430
431	// The word is bad unless we recognize it.
432	mi.mi_result = SP_BAD;
433	mi.mi_result2 = SP_BAD;
434
435	// Loop over the languages specified in 'spelllang'.
436	// We check them all, because a word may be matched longer in another
437	// language.
438	for (lpi = `0`; lpi < wp->w_s->b_langp.ga_len; ++lpi) {
439	mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
440
441	// If reloading fails the language is still in the list but everything
442	// has been cleared.
443	if (mi.mi_lp->lp_slang->sl_fidxs == NULL) {
444	continue;
445	}
446
447	// Check for a matching word in case-folded words.
448	find_word(&mi, FIND_FOLDWORD);
449
450	// Check for a matching word in keep-case words.
451	find_word(&mi, FIND_KEEPWORD);
452
453	// Check for matching prefixes.
454	find_prefix(&mi, FIND_FOLDWORD);
455
456	// For a NOBREAK language, may want to use a word without a following
457	// word as a backup.
458	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
459	&& mi.mi_result2 != SP_BAD) {
460	mi.mi_result = mi.mi_result2;
461	mi.mi_end = mi.mi_end2;
462	}
463
464	// Count the word in the first language where it's found to be OK.
465	if (count_word && mi.mi_result == SP_OK) {
466	count_common_word(mi.mi_lp->lp_slang, ptr,
467	(int)(mi.mi_end - ptr), `1`);
468	count_word = false;
469	}
470	}
471
472	if (mi.mi_result != SP_OK) {
473	// If we found a number skip over it. Allows for "42nd". Do flag
474	// rare and local words, e.g., "3GPP".
475	if (nrlen > `0`) {
476	if (mi.mi_result == SP_BAD \|\| mi.mi_result == SP_BANNED) {
477	return nrlen;
478	}
479	} else if (!spell_iswordp_nmw(ptr, wp)) {
480	// When we are at a non-word character there is no error, just
481	// skip over the character (try looking for a word after it).
482	if (capcol != NULL && wp->w_s->b_cap_prog != NULL) {
483	regmatch_T regmatch;
484
485	// Check for end of sentence.
486	regmatch.regprog = wp->w_s->b_cap_prog;
487	regmatch.rm_ic = false;
488	int r = vim_regexec(&regmatch, ptr, `0`);
489	wp->w_s->b_cap_prog = regmatch.regprog;
490	if (r) {
491	capcol = (int*)(regmatch.endp[`0`] - ptr);
492	}
493	}
494
495	if (has_mbyte) {
496	return (size_t)(*mb_ptr2len)(ptr);
497	}
498	return `1`;
499	} else if (mi.mi_end == ptr) {
500	// Always include at least one character. Required for when there
501	// is a mixup in "midword".
502	MB_PTR_ADV(mi.mi_end);
503	} else if (mi.mi_result == SP_BAD
504	&& LANGP_ENTRY(wp->w_s->b_langp, `0`)->lp_slang->sl_nobreak) {
505	char_u p, fp;
506	int save_result = mi.mi_result;
507
508	// First language in 'spelllang' is NOBREAK. Find first position
509	// at which any word would be valid.
510	mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, `0`);
511	if (mi.mi_lp->lp_slang->sl_fidxs != NULL) {
512	p = mi.mi_word;
513	fp = mi.mi_fword;
514	for (;;) {
515	MB_PTR_ADV(p);
516	MB_PTR_ADV(fp);
517	if (p >= mi.mi_end) {
518	break;
519	}
520	mi.mi_compoff = (int)(fp - mi.mi_fword);
521	find_word(&mi, FIND_COMPOUND);
522	if (mi.mi_result != SP_BAD) {
523	mi.mi_end = p;
524	break;
525	}
526	}
527	mi.mi_result = save_result;
528	}
529	}
530
531	if (mi.mi_result == SP_BAD \|\| mi.mi_result == SP_BANNED) {
532	*attrp = HLF_SPB;
533	} else if (mi.mi_result == SP_RARE) {
534	*attrp = HLF_SPR;
535	} else {
536	*attrp = HLF_SPL;
537	}
538	}
539
540	if (wrongcaplen > `0` && (mi.mi_result == SP_OK \|\| mi.mi_result == SP_RARE)) {
541	// Report SpellCap only when the word isn't badly spelled.
542	*attrp = HLF_SPC;
543	return wrongcaplen;
544	}
545
546	return (size_t)(mi.mi_end - ptr);
547	}
548
549	// Check if the word at "mip->mi_word" is in the tree.
550	// When "mode" is FIND_FOLDWORD check in fold-case word tree.
551	// When "mode" is FIND_KEEPWORD check in keep-case word tree.
552	// When "mode" is FIND_PREFIX check for word after prefix in fold-case word
553	// tree.
554	//
555	// For a match mip->mi_result is updated.
556	static void find_word(matchinf_T mip, int* mode)
557	{
558	int wlen = `0`;
559	int flen;
560	char_u *ptr;
561	slang_T *slang = mip->mi_lp->lp_slang;
562	char_u *byts;
563	idx_T *idxs;
564
565	if (mode == FIND_KEEPWORD \|\| mode == FIND_KEEPCOMPOUND) {
566	// Check for word with matching case in keep-case tree.
567	ptr = mip->mi_word;
568	flen = `9999`; // no case folding, always enough bytes
569	byts = slang->sl_kbyts;
570	idxs = slang->sl_kidxs;
571
572	if (mode == FIND_KEEPCOMPOUND)
573	// Skip over the previously found word(s).
574	wlen += mip->mi_compoff;
575	} else {
576	// Check for case-folded in case-folded tree.
577	ptr = mip->mi_fword;
578	flen = mip->mi_fwordlen; // available case-folded bytes
579	byts = slang->sl_fbyts;
580	idxs = slang->sl_fidxs;
581
582	if (mode == FIND_PREFIX) {
583	// Skip over the prefix.
584	wlen = mip->mi_prefixlen;
585	flen -= mip->mi_prefixlen;
586	} else if (mode == FIND_COMPOUND) {
587	// Skip over the previously found word(s).
588	wlen = mip->mi_compoff;
589	flen -= mip->mi_compoff;
590	}
591
592	}
593
594	if (byts == NULL)
595	return; // array is empty
596
597	idx_T arridx = `0`;
598	int endlen[MAXWLEN]; // length at possible word endings
599	idx_T endidx[MAXWLEN]; // possible word endings
600	int endidxcnt = `0`;
601	int len;
602	int c;
603
604	// Repeat advancing in the tree until:
605	// - there is a byte that doesn't match,
606	// - we reach the end of the tree,
607	// - or we reach the end of the line.
608	for (;; ) {
609	if (flen <= `0` && *mip->mi_fend != NUL)
610	flen = fold_more(mip);
611
612	len = byts[arridx++];
613
614	// If the first possible byte is a zero the word could end here.
615	// Remember this index, we first check for the longest word.
616	if (byts[arridx] == `0`) {
617	if (endidxcnt == MAXWLEN) {
618	// Must be a corrupted spell file.
619	EMSG(_(e_format));
620	return;
621	}
622	endlen[endidxcnt] = wlen;
623	endidx[endidxcnt++] = arridx++;
624	--len;
625
626	// Skip over the zeros, there can be several flag/region
627	// combinations.
628	while (len > `0` && byts[arridx] == `0`) {
629	++arridx;
630	--len;
631	}
632	if (len == `0`)
633	break; // no children, word must end here
634	}
635
636	// Stop looking at end of the line.
637	if (ptr[wlen] == NUL)
638	break;
639
640	// Perform a binary search in the list of accepted bytes.
641	c = ptr[wlen];
642	if (c == TAB) // <Tab> is handled like <Space>
643	c = `' '`;
644	idx_T lo = arridx;
645	idx_T hi = arridx + len - `1`;
646	while (lo < hi) {
647	idx_T m = (lo + hi) / `2`;
648	if (byts[m] > c)
649	hi = m - `1`;
650	else if (byts[m] < c)
651	lo = m + `1`;
652	else {
653	lo = hi = m;
654	break;
655	}
656	}
657
658	// Stop if there is no matching byte.
659	if (hi < lo \|\| byts[lo] != c)
660	break;
661
662	// Continue at the child (if there is one).
663	arridx = idxs[lo];
664	++wlen;
665	--flen;
666
667	// One space in the good word may stand for several spaces in the
668	// checked word.
669	if (c == `' '`) {
670	for (;; ) {
671	if (flen <= `0` && *mip->mi_fend != NUL)
672	flen = fold_more(mip);
673	if (ptr[wlen] != `' '` && ptr[wlen] != TAB)
674	break;
675	++wlen;
676	--flen;
677	}
678	}
679	}
680
681	char_u *p;
682	bool word_ends;
683
684	// Verify that one of the possible endings is valid. Try the longest
685	// first.
686	while (endidxcnt > `0`) {
687	--endidxcnt;
688	arridx = endidx[endidxcnt];
689	wlen = endlen[endidxcnt];
690
691	if (utf_head_off(ptr, ptr + wlen) > `0`) {
692	continue; // not at first byte of character
693	}
694	if (spell_iswordp(ptr + wlen, mip->mi_win)) {
695	if (slang->sl_compprog == NULL && !slang->sl_nobreak)
696	continue; // next char is a word character
697	word_ends = false;
698	} else
699	word_ends = true;
700	// The prefix flag is before compound flags. Once a valid prefix flag
701	// has been found we try compound flags.
702	bool prefix_found = false;
703
704	if (mode != FIND_KEEPWORD && has_mbyte) {
705	// Compute byte length in original word, length may change
706	// when folding case. This can be slow, take a shortcut when the
707	// case-folded word is equal to the keep-case word.
708	p = mip->mi_word;
709	if (STRNCMP(ptr, p, wlen) != `0`) {
710	for (char_u *s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) {
711	MB_PTR_ADV(p);
712	}
713	wlen = (int)(p - mip->mi_word);
714	}
715	}
716
717	// Check flags and region. For FIND_PREFIX check the condition and
718	// prefix ID.
719	// Repeat this if there are more flags/region alternatives until there
720	// is a match.
721	for (len = byts[arridx - `1`]; len > `0` && byts[arridx] == `0`;
722	--len, ++arridx) {
723	uint32_t flags = idxs[arridx];
724
725	// For the fold-case tree check that the case of the checked word
726	// matches with what the word in the tree requires.
727	// For keep-case tree the case is always right. For prefixes we
728	// don't bother to check.
729	if (mode == FIND_FOLDWORD) {
730	if (mip->mi_cend != mip->mi_word + wlen) {
731	// mi_capflags was set for a different word length, need
732	// to do it again.
733	mip->mi_cend = mip->mi_word + wlen;
734	mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
735	}
736
737	if (mip->mi_capflags == WF_KEEPCAP
738	\|\| !spell_valid_case(mip->mi_capflags, flags))
739	continue;
740	}
741	// When mode is FIND_PREFIX the word must support the prefix:
742	// check the prefix ID and the condition. Do that for the list at
743	// mip->mi_prefarridx that find_prefix() filled.
744	else if (mode == FIND_PREFIX && !prefix_found) {
745	c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
746	flags,
747	mip->mi_word + mip->mi_cprefixlen, slang,
748	false);
749	if (c == `0`)
750	continue;
751
752	// Use the WF_RARE flag for a rare prefix.
753	if (c & WF_RAREPFX)
754	flags \|= WF_RARE;
755	prefix_found = true;
756	}
757
758	if (slang->sl_nobreak) {
759	if ((mode == FIND_COMPOUND \|\| mode == FIND_KEEPCOMPOUND)
760	&& (flags & WF_BANNED) == `0`) {
761	// NOBREAK: found a valid following word. That's all we
762	// need to know, so return.
763	mip->mi_result = SP_OK;
764	break;
765	}
766	} else if ((mode == FIND_COMPOUND \|\| mode == FIND_KEEPCOMPOUND
767	\|\| !word_ends)) {
768	// If there is no compound flag or the word is shorter than
769	// COMPOUNDMIN reject it quickly.
770	// Makes you wonder why someone puts a compound flag on a word
771	// that's too short... Myspell compatibility requires this
772	// anyway.
773	if (((unsigned)flags >> `24`) == `0`
774	\|\| wlen - mip->mi_compoff < slang->sl_compminlen)
775	continue;
776	// For multi-byte chars check character length against
777	// COMPOUNDMIN.
778	if (has_mbyte
779	&& slang->sl_compminlen > `0`
780	&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
781	wlen - mip->mi_compoff) < slang->sl_compminlen)
782	continue;
783
784	// Limit the number of compound words to COMPOUNDWORDMAX if no
785	// maximum for syllables is specified.
786	if (!word_ends && mip->mi_complen + mip->mi_compextra + `2`
787	> slang->sl_compmax
788	&& slang->sl_compsylmax == MAXWLEN)
789	continue;
790
791	// Don't allow compounding on a side where an affix was added,
792	// unless COMPOUNDPERMITFLAG was used.
793	if (mip->mi_complen > `0` && (flags & WF_NOCOMPBEF))
794	continue;
795	if (!word_ends && (flags & WF_NOCOMPAFT))
796	continue;
797
798	// Quickly check if compounding is possible with this flag.
799	if (!byte_in_str(mip->mi_complen == `0`
800	? slang->sl_compstartflags
801	: slang->sl_compallflags,
802	((unsigned)flags >> `24`)))
803	continue;
804
805	// If there is a match with a CHECKCOMPOUNDPATTERN rule
806	// discard the compound word.
807	if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
808	continue;
809
810	if (mode == FIND_COMPOUND) {
811	int capflags;
812
813	// Need to check the caps type of the appended compound
814	// word.
815	if (has_mbyte && STRNCMP(ptr, mip->mi_word,
816	mip->mi_compoff) != `0`) {
817	// case folding may have changed the length
818	p = mip->mi_word;
819	for (char_u *s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s)) {
820	MB_PTR_ADV(p);
821	}
822	} else {
823	p = mip->mi_word + mip->mi_compoff;
824	}
825	capflags = captype(p, mip->mi_word + wlen);
826	if (capflags == WF_KEEPCAP \|\| (capflags == WF_ALLCAP
827	&& (flags & WF_FIXCAP) != `0`))
828	continue;
829
830	if (capflags != WF_ALLCAP) {
831	// When the character before the word is a word
832	// character we do not accept a Onecap word. We do
833	// accept a no-caps word, even when the dictionary
834	// word specifies ONECAP.
835	MB_PTR_BACK(mip->mi_word, p);
836	if (spell_iswordp_nmw(p, mip->mi_win)
837	? capflags == WF_ONECAP
838	: (flags & WF_ONECAP) != `0`
839	&& capflags != WF_ONECAP) {
840	continue;
841	}
842	}
843	}
844
845	// If the word ends the sequence of compound flags of the
846	// words must match with one of the COMPOUNDRULE items and
847	// the number of syllables must not be too large.
848	mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> `24`);
849	mip->mi_compflags[mip->mi_complen + `1`] = NUL;
850	if (word_ends) {
851	char_u fword[MAXWLEN];
852
853	if (slang->sl_compsylmax < MAXWLEN) {
854	// "fword" is only needed for checking syllables.
855	if (ptr == mip->mi_word)
856	(void)spell_casefold(ptr, wlen, fword, MAXWLEN);
857	else
858	STRLCPY(fword, ptr, endlen[endidxcnt] + `1`);
859	}
860	if (!can_compound(slang, fword, mip->mi_compflags))
861	continue;
862	} else if (slang->sl_comprules != NULL
863	&& !match_compoundrule(slang, mip->mi_compflags))
864	// The compound flags collected so far do not match any
865	// COMPOUNDRULE, discard the compounded word.
866	continue;
867	}
868	// Check NEEDCOMPOUND: can't use word without compounding.
869	else if (flags & WF_NEEDCOMP)
870	continue;
871
872	int nobreak_result = SP_OK;
873
874	if (!word_ends) {
875	int save_result = mip->mi_result;
876	char_u *save_end = mip->mi_end;
877	langp_T *save_lp = mip->mi_lp;
878
879	// Check that a valid word follows. If there is one and we
880	// are compounding, it will set "mi_result", thus we are
881	// always finished here. For NOBREAK we only check that a
882	// valid word follows.
883	// Recursive!
884	if (slang->sl_nobreak)
885	mip->mi_result = SP_BAD;
886
887	// Find following word in case-folded tree.
888	mip->mi_compoff = endlen[endidxcnt];
889	if (has_mbyte && mode == FIND_KEEPWORD) {
890	// Compute byte length in case-folded word from "wlen":
891	// byte length in keep-case word. Length may change when
892	// folding case. This can be slow, take a shortcut when
893	// the case-folded word is equal to the keep-case word.
894	p = mip->mi_fword;
895	if (STRNCMP(ptr, p, wlen) != `0`) {
896	for (char_u *s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) {
897	MB_PTR_ADV(p);
898	}
899	mip->mi_compoff = (int)(p - mip->mi_fword);
900	}
901	}
902	#if 0
903	c = mip->mi_compoff;
904	#endif
905	++mip->mi_complen;
906	if (flags & WF_COMPROOT)
907	++mip->mi_compextra;
908
909	// For NOBREAK we need to try all NOBREAK languages, at least
910	// to find the ".add" file(s).
911	for (int lpi = `0`; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi) {
912	if (slang->sl_nobreak) {
913	mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
914	if (mip->mi_lp->lp_slang->sl_fidxs == NULL
915	\|\| !mip->mi_lp->lp_slang->sl_nobreak)
916	continue;
917	}
918
919	find_word(mip, FIND_COMPOUND);
920
921	// When NOBREAK any word that matches is OK. Otherwise we
922	// need to find the longest match, thus try with keep-case
923	// and prefix too.
924	if (!slang->sl_nobreak \|\| mip->mi_result == SP_BAD) {
925	// Find following word in keep-case tree.
926	mip->mi_compoff = wlen;
927	find_word(mip, FIND_KEEPCOMPOUND);
928
929	#if 0 // Disabled, a prefix must not appear halfway through a compound
930	// word, unless the COMPOUNDPERMITFLAG is used, in which case it
931	// can't be a postponed prefix.
932	if (!slang->sl_nobreak \|\| mip->mi_result == SP_BAD) {
933	// Check for following word with prefix.
934	mip->mi_compoff = c;
935	find_prefix(mip, FIND_COMPOUND);
936	}
937	#endif
938	}
939
940	if (!slang->sl_nobreak)
941	break;
942	}
943	--mip->mi_complen;
944	if (flags & WF_COMPROOT)
945	--mip->mi_compextra;
946	mip->mi_lp = save_lp;
947
948	if (slang->sl_nobreak) {
949	nobreak_result = mip->mi_result;
950	mip->mi_result = save_result;
951	mip->mi_end = save_end;
952	} else {
953	if (mip->mi_result == SP_OK)
954	break;
955	continue;
956	}
957	}
958
959	int res = SP_BAD;
960	if (flags & WF_BANNED)
961	res = SP_BANNED;
962	else if (flags & WF_REGION) {
963	// Check region.
964	if ((mip->mi_lp->lp_region & (flags >> `16`)) != `0`)
965	res = SP_OK;
966	else
967	res = SP_LOCAL;
968	} else if (flags & WF_RARE)
969	res = SP_RARE;
970	else
971	res = SP_OK;
972
973	// Always use the longest match and the best result. For NOBREAK
974	// we separately keep the longest match without a following good
975	// word as a fall-back.
976	if (nobreak_result == SP_BAD) {
977	if (mip->mi_result2 > res) {
978	mip->mi_result2 = res;
979	mip->mi_end2 = mip->mi_word + wlen;
980	} else if (mip->mi_result2 == res
981	&& mip->mi_end2 < mip->mi_word + wlen)
982	mip->mi_end2 = mip->mi_word + wlen;
983	} else if (mip->mi_result > res) {
984	mip->mi_result = res;
985	mip->mi_end = mip->mi_word + wlen;
986	} else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
987	mip->mi_end = mip->mi_word + wlen;
988
989	if (mip->mi_result == SP_OK)
990	break;
991	}
992
993	if (mip->mi_result == SP_OK)
994	break;
995	}
996	}
997
998	// Returns true if there is a match between the word ptr[wlen] and
999	// CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
1000	// word.
1001	// A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
1002	// end of ptr[wlen] and the second part matches after it.
1003	static bool
1004	match_checkcompoundpattern (
1005	char_u *ptr,
1006	int wlen,
1007	garray_T gap // &sl_comppat*
1008	)
1009	{
1010	char_u *p;
1011	int len;
1012
1013	for (int i = `0`; i + `1` < gap->ga_len; i += `2`) {
1014	p = ((char_u **)gap->ga_data)[i + `1`];
1015	if (STRNCMP(ptr + wlen, p, STRLEN(p)) == `0`) {
1016	// Second part matches at start of following compound word, now
1017	// check if first part matches at end of previous word.
1018	p = ((char_u **)gap->ga_data)[i];
1019	len = (int)STRLEN(p);
1020	if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == `0`)
1021	return true;
1022	}
1023	}
1024	return false;
1025	}
1026
1027	// Returns true if "flags" is a valid sequence of compound flags and "word"
1028	// does not have too many syllables.
1029	static bool can_compound(slang_T slang, char_u word, char_u *flags)
1030	{
1031	char_u uflags[MAXWLEN * `2`];
1032	int i;
1033	char_u *p;
1034
1035	if (slang->sl_compprog == NULL)
1036	return false;
1037	if (enc_utf8) {
1038	// Need to convert the single byte flags to utf8 characters.
1039	p = uflags;
1040	for (i = `0`; flags[i] != NUL; i++) {
1041	p += utf_char2bytes(flags[i], p);
1042	}
1043	*p = NUL;
1044	p = uflags;
1045	} else
1046	p = flags;
1047	if (!vim_regexec_prog(&slang->sl_compprog, false, p, `0`))
1048	return false;
1049
1050	// Count the number of syllables. This may be slow, do it last. If there
1051	// are too many syllables AND the number of compound words is above
1052	// COMPOUNDWORDMAX then compounding is not allowed.
1053	if (slang->sl_compsylmax < MAXWLEN
1054	&& count_syllables(slang, word) > slang->sl_compsylmax)
1055	return (int)STRLEN(flags) < slang->sl_compmax;
1056	return true;
1057	}
1058
1059	// Returns true when the sequence of flags in "compflags" plus "flag" can
1060	// possibly form a valid compounded word. This also checks the COMPOUNDRULE
1061	// lines if they don't contain wildcards.
1062	static bool can_be_compound(trystate_T sp, slang_T slang, char_u compflags, int* flag)
1063	{
1064	// If the flag doesn't appear in sl_compstartflags or sl_compallflags
1065	// then it can't possibly compound.
1066	if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
1067	? slang->sl_compstartflags : slang->sl_compallflags, flag))
1068	return false;
1069
1070	// If there are no wildcards, we can check if the flags collected so far
1071	// possibly can form a match with COMPOUNDRULE patterns. This only
1072	// makes sense when we have two or more words.
1073	if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit) {
1074	compflags[sp->ts_complen] = flag;
1075	compflags[sp->ts_complen + `1`] = NUL;
1076	bool v = match_compoundrule(slang, compflags + sp->ts_compsplit);
1077	compflags[sp->ts_complen] = NUL;
1078	return v;
1079	}
1080
1081	return true;
1082	}
1083
1084	// Returns true if the compound flags in compflags[] match the start of any
1085	// compound rule. This is used to stop trying a compound if the flags
1086	// collected so far can't possibly match any compound rule.
1087	// Caller must check that slang->sl_comprules is not NULL.
1088	static bool match_compoundrule(slang_T slang, char_u compflags)
1089	{
1090	char_u *p;
1091	int i;
1092	int c;
1093
1094	// loop over all the COMPOUNDRULE entries
1095	for (p = slang->sl_comprules; *p != NUL; ++p) {
1096	// loop over the flags in the compound word we have made, match
1097	// them against the current rule entry
1098	for (i = `0`;; ++i) {
1099	c = compflags[i];
1100	if (c == NUL)
1101	// found a rule that matches for the flags we have so far
1102	return true;
1103	if (p == `'/'` \|\| p == NUL)
1104	break; // end of rule, it's too short
1105	if (*p == `'['`) {
1106	bool match = false;
1107
1108	// compare against all the flags in []
1109	++p;
1110	while (p != `']'` && p != NUL)
1111	if (*p++ == c)
1112	match = true;
1113	if (!match)
1114	break; // none matches
1115	} else if (*p != c)
1116	break; // flag of word doesn't match flag in pattern
1117	++p;
1118	}
1119
1120	// Skip to the next "/", where the next pattern starts.
1121	p = vim_strchr(p, `'/'`);
1122	if (p == NULL)
1123	break;
1124	}
1125
1126	// Checked all the rules and none of them match the flags, so there
1127	// can't possibly be a compound starting with these flags.
1128	return false;
1129	}
1130
1131	// Return non-zero if the prefix indicated by "arridx" matches with the prefix
1132	// ID in "flags" for the word "word".
1133	// The WF_RAREPFX flag is included in the return value for a rare prefix.
1134	static int
1135	valid_word_prefix (
1136	int totprefcnt, // nr of prefix IDs
1137	int arridx, // idx in sl_pidxs[]
1138	int flags,
1139	char_u *word,
1140	slang_T *slang,
1141	bool cond_req // only use prefixes with a condition
1142	)
1143	{
1144	int prefcnt;
1145	int pidx;
1146	int prefid;
1147
1148	prefid = (unsigned)flags >> `24`;
1149	for (prefcnt = totprefcnt - `1`; prefcnt >= `0`; --prefcnt) {
1150	pidx = slang->sl_pidxs[arridx + prefcnt];
1151
1152	// Check the prefix ID.
1153	if (prefid != (pidx & `0xff`))
1154	continue;
1155
1156	// Check if the prefix doesn't combine and the word already has a
1157	// suffix.
1158	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1159	continue;
1160
1161	// Check the condition, if there is one. The condition index is
1162	// stored in the two bytes above the prefix ID byte.
1163	regprog_T *rp = &slang->sl_prefprog[((unsigned*)pidx >> `8`) & `0xffff`];
1164	if (*rp != NULL) {
1165	if (!vim_regexec_prog(rp, false, word, `0`)) {
1166	continue;
1167	}
1168	} else if (cond_req)
1169	continue;
1170
1171	// It's a match! Return the WF_ flags.
1172	return pidx;
1173	}
1174	return `0`;
1175	}
1176
1177	// Check if the word at "mip->mi_word" has a matching prefix.
1178	// If it does, then check the following word.
1179	//
1180	// If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1181	// prefix in a compound word.
1182	//
1183	// For a match mip->mi_result is updated.
1184	static void find_prefix(matchinf_T mip, int* mode)
1185	{
1186	idx_T arridx = `0`;
1187	int len;
1188	int wlen = `0`;
1189	int flen;
1190	int c;
1191	char_u *ptr;
1192	idx_T lo, hi, m;
1193	slang_T *slang = mip->mi_lp->lp_slang;
1194	char_u *byts;
1195	idx_T *idxs;
1196
1197	byts = slang->sl_pbyts;
1198	if (byts == NULL)
1199	return; // array is empty
1200
1201	// We use the case-folded word here, since prefixes are always
1202	// case-folded.
1203	ptr = mip->mi_fword;
1204	flen = mip->mi_fwordlen; // available case-folded bytes
1205	if (mode == FIND_COMPOUND) {
1206	// Skip over the previously found word(s).
1207	ptr += mip->mi_compoff;
1208	flen -= mip->mi_compoff;
1209	}
1210	idxs = slang->sl_pidxs;
1211
1212	// Repeat advancing in the tree until:
1213	// - there is a byte that doesn't match,
1214	// - we reach the end of the tree,
1215	// - or we reach the end of the line.
1216	for (;; ) {
1217	if (flen == `0` && *mip->mi_fend != NUL)
1218	flen = fold_more(mip);
1219
1220	len = byts[arridx++];
1221
1222	// If the first possible byte is a zero the prefix could end here.
1223	// Check if the following word matches and supports the prefix.
1224	if (byts[arridx] == `0`) {
1225	// There can be several prefixes with different conditions. We
1226	// try them all, since we don't know which one will give the
1227	// longest match. The word is the same each time, pass the list
1228	// of possible prefixes to find_word().
1229	mip->mi_prefarridx = arridx;
1230	mip->mi_prefcnt = len;
1231	while (len > `0` && byts[arridx] == `0`) {
1232	++arridx;
1233	--len;
1234	}
1235	mip->mi_prefcnt -= len;
1236
1237	// Find the word that comes after the prefix.
1238	mip->mi_prefixlen = wlen;
1239	if (mode == FIND_COMPOUND)
1240	// Skip over the previously found word(s).
1241	mip->mi_prefixlen += mip->mi_compoff;
1242
1243	if (has_mbyte) {
1244	// Case-folded length may differ from original length.
1245	mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1246	mip->mi_prefixlen, mip->mi_word);
1247	} else
1248	mip->mi_cprefixlen = mip->mi_prefixlen;
1249	find_word(mip, FIND_PREFIX);
1250
1251
1252	if (len == `0`)
1253	break; // no children, word must end here
1254	}
1255
1256	// Stop looking at end of the line.
1257	if (ptr[wlen] == NUL)
1258	break;
1259
1260	// Perform a binary search in the list of accepted bytes.
1261	c = ptr[wlen];
1262	lo = arridx;
1263	hi = arridx + len - `1`;
1264	while (lo < hi) {
1265	m = (lo + hi) / `2`;
1266	if (byts[m] > c)
1267	hi = m - `1`;
1268	else if (byts[m] < c)
1269	lo = m + `1`;
1270	else {
1271	lo = hi = m;
1272	break;
1273	}
1274	}
1275
1276	// Stop if there is no matching byte.
1277	if (hi < lo \|\| byts[lo] != c)
1278	break;
1279
1280	// Continue at the child (if there is one).
1281	arridx = idxs[lo];
1282	++wlen;
1283	--flen;
1284	}
1285	}
1286
1287	// Need to fold at least one more character. Do until next non-word character
1288	// for efficiency. Include the non-word character too.
1289	// Return the length of the folded chars in bytes.
1290	static int fold_more(matchinf_T *mip)
1291	{
1292	int flen;
1293	char_u *p;
1294
1295	p = mip->mi_fend;
1296	do {
1297	MB_PTR_ADV(mip->mi_fend);
1298	} while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
1299
1300	// Include the non-word character so that we can check for the word end.
1301	if (*mip->mi_fend != NUL) {
1302	MB_PTR_ADV(mip->mi_fend);
1303	}
1304
1305	(void)spell_casefold(p, (int)(mip->mi_fend - p),
1306	mip->mi_fword + mip->mi_fwordlen,
1307	MAXWLEN - mip->mi_fwordlen);
1308	flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
1309	mip->mi_fwordlen += flen;
1310	return flen;
1311	}
1312
1313	/// Checks case flags for a word. Returns true, if the word has the requested
1314	/// case.
1315	///
1316	/// @param wordflags Flags for the checked word.
1317	/// @param treeflags Flags for the word in the spell tree.
1318	static bool spell_valid_case(int wordflags, int treeflags)
1319	{
1320	return (wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == `0`)
1321	\|\| ((treeflags & (WF_ALLCAP \| WF_KEEPCAP)) == `0`
1322	&& ((treeflags & WF_ONECAP) == `0`
1323	\|\| (wordflags & WF_ONECAP) != `0`));
1324	}
1325
1326	// Returns true if spell checking is not enabled.
1327	static bool no_spell_checking(win_T *wp)
1328	{
1329	if (!wp->w_p_spell \|\| *wp->w_s->b_p_spl == NUL
1330	\|\| GA_EMPTY(&wp->w_s->b_langp)) {
1331	EMSG(_("E756: Spell checking is not enabled"));
1332	return true;
1333	}
1334	return false;
1335	}
1336
1337	// Moves to the next spell error.
1338	// "curline" is false for "[s", "]s", "[S" and "]S".
1339	// "curline" is true to find word under/after cursor in the same line.
1340	// For Insert mode completion "dir" is BACKWARD and "curline" is true: move
1341	// to after badly spelled word before the cursor.
1342	// Return 0 if not found, length of the badly spelled word otherwise.
1343	size_t
1344	spell_move_to (
1345	win_T *wp,
1346	int dir, // FORWARD or BACKWARD
1347	bool allwords, // true for "[s"/"]s", false for "[S"/"]S"
1348	bool curline,
1349	hlf_T attrp // return: attributes of bad word or NULL*
1350	// (only when "dir" is FORWARD)
1351	)
1352	{
1353	linenr_T lnum;
1354	pos_T found_pos;
1355	size_t found_len = `0`;
1356	char_u *line;
1357	char_u *p;
1358	char_u *endp;
1359	hlf_T attr = HLF_COUNT;
1360	size_t len;
1361	int has_syntax = syntax_present(wp);
1362	int col;
1363	bool can_spell;
1364	char_u *buf = NULL;
1365	size_t buflen = `0`;
1366	int skip = `0`;
1367	int capcol = -`1`;
1368	bool found_one = false;
1369	bool wrapped = false;
1370
1371	if (no_spell_checking(wp))
1372	return `0`;
1373
1374	// Start looking for bad word at the start of the line, because we can't
1375	// start halfway through a word, we don't know where it starts or ends.
1376	//
1377	// When searching backwards, we continue in the line to find the last
1378	// bad word (in the cursor line: before the cursor).
1379	//
1380	// We concatenate the start of the next line, so that wrapped words work
1381	// (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
1382	// though...
1383	lnum = wp->w_cursor.lnum;
1384	clearpos(&found_pos);
1385
1386	while (!got_int) {
1387	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1388
1389	len = STRLEN(line);
1390	if (buflen < len + MAXWLEN + `2`) {
1391	xfree(buf);
1392	buflen = len + MAXWLEN + `2`;
1393	buf = xmalloc(buflen);
1394	}
1395	assert(buf && buflen >= len + MAXWLEN + `2`);
1396
1397	// In first line check first word for Capital.
1398	if (lnum == `1`)
1399	capcol = `0`;
1400
1401	// For checking first word with a capital skip white space.
1402	if (capcol == `0`) {
1403	capcol = (int)getwhitecols(line);
1404	} else if (curline && wp == curwin) {
1405	// For spellbadword(): check if first word needs a capital.
1406	col = (int)getwhitecols(line);
1407	if (check_need_cap(lnum, col)) {
1408	capcol = col;
1409	}
1410
1411	// Need to get the line again, may have looked at the previous
1412	// one.
1413	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1414	}
1415
1416	// Copy the line into "buf" and append the start of the next line if
1417	// possible.
1418	STRCPY(buf, line);
1419	if (lnum < wp->w_buffer->b_ml.ml_line_count)
1420	spell_cat_line(buf + STRLEN(buf),
1421	ml_get_buf(wp->w_buffer, lnum + `1`, FALSE),
1422	MAXWLEN);
1423	p = buf + skip;
1424	endp = buf + len;
1425	while (p < endp) {
1426	// When searching backward don't search after the cursor. Unless
1427	// we wrapped around the end of the buffer.
1428	if (dir == BACKWARD
1429	&& lnum == wp->w_cursor.lnum
1430	&& !wrapped
1431	&& (colnr_T)(p - buf) >= wp->w_cursor.col)
1432	break;
1433
1434	// start of word
1435	attr = HLF_COUNT;
1436	len = spell_check(wp, p, &attr, &capcol, false);
1437
1438	if (attr != HLF_COUNT) {
1439	// We found a bad word. Check the attribute.
1440	if (allwords \|\| attr == HLF_SPB) {
1441	// When searching forward only accept a bad word after
1442	// the cursor.
1443	if (dir == BACKWARD
1444	\|\| lnum != wp->w_cursor.lnum
1445	\|\| wrapped
1446	\|\| ((colnr_T)(curline
1447	? p - buf + (ptrdiff_t)len
1448	: p - buf) > wp->w_cursor.col)) {
1449	if (has_syntax) {
1450	col = (int)(p - buf);
1451	(void)syn_get_id(wp, lnum, (colnr_T)col,
1452	FALSE, &can_spell, FALSE);
1453	if (!can_spell)
1454	attr = HLF_COUNT;
1455	} else
1456	can_spell = true;
1457
1458	if (can_spell) {
1459	found_one = true;
1460	found_pos.lnum = lnum;
1461	found_pos.col = (int)(p - buf);
1462	found_pos.coladd = `0`;
1463	if (dir == FORWARD) {
1464	// No need to search further.
1465	wp->w_cursor = found_pos;
1466	xfree(buf);
1467	if (attrp != NULL)
1468	*attrp = attr;
1469	return len;
1470	} else if (curline) {
1471	// Insert mode completion: put cursor after
1472	// the bad word.
1473	assert(len <= INT_MAX);
1474	found_pos.col += (int)len;
1475	}
1476	found_len = len;
1477	}
1478	} else
1479	found_one = true;
1480	}
1481	}
1482
1483	// advance to character after the word
1484	p += len;
1485	assert(len <= INT_MAX);
1486	capcol -= (int)len;
1487	}
1488
1489	if (dir == BACKWARD && found_pos.lnum != `0`) {
1490	// Use the last match in the line (before the cursor).
1491	wp->w_cursor = found_pos;
1492	xfree(buf);
1493	return found_len;
1494	}
1495
1496	if (curline) {
1497	break; // only check cursor line
1498	}
1499
1500	// If we are back at the starting line and searched it again there
1501	// is no match, give up.
1502	if (lnum == wp->w_cursor.lnum && wrapped) {
1503	break;
1504	}
1505
1506	// Advance to next line.
1507	if (dir == BACKWARD) {
1508	if (lnum > `1`) {
1509	lnum--;
1510	} else if (!p_ws) {
1511	break; // at first line and 'nowrapscan'
1512	} else {
1513	// Wrap around to the end of the buffer. May search the
1514	// starting line again and accept the last match.
1515	lnum = wp->w_buffer->b_ml.ml_line_count;
1516	wrapped = true;
1517	if (!shortmess(SHM_SEARCH))
1518	give_warning((char_u *)_(top_bot_msg), true);
1519	}
1520	capcol = -`1`;
1521	} else {
1522	if (lnum < wp->w_buffer->b_ml.ml_line_count)
1523	++lnum;
1524	else if (!p_ws)
1525	break; // at first line and 'nowrapscan'
1526	else {
1527	// Wrap around to the start of the buffer. May search the
1528	// starting line again and accept the first match.
1529	lnum = `1`;
1530	wrapped = true;
1531	if (!shortmess(SHM_SEARCH))
1532	give_warning((char_u *)_(bot_top_msg), true);
1533	}
1534
1535	// If we are back at the starting line and there is no match then
1536	// give up.
1537	if (lnum == wp->w_cursor.lnum && !found_one) {
1538	break;
1539	}
1540
1541	// Skip the characters at the start of the next line that were
1542	// included in a match crossing line boundaries.
1543	if (attr == HLF_COUNT)
1544	skip = (int)(p - endp);
1545	else
1546	skip = `0`;
1547
1548	// Capcol skips over the inserted space.
1549	--capcol;
1550
1551	// But after empty line check first word in next line
1552	if (*skipwhite(line) == NUL)
1553	capcol = `0`;
1554	}
1555
1556	line_breakcheck();
1557	}
1558
1559	xfree(buf);
1560	return `0`;
1561	}
1562
1563	// For spell checking: concatenate the start of the following line "line" into
1564	// "buf", blanking-out special characters. Copy less then "maxlen" bytes.
1565	// Keep the blanks at the start of the next line, this is used in win_line()
1566	// to skip those bytes if the word was OK.
1567	void spell_cat_line(char_u buf, char_u line, int maxlen)
1568	{
1569	char_u *p;
1570	int n;
1571
1572	p = skipwhite(line);
1573	while (vim_strchr((char_u )"#/\"\t", *p) != NULL)
1574	p = skipwhite(p + `1`);
1575
1576	if (*p != NUL) {
1577	// Only worth concatenating if there is something else than spaces to
1578	// concatenate.
1579	n = (int)(p - line) + `1`;
1580	if (n < maxlen - `1`) {
1581	memset(buf, `' '`, n);
1582	STRLCPY(buf + n, p, maxlen - n);
1583	}
1584	}
1585	}
1586
1587	// Load word list(s) for "lang" from Vim spell file(s).
1588	// "lang" must be the language without the region: e.g., "en".
1589	static void spell_load_lang(char_u *lang)
1590	{
1591	char_u fname_enc[`85`];
1592	int r;
1593	spelload_T sl;
1594	int round;
1595
1596	// Copy the language name to pass it to spell_load_cb() as a cookie.
1597	// It's truncated when an error is detected.
1598	STRCPY(sl.sl_lang, lang);
1599	sl.sl_slang = NULL;
1600	sl.sl_nobreak = false;
1601
1602	// We may retry when no spell file is found for the language, an
1603	// autocommand may load it then.
1604	for (round = `1`; round <= `2`; ++round) {
1605	// Find the first spell file for "lang" in 'runtimepath' and load it.
1606	vim_snprintf((char )fname_enc, sizeof*(fname_enc) - `5`,
1607	"spell/%s.%s.spl", lang, spell_enc());
1608	r = do_in_runtimepath(fname_enc, `0`, spell_load_cb, &sl);
1609
1610	if (r == FAIL && *sl.sl_lang != NUL) {
1611	// Try loading the ASCII version.
1612	vim_snprintf((char )fname_enc, sizeof*(fname_enc) - `5`,
1613	"spell/%s.ascii.spl", lang);
1614	r = do_in_runtimepath(fname_enc, `0`, spell_load_cb, &sl);
1615
1616	if (r == FAIL && *sl.sl_lang != NUL && round == `1`
1617	&& apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1618	curbuf->b_fname, FALSE, curbuf))
1619	continue;
1620	break;
1621	}
1622	break;
1623	}
1624
1625	if (r == FAIL) {
1626	if (starting) {
1627	// Prompt the user at VimEnter if spell files are missing. #3027
1628	// Plugins aren't loaded yet, so spellfile.vim cannot handle this case.
1629	char autocmd_buf[`512`] = { `0` };
1630	snprintf(autocmd_buf, sizeof(autocmd_buf),
1631	"autocmd VimEnter * call spellfile#LoadFile('%s')\|set spell",
1632	lang);
1633	do_cmdline_cmd(autocmd_buf);
1634	} else {
1635	smsg(
1636	_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
1637	lang, spell_enc(), lang);
1638	}
1639	} else if (sl.sl_slang != NULL) {
1640	// At least one file was loaded, now load ALL the additions.
1641	STRCPY(fname_enc + STRLEN(fname_enc) - `3`, "add.spl");
1642	do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
1643	}
1644	}
1645
1646	// Return the encoding used for spell checking: Use 'encoding', except that we
1647	// use "latin1" for "latin9". And limit to 60 characters (just in case).
1648	char_u spell_enc(void*)
1649	{
1650
1651	if (STRLEN(p_enc) < `60` && STRCMP(p_enc, "iso-8859-15") != `0`)
1652	return p_enc;
1653	return (char_u *)"latin1";
1654	}
1655
1656	// Get the name of the .spl file for the internal wordlist into
1657	// "fname[MAXPATHL]".
1658	static void int_wordlist_spl(char_u *fname)
1659	{
1660	vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
1661	int_wordlist, spell_enc());
1662	}
1663
1664	// Allocate a new slang_T for language "lang". "lang" can be NULL.
1665	// Caller must fill "sl_next".
1666	slang_T slang_alloc(char_u lang)
1667	{
1668	slang_T lp = xcalloc(`1`, sizeof*(slang_T));
1669
1670	if (lang != NULL)
1671	lp->sl_name = vim_strsave(lang);
1672	ga_init(&lp->sl_rep, sizeof(fromto_T), `10`);
1673	ga_init(&lp->sl_repsal, sizeof(fromto_T), `10`);
1674	lp->sl_compmax = MAXWLEN;
1675	lp->sl_compsylmax = MAXWLEN;
1676	hash_init(&lp->sl_wordcount);
1677
1678	return lp;
1679	}
1680
1681	// Free the contents of an slang_T and the structure itself.
1682	void slang_free(slang_T *lp)
1683	{
1684	xfree(lp->sl_name);
1685	xfree(lp->sl_fname);
1686	slang_clear(lp);
1687	xfree(lp);
1688	}
1689
1690	/// Frees a salitem_T
1691	static void free_salitem(salitem_T *smp) {
1692	xfree(smp->sm_lead);
1693	// Don't free sm_oneof and sm_rules, they point into sm_lead.
1694	xfree(smp->sm_to);
1695	xfree(smp->sm_lead_w);
1696	xfree(smp->sm_oneof_w);
1697	xfree(smp->sm_to_w);
1698	}
1699
1700	/// Frees a fromto_T
1701	static void free_fromto(fromto_T *ftp) {
1702	xfree(ftp->ft_from);
1703	xfree(ftp->ft_to);
1704	}
1705
1706	// Clear an slang_T so that the file can be reloaded.
1707	void slang_clear(slang_T *lp)
1708	{
1709	garray_T *gap;
1710
1711	XFREE_CLEAR(lp->sl_fbyts);
1712	XFREE_CLEAR(lp->sl_kbyts);
1713	XFREE_CLEAR(lp->sl_pbyts);
1714
1715	XFREE_CLEAR(lp->sl_fidxs);
1716	XFREE_CLEAR(lp->sl_kidxs);
1717	XFREE_CLEAR(lp->sl_pidxs);
1718
1719	GA_DEEP_CLEAR(&lp->sl_rep, fromto_T, free_fromto);
1720	GA_DEEP_CLEAR(&lp->sl_repsal, fromto_T, free_fromto);
1721
1722	gap = &lp->sl_sal;
1723	if (lp->sl_sofo) {
1724	// "ga_len" is set to 1 without adding an item for latin1
1725	GA_DEEP_CLEAR_PTR(gap);
1726	} else {
1727	// SAL items: free salitem_T items
1728	GA_DEEP_CLEAR(gap, salitem_T, free_salitem);
1729	}
1730
1731	for (int i = `0`; i < lp->sl_prefixcnt; ++i) {
1732	vim_regfree(lp->sl_prefprog[i]);
1733	}
1734	lp->sl_prefixcnt = `0`;
1735	XFREE_CLEAR(lp->sl_prefprog);
1736	XFREE_CLEAR(lp->sl_info);
1737	XFREE_CLEAR(lp->sl_midword);
1738
1739	vim_regfree(lp->sl_compprog);
1740	lp->sl_compprog = NULL;
1741	XFREE_CLEAR(lp->sl_comprules);
1742	XFREE_CLEAR(lp->sl_compstartflags);
1743	XFREE_CLEAR(lp->sl_compallflags);
1744
1745	XFREE_CLEAR(lp->sl_syllable);
1746	ga_clear(&lp->sl_syl_items);
1747
1748	ga_clear_strings(&lp->sl_comppat);
1749
1750	hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
1751	hash_init(&lp->sl_wordcount);
1752
1753	hash_clear_all(&lp->sl_map_hash, `0`);
1754
1755	// Clear info from .sug file.
1756	slang_clear_sug(lp);
1757
1758	lp->sl_compmax = MAXWLEN;
1759	lp->sl_compminlen = `0`;
1760	lp->sl_compsylmax = MAXWLEN;
1761	lp->sl_regions[`0`] = NUL;
1762	}
1763
1764	// Clear the info from the .sug file in "lp".
1765	void slang_clear_sug(slang_T *lp)
1766	{
1767	XFREE_CLEAR(lp->sl_sbyts);
1768	XFREE_CLEAR(lp->sl_sidxs);
1769	close_spellbuf(lp->sl_sugbuf);
1770	lp->sl_sugbuf = NULL;
1771	lp->sl_sugloaded = false;
1772	lp->sl_sugtime = `0`;
1773	}
1774
1775	// Load one spell file and store the info into a slang_T.
1776	// Invoked through do_in_runtimepath().
1777	static void spell_load_cb(char_u fname, void* *cookie)
1778	{
1779	spelload_T slp = (spelload_T )cookie;
1780	slang_T *slang;
1781
1782	slang = spell_load_file(fname, slp->sl_lang, NULL, false);
1783	if (slang != NULL) {
1784	// When a previously loaded file has NOBREAK also use it for the
1785	// ".add" files.
1786	if (slp->sl_nobreak && slang->sl_add)
1787	slang->sl_nobreak = true;
1788	else if (slang->sl_nobreak)
1789	slp->sl_nobreak = true;
1790
1791	slp->sl_slang = slang;
1792	}
1793	}
1794
1795	/// Add a word to the hashtable of common words.
1796	/// If it's already there then the counter is increased.
1797	///
1798	/// @param[in] lp
1799	/// @param[in] word added to common words hashtable
1800	/// @param[in] len length of word or -1 for NUL terminated
1801	/// @param[in] count 1 to count once, 10 to init
1802	void count_common_word(slang_T lp, char_u word, int len, int count)
1803	{
1804	hash_T hash;
1805	hashitem_T *hi;
1806	wordcount_T *wc;
1807	char_u buf[MAXWLEN];
1808	char_u *p;
1809
1810	if (len == -`1`) {
1811	p = word;
1812	} else if (len >= MAXWLEN) {
1813	return;
1814	} else {
1815	STRLCPY(buf, word, len + `1`);
1816	p = buf;
1817	}
1818
1819	hash = hash_hash(p);
1820	const size_t p_len = STRLEN(p);
1821	hi = hash_lookup(&lp->sl_wordcount, (const char *)p, p_len, hash);
1822	if (HASHITEM_EMPTY(hi)) {
1823	wc = xmalloc(sizeof(wordcount_T) + p_len);
1824	memcpy(wc->wc_word, p, p_len + `1`);
1825	wc->wc_count = count;
1826	hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
1827	} else {
1828	wc = HI2WC(hi);
1829	if ((wc->wc_count += count) < (unsigned)count) // check for overflow
1830	wc->wc_count = MAXWORDCOUNT;
1831	}
1832	}
1833
1834	// Adjust the score of common words.
1835	static int
1836	score_wordcount_adj (
1837	slang_T *slang,
1838	int score,
1839	char_u *word,
1840	bool split // word was split, less bonus
1841	)
1842	{
1843	hashitem_T *hi;
1844	wordcount_T *wc;
1845	int bonus;
1846	int newscore;
1847
1848	hi = hash_find(&slang->sl_wordcount, word);
1849	if (!HASHITEM_EMPTY(hi)) {
1850	wc = HI2WC(hi);
1851	if (wc->wc_count < SCORE_THRES2)
1852	bonus = SCORE_COMMON1;
1853	else if (wc->wc_count < SCORE_THRES3)
1854	bonus = SCORE_COMMON2;
1855	else
1856	bonus = SCORE_COMMON3;
1857	if (split)
1858	newscore = score - bonus / `2`;
1859	else
1860	newscore = score - bonus;
1861	if (newscore < `0`)
1862	return `0`;
1863	return newscore;
1864	}
1865	return score;
1866	}
1867
1868	// Returns true if byte "n" appears in "str".
1869	// Like strchr() but independent of locale.
1870	bool byte_in_str(char_u str, int* n)
1871	{
1872	char_u *p;
1873
1874	for (p = str; *p != NUL; ++p)
1875	if (*p == n)
1876	return true;
1877	return false;
1878	}
1879
1880	// Truncate "slang->sl_syllable" at the first slash and put the following items
1881	// in "slang->sl_syl_items".
1882	int init_syl_tab(slang_T *slang)
1883	{
1884	char_u *p;
1885	char_u *s;
1886	int l;
1887
1888	ga_init(&slang->sl_syl_items, sizeof(syl_item_T), `4`);
1889	p = vim_strchr(slang->sl_syllable, `'/'`);
1890	while (p != NULL) {
1891	*p++ = NUL;
1892	if (p == NUL) // trailing slash*
1893	break;
1894	s = p;
1895	p = vim_strchr(p, `'/'`);
1896	if (p == NULL)
1897	l = (int)STRLEN(s);
1898	else
1899	l = (int)(p - s);
1900	if (l >= SY_MAXLEN)
1901	return SP_FORMERROR;
1902
1903	syl_item_T *syl = GA_APPEND_VIA_PTR(syl_item_T, &slang->sl_syl_items);
1904	STRLCPY(syl->sy_chars, s, l + `1`);
1905	syl->sy_len = l;
1906	}
1907	return OK;
1908	}
1909
1910	// Count the number of syllables in "word".
1911	// When "word" contains spaces the syllables after the last space are counted.
1912	// Returns zero if syllables are not defines.
1913	static int count_syllables(slang_T slang, char_u word)
1914	{
1915	int cnt = `0`;
1916	bool skip = false;
1917	char_u *p;
1918	int len;
1919	syl_item_T *syl;
1920	int c;
1921
1922	if (slang->sl_syllable == NULL)
1923	return `0`;
1924
1925	for (p = word; *p != NUL; p += len) {
1926	// When running into a space reset counter.
1927	if (*p == `' '`) {
1928	len = `1`;
1929	cnt = `0`;
1930	continue;
1931	}
1932
1933	// Find longest match of syllable items.
1934	len = `0`;
1935	for (int i = `0`; i < slang->sl_syl_items.ga_len; ++i) {
1936	syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
1937	if (syl->sy_len > len
1938	&& STRNCMP(p, syl->sy_chars, syl->sy_len) == `0`)
1939	len = syl->sy_len;
1940	}
1941	if (len != `0`) { // found a match, count syllable
1942	++cnt;
1943	skip = false;
1944	} else {
1945	// No recognized syllable item, at least a syllable char then?
1946	c = utf_ptr2char(p);
1947	len = (*mb_ptr2len)(p);
1948	if (vim_strchr(slang->sl_syllable, c) == NULL)
1949	skip = false; // No, search for next syllable
1950	else if (!skip) {
1951	++cnt; // Yes, count it
1952	skip = true; // don't count following syllable chars
1953	}
1954	}
1955	}
1956	return cnt;
1957	}
1958
1959	// Parse 'spelllang' and set w_s->b_langp accordingly.
1960	// Returns NULL if it's OK, an error message otherwise.
1961	char_u did_set_spelllang(win_T wp)
1962	{
1963	garray_T ga;
1964	char_u *splp;
1965	char_u *region;
1966	char_u region_cp[`3`];
1967	bool filename;
1968	int region_mask;
1969	slang_T *slang;
1970	int c;
1971	char_u lang[MAXWLEN + `1`];
1972	char_u spf_name[MAXPATHL];
1973	int len;
1974	char_u *p;
1975	int round;
1976	char_u *spf;
1977	char_u *use_region = NULL;
1978	bool dont_use_region = false;
1979	bool nobreak = false;
1980	langp_T lp, lp2;
1981	static bool recursive = false;
1982	char_u *ret_msg = NULL;
1983	char_u *spl_copy;
1984
1985	bufref_T bufref;
1986	set_bufref(&bufref, wp->w_buffer);
1987
1988	// We don't want to do this recursively. May happen when a language is
1989	// not available and the SpellFileMissing autocommand opens a new buffer
1990	// in which 'spell' is set.
1991	if (recursive)
1992	return NULL;
1993	recursive = true;
1994
1995	ga_init(&ga, sizeof(langp_T), `2`);
1996	clear_midword(wp);
1997
1998	// Make a copy of 'spelllang', the SpellFileMissing autocommands may change
1999	// it under our fingers.
2000	spl_copy = vim_strsave(wp->w_s->b_p_spl);
2001
2002	wp->w_s->b_cjk = `0`;
2003
2004	// Loop over comma separated language names.
2005	for (splp = spl_copy; *splp != NUL; ) {
2006	// Get one language name.
2007	copy_option_part(&splp, lang, MAXWLEN, ",");
2008	region = NULL;
2009	len = (int)STRLEN(lang);
2010
2011	if (STRCMP(lang, "cjk") == `0`) {
2012	wp->w_s->b_cjk = `1`;
2013	continue;
2014	}
2015
2016	// If the name ends in ".spl" use it as the name of the spell file.
2017	// If there is a region name let "region" point to it and remove it
2018	// from the name.
2019	if (len > `4` && fnamecmp(lang + len - `4`, ".spl") == `0`) {
2020	filename = true;
2021
2022	// Locate a region and remove it from the file name.
2023	p = vim_strchr(path_tail(lang), `'_'`);
2024	if (p != NULL && ASCII_ISALPHA(p[`1`]) && ASCII_ISALPHA(p[`2`])
2025	&& !ASCII_ISALPHA(p[`3`])) {
2026	STRLCPY(region_cp, p + `1`, `3`);
2027	memmove(p, p + `3`, len - (p - lang) - `2`);
2028	region = region_cp;
2029	} else
2030	dont_use_region = true;
2031
2032	// Check if we loaded this language before.
2033	for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
2034	if (path_full_compare(lang, slang->sl_fname, false) == kEqualFiles) {
2035	break;
2036	}
2037	}
2038	} else {
2039	filename = false;
2040	if (len > `3` && lang[len - `3`] == `'_'`) {
2041	region = lang + len - `2`;
2042	lang[len - `3`] = NUL;
2043	} else
2044	dont_use_region = true;
2045
2046	// Check if we loaded this language before.
2047	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2048	if (STRICMP(lang, slang->sl_name) == `0`)
2049	break;
2050	}
2051
2052	if (region != NULL) {
2053	// If the region differs from what was used before then don't
2054	// use it for 'spellfile'.
2055	if (use_region != NULL && STRCMP(region, use_region) != `0`)
2056	dont_use_region = true;
2057	use_region = region;
2058	}
2059
2060	// If not found try loading the language now.
2061	if (slang == NULL) {
2062	if (filename)
2063	(void)spell_load_file(lang, lang, NULL, false);
2064	else {
2065	spell_load_lang(lang);
2066	// SpellFileMissing autocommands may do anything, including
2067	// destroying the buffer we are using...
2068	if (!bufref_valid(&bufref)) {
2069	ret_msg =
2070	(char_u *)N_("E797: SpellFileMissing autocommand deleted buffer");
2071	goto theend;
2072	}
2073	}
2074	}
2075
2076	// Loop over the languages, there can be several files for "lang".
2077	for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
2078	if (filename
2079	? path_full_compare(lang, slang->sl_fname, false) == kEqualFiles
2080	: STRICMP(lang, slang->sl_name) == `0`) {
2081	region_mask = REGION_ALL;
2082	if (!filename && region != NULL) {
2083	// find region in sl_regions
2084	c = find_region(slang->sl_regions, region);
2085	if (c == REGION_ALL) {
2086	if (slang->sl_add) {
2087	if (*slang->sl_regions != NUL)
2088	// This addition file is for other regions.
2089	region_mask = `0`;
2090	} else
2091	// This is probably an error. Give a warning and
2092	// accept the words anyway.
2093	smsg(_("Warning: region %s not supported"),
2094	region);
2095	} else
2096	region_mask = `1` << c;
2097	}
2098
2099	if (region_mask != `0`) {
2100	langp_T *p_ = GA_APPEND_VIA_PTR(langp_T, &ga);
2101	p_->lp_slang = slang;
2102	p_->lp_region = region_mask;
2103
2104	use_midword(slang, wp);
2105	if (slang->sl_nobreak)
2106	nobreak = true;
2107	}
2108	}
2109	}
2110	}
2111
2112	// round 0: load int_wordlist, if possible.
2113	// round 1: load first name in 'spellfile'.
2114	// round 2: load second name in 'spellfile.
2115	// etc.
2116	spf = curwin->w_s->b_p_spf;
2117	for (round = `0`; round == `0` \|\| *spf != NUL; ++round) {
2118	if (round == `0`) {
2119	// Internal wordlist, if there is one.
2120	if (int_wordlist == NULL)
2121	continue;
2122	int_wordlist_spl(spf_name);
2123	} else {
2124	// One entry in 'spellfile'.
2125	copy_option_part(&spf, spf_name, MAXPATHL - `5`, ",");
2126	STRCAT(spf_name, ".spl");
2127
2128	// If it was already found above then skip it.
2129	for (c = `0`; c < ga.ga_len; ++c) {
2130	p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
2131	if (p != NULL
2132	&& path_full_compare(spf_name, p, false) == kEqualFiles) {
2133	break;
2134	}
2135	}
2136	if (c < ga.ga_len)
2137	continue;
2138	}
2139
2140	// Check if it was loaded already.
2141	for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
2142	if (path_full_compare(spf_name, slang->sl_fname, false) == kEqualFiles) {
2143	break;
2144	}
2145	}
2146	if (slang == NULL) {
2147	// Not loaded, try loading it now. The language name includes the
2148	// region name, the region is ignored otherwise. for int_wordlist
2149	// use an arbitrary name.
2150	if (round == `0`)
2151	STRCPY(lang, "internal wordlist");
2152	else {
2153	STRLCPY(lang, path_tail(spf_name), MAXWLEN + `1`);
2154	p = vim_strchr(lang, `'.'`);
2155	if (p != NULL)
2156	p = NUL; // truncate at ".encoding.add"*
2157	}
2158	slang = spell_load_file(spf_name, lang, NULL, true);
2159
2160	// If one of the languages has NOBREAK we assume the addition
2161	// files also have this.
2162	if (slang != NULL && nobreak)
2163	slang->sl_nobreak = true;
2164	}
2165	if (slang != NULL) {
2166	region_mask = REGION_ALL;
2167	if (use_region != NULL && !dont_use_region) {
2168	// find region in sl_regions
2169	c = find_region(slang->sl_regions, use_region);
2170	if (c != REGION_ALL)
2171	region_mask = `1` << c;
2172	else if (*slang->sl_regions != NUL)
2173	// This spell file is for other regions.
2174	region_mask = `0`;
2175	}
2176
2177	if (region_mask != `0`) {
2178	langp_T *p_ = GA_APPEND_VIA_PTR(langp_T, &ga);
2179	p_->lp_slang = slang;
2180	p_->lp_sallang = NULL;
2181	p_->lp_replang = NULL;
2182	p_->lp_region = region_mask;
2183
2184	use_midword(slang, wp);
2185	}
2186	}
2187	}
2188
2189	// Everything is fine, store the new b_langp value.
2190	ga_clear(&wp->w_s->b_langp);
2191	wp->w_s->b_langp = ga;
2192
2193	// For each language figure out what language to use for sound folding and
2194	// REP items. If the language doesn't support it itself use another one
2195	// with the same name. E.g. for "en-math" use "en".
2196	for (int i = `0`; i < ga.ga_len; ++i) {
2197	lp = LANGP_ENTRY(ga, i);
2198
2199	// sound folding
2200	if (!GA_EMPTY(&lp->lp_slang->sl_sal))
2201	// language does sound folding itself
2202	lp->lp_sallang = lp->lp_slang;
2203	else
2204	// find first similar language that does sound folding
2205	for (int j = `0`; j < ga.ga_len; ++j) {
2206	lp2 = LANGP_ENTRY(ga, j);
2207	if (!GA_EMPTY(&lp2->lp_slang->sl_sal)
2208	&& STRNCMP(lp->lp_slang->sl_name,
2209	lp2->lp_slang->sl_name, `2`) == `0`) {
2210	lp->lp_sallang = lp2->lp_slang;
2211	break;
2212	}
2213	}
2214
2215	// REP items
2216	if (!GA_EMPTY(&lp->lp_slang->sl_rep))
2217	// language has REP items itself
2218	lp->lp_replang = lp->lp_slang;
2219	else
2220	// find first similar language that has REP items
2221	for (int j = `0`; j < ga.ga_len; ++j) {
2222	lp2 = LANGP_ENTRY(ga, j);
2223	if (!GA_EMPTY(&lp2->lp_slang->sl_rep)
2224	&& STRNCMP(lp->lp_slang->sl_name,
2225	lp2->lp_slang->sl_name, `2`) == `0`) {
2226	lp->lp_replang = lp2->lp_slang;
2227	break;
2228	}
2229	}
2230	}
2231
2232	theend:
2233	xfree(spl_copy);
2234	recursive = false;
2235	redraw_win_later(wp, NOT_VALID);
2236	return ret_msg;
2237	}
2238
2239	// Clear the midword characters for buffer "buf".
2240	static void clear_midword(win_T *wp)
2241	{
2242	memset(wp->w_s->b_spell_ismw, `0`, `256`);
2243	XFREE_CLEAR(wp->w_s->b_spell_ismw_mb);
2244	}
2245
2246	// Use the "sl_midword" field of language "lp" for buffer "buf".
2247	// They add up to any currently used midword characters.
2248	static void use_midword(slang_T lp, win_T wp)
2249	{
2250	char_u *p;
2251
2252	if (lp->sl_midword == NULL) // there aren't any
2253	return;
2254
2255	for (p = lp->sl_midword; *p != NUL; )
2256	if (has_mbyte) {
2257	int c, l, n;
2258	char_u *bp;
2259
2260	c = utf_ptr2char(p);
2261	l = (*mb_ptr2len)(p);
2262	if (c < `256` && l <= `2`)
2263	wp->w_s->b_spell_ismw[c] = true;
2264	else if (wp->w_s->b_spell_ismw_mb == NULL)
2265	// First multi-byte char in "b_spell_ismw_mb".
2266	wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
2267	else {
2268	// Append multi-byte chars to "b_spell_ismw_mb".
2269	n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2270	bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
2271	xfree(wp->w_s->b_spell_ismw_mb);
2272	wp->w_s->b_spell_ismw_mb = bp;
2273	STRLCPY(bp + n, p, l + `1`);
2274	}
2275	p += l;
2276	} else
2277	wp->w_s->b_spell_ismw[*p++] = true;
2278	}
2279
2280	// Find the region "region[2]" in "rp" (points to "sl_regions").
2281	// Each region is simply stored as the two characters of its name.
2282	// Returns the index if found (first is 0), REGION_ALL if not found.
2283	static int find_region(char_u rp, char_u region)
2284	{
2285	int i;
2286
2287	for (i = `0`;; i += `2`) {
2288	if (rp[i] == NUL)
2289	return REGION_ALL;
2290	if (rp[i] == region[`0`] && rp[i + `1`] == region[`1`])
2291	break;
2292	}
2293	return i / `2`;
2294	}
2295
2296	/// Return case type of word:
2297	/// w word 0
2298	/// Word WF_ONECAP
2299	/// W WORD WF_ALLCAP
2300	/// WoRd wOrd WF_KEEPCAP
2301	///
2302	/// @param[in] word
2303	/// @param[in] end End of word or NULL for NUL delimited string
2304	///
2305	/// @returns Case type of word
2306	int captype(char_u word, char_u end)
2307	FUNC_ATTR_NONNULL_ARG(`1`)
2308	{
2309	char_u *p;
2310	int c;
2311	int firstcap;
2312	bool allcap;
2313	bool past_second = false; // past second word char
2314
2315	// find first letter
2316	for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p)) {
2317	if (end == NULL ? *p == NUL : p >= end) {
2318	return `0`; // only non-word characters, illegal word
2319	}
2320	}
2321	if (has_mbyte) {
2322	c = mb_ptr2char_adv((const char_u **)&p);
2323	} else {
2324	c = *p++;
2325	}
2326	firstcap = allcap = SPELL_ISUPPER(c);
2327
2328	// Need to check all letters to find a word with mixed upper/lower.
2329	// But a word with an upper char only at start is a ONECAP.
2330	for (; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p)) {
2331	if (spell_iswordp_nmw(p, curwin)) {
2332	c = PTR2CHAR(p);
2333	if (!SPELL_ISUPPER(c)) {
2334	// UUl -> KEEPCAP
2335	if (past_second && allcap) {
2336	return WF_KEEPCAP;
2337	}
2338	allcap = false;
2339	} else if (!allcap) {
2340	// UlU -> KEEPCAP
2341	return WF_KEEPCAP;
2342	}
2343	past_second = true;
2344	}
2345	}
2346
2347	if (allcap)
2348	return WF_ALLCAP;
2349	if (firstcap)
2350	return WF_ONECAP;
2351	return `0`;
2352	}
2353
2354	// Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
2355	// capital. So that make_case_word() can turn WOrd into Word.
2356	// Add ALLCAP for "WOrD".
2357	static int badword_captype(char_u word, char_u end)
2358	FUNC_ATTR_NONNULL_ALL
2359	{
2360	int flags = captype(word, end);
2361	int c;
2362	int l, u;
2363	bool first;
2364	char_u *p;
2365
2366	if (flags & WF_KEEPCAP) {
2367	// Count the number of UPPER and lower case letters.
2368	l = u = `0`;
2369	first = false;
2370	for (p = word; p < end; MB_PTR_ADV(p)) {
2371	c = PTR2CHAR(p);
2372	if (SPELL_ISUPPER(c)) {
2373	++u;
2374	if (p == word)
2375	first = true;
2376	} else
2377	++l;
2378	}
2379
2380	// If there are more UPPER than lower case letters suggest an
2381	// ALLCAP word. Otherwise, if the first letter is UPPER then
2382	// suggest ONECAP. Exception: "ALl" most likely should be "All",
2383	// require three upper case letters.
2384	if (u > l && u > `2`)
2385	flags \|= WF_ALLCAP;
2386	else if (first)
2387	flags \|= WF_ONECAP;
2388
2389	if (u >= `2` && l >= `2`) // maCARONI maCAroni
2390	flags \|= WF_MIXCAP;
2391	}
2392	return flags;
2393	}
2394
2395	// Delete the internal wordlist and its .spl file.
2396	void spell_delete_wordlist(void)
2397	{
2398	char_u fname[MAXPATHL] = {`0`};
2399
2400	if (int_wordlist != NULL) {
2401	os_remove((char *)int_wordlist);
2402	int_wordlist_spl(fname);
2403	os_remove((char *)fname);
2404	XFREE_CLEAR(int_wordlist);
2405	}
2406	}
2407
2408	// Free all languages.
2409	void spell_free_all(void)
2410	{
2411	slang_T *slang;
2412
2413	// Go through all buffers and handle 'spelllang'. <VN>
2414	FOR_ALL_BUFFERS(buf) {
2415	ga_clear(&buf->b_s.b_langp);
2416	}
2417
2418	while (first_lang != NULL) {
2419	slang = first_lang;
2420	first_lang = slang->sl_next;
2421	slang_free(slang);
2422	}
2423
2424	spell_delete_wordlist();
2425
2426	XFREE_CLEAR(repl_to);
2427	XFREE_CLEAR(repl_from);
2428	}
2429
2430	// Clear all spelling tables and reload them.
2431	// Used after 'encoding' is set and when ":mkspell" was used.
2432	void spell_reload(void)
2433	{
2434	// Initialize the table for spell_iswordp().
2435	init_spell_chartab();
2436
2437	// Unload all allocated memory.
2438	spell_free_all();
2439
2440	// Go through all buffers and handle 'spelllang'.
2441	FOR_ALL_WINDOWS_IN_TAB(wp, curtab) {
2442	// Only load the wordlists when 'spelllang' is set and there is a
2443	// window for this buffer in which 'spell' is set.
2444	if (*wp->w_s->b_p_spl != NUL) {
2445	if (wp->w_p_spell) {
2446	(void)did_set_spelllang(wp);
2447	break;
2448	}
2449	}
2450	}
2451	}
2452
2453
2454	// Opposite of offset2bytes().
2455	// "pp" points to the bytes and is advanced over it.
2456	// Returns the offset.
2457	static int bytes2offset(char_u **pp)
2458	{
2459	char_u p = pp;
2460	int nr;
2461	int c;
2462
2463	c = *p++;
2464	if ((c & `0x80`) == `0x00`) { // 1 byte
2465	nr = c - `1`;
2466	} else if ((c & `0xc0`) == `0x80`) { // 2 bytes
2467	nr = (c & `0x3f`) - `1`;
2468	nr = nr * `255` + (*p++ - `1`);
2469	} else if ((c & `0xe0`) == `0xc0`) { // 3 bytes
2470	nr = (c & `0x1f`) - `1`;
2471	nr = nr * `255` + (*p++ - `1`);
2472	nr = nr * `255` + (*p++ - `1`);
2473	} else { // 4 bytes
2474	nr = (c & `0x0f`) - `1`;
2475	nr = nr * `255` + (*p++ - `1`);
2476	nr = nr * `255` + (*p++ - `1`);
2477	nr = nr * `255` + (*p++ - `1`);
2478	}
2479
2480	*pp = p;
2481	return nr;
2482	}
2483
2484	// Open a spell buffer. This is a nameless buffer that is not in the buffer
2485	// list and only contains text lines. Can use a swapfile to reduce memory
2486	// use.
2487	// Most other fields are invalid! Esp. watch out for string options being
2488	// NULL and there is no undo info.
2489	buf_T open_spellbuf(void*)
2490	{
2491	buf_T buf = xcalloc(`1`, sizeof*(buf_T));
2492
2493	buf->b_spell = true;
2494	buf->b_p_swf = true; // may create a swap file
2495	if (ml_open(buf) == FAIL) {
2496	ELOG("Error opening a new memline");
2497	}
2498	ml_open_file(buf); // create swap file now
2499
2500	return buf;
2501	}
2502
2503	// Close the buffer used for spell info.
2504	void close_spellbuf(buf_T *buf)
2505	{
2506	if (buf != NULL) {
2507	ml_close(buf, TRUE);
2508	xfree(buf);
2509	}
2510	}
2511
2512	// Init the chartab used for spelling for ASCII.
2513	void clear_spell_chartab(spelltab_T *sp)
2514	{
2515	int i;
2516
2517	// Init everything to false.
2518	memset(sp->st_isw, false, sizeof(sp->st_isw));
2519	memset(sp->st_isu, false, sizeof(sp->st_isu));
2520
2521	for (i = `0`; i < `256`; ++i) {
2522	sp->st_fold[i] = i;
2523	sp->st_upper[i] = i;
2524	}
2525
2526	// We include digits. A word shouldn't start with a digit, but handling
2527	// that is done separately.
2528	for (i = `'0'`; i <= `'9'`; ++i)
2529	sp->st_isw[i] = true;
2530	for (i = `'A'`; i <= `'Z'`; ++i) {
2531	sp->st_isw[i] = true;
2532	sp->st_isu[i] = true;
2533	sp->st_fold[i] = i + `0x20`;
2534	}
2535	for (i = `'a'`; i <= `'z'`; ++i) {
2536	sp->st_isw[i] = true;
2537	sp->st_upper[i] = i - `0x20`;
2538	}
2539	}
2540
2541	// Init the chartab used for spelling. Called once while starting up.
2542	// The default is to use isalpha(), but the spell file should define the word
2543	// characters to make it possible that 'encoding' differs from the current
2544	// locale. For utf-8 we don't use isalpha() but our own functions.
2545	void init_spell_chartab(void)
2546	{
2547	int i;
2548
2549	did_set_spelltab = false;
2550	clear_spell_chartab(&spelltab);
2551	for (i = `128`; i < `256`; i++) {
2552	int f = utf_fold(i);
2553	int u = mb_toupper(i);
2554
2555	spelltab.st_isu[i] = mb_isupper(i);
2556	spelltab.st_isw[i] = spelltab.st_isu[i] \|\| mb_islower(i);
2557	// The folded/upper-cased value is different between latin1 and
2558	// utf8 for 0xb5, causing E763 for no good reason. Use the latin1
2559	// value for utf-8 to avoid this.
2560	spelltab.st_fold[i] = (f < `256`) ? f : i;
2561	spelltab.st_upper[i] = (u < `256`) ? u : i;
2562	}
2563	}
2564
2565	/// Returns true if "p" points to a word character.
2566	/// As a special case we see "midword" characters as word character when it is
2567	/// followed by a word character. This finds they'there but not 'they there'.
2568	/// Thus this only works properly when past the first character of the word.
2569	///
2570	/// @param wp Buffer used.
2571	static bool spell_iswordp(char_u p, win_T wp)
2572	{
2573	char_u *s;
2574	int l;
2575	int c;
2576
2577	if (has_mbyte) {
2578	l = MB_PTR2LEN(p);
2579	s = p;
2580	if (l == `1`) {
2581	// be quick for ASCII
2582	if (wp->w_s->b_spell_ismw[*p])
2583	s = p + `1`; // skip a mid-word character
2584	} else {
2585	c = utf_ptr2char(p);
2586	if (c < `256` ? wp->w_s->b_spell_ismw[c]
2587	: (wp->w_s->b_spell_ismw_mb != NULL
2588	&& vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL)) {
2589	s = p + l;
2590	}
2591	}
2592
2593	c = utf_ptr2char(s);
2594	if (c > `255`) {
2595	return spell_mb_isword_class(mb_get_class(s), wp);
2596	}
2597	return spelltab.st_isw[c];
2598	}
2599
2600	return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[`1`] : p[`0`]];
2601	}
2602
2603	// Returns true if "p" points to a word character.
2604	// Unlike spell_iswordp() this doesn't check for "midword" characters.
2605	bool spell_iswordp_nmw(const char_u p, win_T wp)
2606	{
2607	int c = utf_ptr2char(p);
2608	if (c > `255`) {
2609	return spell_mb_isword_class(mb_get_class(p), wp);
2610	}
2611	return spelltab.st_isw[c];
2612	}
2613
2614	// Returns true if word class indicates a word character.
2615	// Only for characters above 255.
2616	// Unicode subscript and superscript are not considered word characters.
2617	// See also utf_class() in mbyte.c.
2618	static bool spell_mb_isword_class(int cl, win_T *wp)
2619	{
2620	if (wp->w_s->b_cjk)
2621	// East Asian characters are not considered word characters.
2622	return cl == `2` \|\| cl == `0x2800`;
2623	return cl >= `2` && cl != `0x2070` && cl != `0x2080` && cl != `3`;
2624	}
2625
2626	// Returns true if "p" points to a word character.
2627	// Wide version of spell_iswordp().
2628	static bool spell_iswordp_w(int p, win_T wp)
2629	{
2630	int *s;
2631
2632	if (p < `256` ? wp->w_s->b_spell_ismw[p]
2633	: (wp->w_s->b_spell_ismw_mb != NULL
2634	&& vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
2635	s = p + `1`;
2636	else
2637	s = p;
2638
2639	if (*s > `255`) {
2640	return spell_mb_isword_class(utf_class(*s), wp);
2641	}
2642	return spelltab.st_isw[*s];
2643	}
2644
2645	// Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
2646	// Uses the character definitions from the .spl file.
2647	// When using a multi-byte 'encoding' the length may change!
2648	// Returns FAIL when something wrong.
2649	int spell_casefold(char_u str, int* len, char_u buf, int* buflen)
2650	{
2651	int i;
2652
2653	if (len >= buflen) {
2654	buf[`0`] = NUL;
2655	return FAIL; // result will not fit
2656	}
2657
2658	if (has_mbyte) {
2659	int outi = `0`;
2660	char_u *p;
2661	int c;
2662
2663	// Fold one character at a time.
2664	for (p = str; p < str + len; ) {
2665	if (outi + MB_MAXBYTES > buflen) {
2666	buf[outi] = NUL;
2667	return FAIL;
2668	}
2669	c = mb_cptr2char_adv((const char_u **)&p);
2670	outi += utf_char2bytes(SPELL_TOFOLD(c), buf + outi);
2671	}
2672	buf[outi] = NUL;
2673	} else {
2674	// Be quick for non-multibyte encodings.
2675	for (i = `0`; i < len; ++i)
2676	buf[i] = spelltab.st_fold[str[i]];
2677	buf[i] = NUL;
2678	}
2679
2680	return OK;
2681	}
2682
// values for sps_flags
enum {
  SPS_BEST = 1,
  SPS_FAST = 2,
  SPS_DOUBLE = 4,
};

static int sps_flags = SPS_BEST;  // flags from 'spellsuggest'
static int sps_limit = 9999;      // max nr of suggestions given
2690
2691	// Check the 'spellsuggest' option. Return FAIL if it's wrong.
2692	// Sets "sps_flags" and "sps_limit".
2693	int spell_check_sps(void)
2694	{
2695	char_u *p;
2696	char_u *s;
2697	char_u buf[MAXPATHL];
2698	int f;
2699
2700	sps_flags = `0`;
2701	sps_limit = `9999`;
2702
2703	for (p = p_sps; *p != NUL; ) {
2704	copy_option_part(&p, buf, MAXPATHL, ",");
2705
2706	f = `0`;
2707	if (ascii_isdigit(*buf)) {
2708	s = buf;
2709	sps_limit = getdigits_int(&s, true, `0`);
2710	if (s != NUL && !ascii_isdigit(s)) {
2711	f = -`1`;
2712	}
2713	} else if (STRCMP(buf, "best") == `0`) {
2714	f = SPS_BEST;
2715	} else if (STRCMP(buf, "fast") == `0`) {
2716	f = SPS_FAST;
2717	} else if (STRCMP(buf, "double") == `0`) {
2718	f = SPS_DOUBLE;
2719	} else if (STRNCMP(buf, "expr:", `5`) != `0`
2720	&& STRNCMP(buf, "file:", `5`) != `0`) {
2721	f = -`1`;
2722	}
2723
2724	if (f == -`1` \|\| (sps_flags != `0` && f != `0`)) {
2725	sps_flags = SPS_BEST;
2726	sps_limit = `9999`;
2727	return FAIL;
2728	}
2729	if (f != `0`)
2730	sps_flags = f;
2731	}
2732
2733	if (sps_flags == `0`)
2734	sps_flags = SPS_BEST;
2735
2736	return OK;
2737	}
2738
2739	// "z=": Find badly spelled word under or after the cursor.
2740	// Give suggestions for the properly spelled word.
2741	// In Visual mode use the highlighted word as the bad word.
2742	// When "count" is non-zero use that suggestion.
2743	void spell_suggest(int count)
2744	{
2745	char_u *line;
2746	pos_T prev_cursor = curwin->w_cursor;
2747	char_u wcopy[MAXWLEN + `2`];
2748	char_u *p;
2749	int c;
2750	suginfo_T sug;
2751	suggest_T *stp;
2752	int mouse_used;
2753	int need_cap;
2754	int limit;
2755	int selected = count;
2756	int badlen = `0`;
2757	int msg_scroll_save = msg_scroll;
2758
2759	if (no_spell_checking(curwin))
2760	return;
2761
2762	if (VIsual_active) {
2763	// Use the Visually selected text as the bad word. But reject
2764	// a multi-line selection.
2765	if (curwin->w_cursor.lnum != VIsual.lnum) {
2766	vim_beep(BO_SPELL);
2767	return;
2768	}
2769	badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
2770	if (badlen < `0`) {
2771	badlen = -badlen;
2772	} else {
2773	curwin->w_cursor.col = VIsual.col;
2774	}
2775	badlen++;
2776	end_visual_mode();
2777	} else
2778	// Find the start of the badly spelled word.
2779	if (spell_move_to(curwin, FORWARD, true, true, NULL) == `0`
2780	\|\| curwin->w_cursor.col > prev_cursor.col) {
2781	// No bad word or it starts after the cursor: use the word under the
2782	// cursor.
2783	curwin->w_cursor = prev_cursor;
2784	line = get_cursor_line_ptr();
2785	p = line + curwin->w_cursor.col;
2786	// Backup to before start of word.
2787	while (p > line && spell_iswordp_nmw(p, curwin)) {
2788	MB_PTR_BACK(line, p);
2789	}
2790	// Forward to start of word.
2791	while (*p != NUL && !spell_iswordp_nmw(p, curwin)) {
2792	MB_PTR_ADV(p);
2793	}
2794
2795	if (!spell_iswordp_nmw(p, curwin)) { // No word found.
2796	beep_flush();
2797	return;
2798	}
2799	curwin->w_cursor.col = (colnr_T)(p - line);
2800	}
2801
2802	// Get the word and its length.
2803
2804	// Figure out if the word should be capitalised.
2805	need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
2806
2807	// Make a copy of current line since autocommands may free the line.
2808	line = vim_strsave(get_cursor_line_ptr());
2809
2810	// Get the list of suggestions. Limit to 'lines' - 2 or the number in
2811	// 'spellsuggest', whatever is smaller.
2812	if (sps_limit > (int)Rows - `2`)
2813	limit = (int)Rows - `2`;
2814	else
2815	limit = sps_limit;
2816	spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
2817	true, need_cap, true);
2818
2819	if (GA_EMPTY(&sug.su_ga))
2820	MSG(_("Sorry, no suggestions"));
2821	else if (count > `0`) {
2822	if (count > sug.su_ga.ga_len)
2823	smsg(_("Sorry, only %" PRId64 " suggestions"),
2824	(int64_t)sug.su_ga.ga_len);
2825	} else {
2826	XFREE_CLEAR(repl_from);
2827	XFREE_CLEAR(repl_to);
2828
2829	// When 'rightleft' is set the list is drawn right-left.
2830	cmdmsg_rl = curwin->w_p_rl;
2831	if (cmdmsg_rl)
2832	msg_col = Columns - `1`;
2833
2834	// List the suggestions.
2835	msg_start();
2836	msg_row = Rows - `1`; // for when 'cmdheight' > 1
2837	lines_left = Rows; // avoid more prompt
2838	vim_snprintf((char )IObuff, IOSIZE, _("Change \"%.s\" to:"),
2839	sug.su_badlen, sug.su_badptr);
2840	if (cmdmsg_rl && STRNCMP(IObuff, "Change", `6`) == `0`) {
2841	// And now the rabbit from the high hat: Avoid showing the
2842	// untranslated message rightleft.
2843	vim_snprintf((char )IObuff, IOSIZE, ":ot \"%.s\" egnahC",
2844	sug.su_badlen, sug.su_badptr);
2845	}
2846	msg_puts((const char *)IObuff);
2847	msg_clr_eos();
2848	msg_putchar(`'\n'`);
2849
2850	msg_scroll = TRUE;
2851	for (int i = `0`; i < sug.su_ga.ga_len; ++i) {
2852	stp = &SUG(sug.su_ga, i);
2853
2854	// The suggested word may replace only part of the bad word, add
2855	// the not replaced part.
2856	STRLCPY(wcopy, stp->st_word, MAXWLEN + `1`);
2857	if (sug.su_badlen > stp->st_orglen)
2858	STRLCPY(wcopy + stp->st_wordlen,
2859	sug.su_badptr + stp->st_orglen,
2860	sug.su_badlen - stp->st_orglen + `1`);
2861	vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + `1`);
2862	if (cmdmsg_rl) {
2863	rl_mirror(IObuff);
2864	}
2865	msg_puts((const char *)IObuff);
2866
2867	vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
2868	msg_puts((const char *)IObuff);
2869
2870	// The word may replace more than "su_badlen".
2871	if (sug.su_badlen < stp->st_orglen) {
2872	vim_snprintf((char )IObuff, IOSIZE, _(" < \"%.s\""),
2873	stp->st_orglen, sug.su_badptr);
2874	msg_puts((const char *)IObuff);
2875	}
2876
2877	if (p_verbose > `0`) {
2878	// Add the score.
2879	if (sps_flags & (SPS_DOUBLE \| SPS_BEST))
2880	vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
2881	stp->st_salscore ? "s " : "",
2882	stp->st_score, stp->st_altscore);
2883	else
2884	vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
2885	stp->st_score);
2886	if (cmdmsg_rl)
2887	// Mirror the numbers, but keep the leading space.
2888	rl_mirror(IObuff + `1`);
2889	msg_advance(`30`);
2890	msg_puts((const char *)IObuff);
2891	}
2892	msg_putchar(`'\n'`);
2893	}
2894
2895	cmdmsg_rl = FALSE;
2896	msg_col = `0`;
2897	// Ask for choice.
2898	selected = prompt_for_number(&mouse_used);
2899	if (mouse_used)
2900	selected -= lines_left;
2901	lines_left = Rows; // avoid more prompt
2902	// don't delay for 'smd' in normal_cmd()
2903	msg_scroll = msg_scroll_save;
2904	}
2905
2906	if (selected > `0` && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) {
2907	// Save the from and to text for :spellrepall.
2908	stp = &SUG(sug.su_ga, selected - `1`);
2909	if (sug.su_badlen > stp->st_orglen) {
2910	// Replacing less than "su_badlen", append the remainder to
2911	// repl_to.
2912	repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
2913	vim_snprintf((char )IObuff, IOSIZE, "%s%.s", stp->st_word,
2914	sug.su_badlen - stp->st_orglen,
2915	sug.su_badptr + stp->st_orglen);
2916	repl_to = vim_strsave(IObuff);
2917	} else {
2918	// Replacing su_badlen or more, use the whole word.
2919	repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
2920	repl_to = vim_strsave(stp->st_word);
2921	}
2922
2923	// Replace the word.
2924	p = xmalloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + `1`);
2925	c = (int)(sug.su_badptr - line);
2926	memmove(p, line, c);
2927	STRCPY(p + c, stp->st_word);
2928	STRCAT(p, sug.su_badptr + stp->st_orglen);
2929	ml_replace(curwin->w_cursor.lnum, p, false);
2930	curwin->w_cursor.col = c;
2931
2932	// For redo we use a change-word command.
2933	ResetRedobuff();
2934	AppendToRedobuff("ciw");
2935	AppendToRedobuffLit(p + c,
2936	stp->st_wordlen + sug.su_badlen - stp->st_orglen);
2937	AppendCharToRedobuff(ESC);
2938
2939	// After this "p" may be invalid.
2940	changed_bytes(curwin->w_cursor.lnum, c);
2941	} else
2942	curwin->w_cursor = prev_cursor;
2943
2944	spell_find_cleanup(&sug);
2945	xfree(line);
2946	}
2947
2948	// Check if the word at line "lnum" column "col" is required to start with a
2949	// capital. This uses 'spellcapcheck' of the current buffer.
2950	static bool check_need_cap(linenr_T lnum, colnr_T col)
2951	{
2952	bool need_cap = false;
2953	char_u *line;
2954	char_u *line_copy = NULL;
2955	char_u *p;
2956	colnr_T endcol;
2957	regmatch_T regmatch;
2958
2959	if (curwin->w_s->b_cap_prog == NULL)
2960	return false;
2961
2962	line = get_cursor_line_ptr();
2963	endcol = `0`;
2964	if (getwhitecols(line) >= (int)col) {
2965	// At start of line, check if previous line is empty or sentence
2966	// ends there.
2967	if (lnum == `1`)
2968	need_cap = true;
2969	else {
2970	line = ml_get(lnum - `1`);
2971	if (*skipwhite(line) == NUL)
2972	need_cap = true;
2973	else {
2974	// Append a space in place of the line break.
2975	line_copy = concat_str(line, (char_u *)" ");
2976	line = line_copy;
2977	endcol = (colnr_T)STRLEN(line);
2978	}
2979	}
2980	} else {
2981	endcol = col;
2982	}
2983
2984	if (endcol > `0`) {
2985	// Check if sentence ends before the bad word.
2986	regmatch.regprog = curwin->w_s->b_cap_prog;
2987	regmatch.rm_ic = FALSE;
2988	p = line + endcol;
2989	for (;; ) {
2990	MB_PTR_BACK(line, p);
2991	if (p == line \|\| spell_iswordp_nmw(p, curwin)) {
2992	break;
2993	}
2994	if (vim_regexec(&regmatch, p, `0`)
2995	&& regmatch.endp[`0`] == line + endcol) {
2996	need_cap = true;
2997	break;
2998	}
2999	}
3000	curwin->w_s->b_cap_prog = regmatch.regprog;
3001	}
3002
3003	xfree(line_copy);
3004
3005	return need_cap;
3006	}
3007
3008
3009	// ":spellrepall"
3010	void ex_spellrepall(exarg_T *eap)
3011	{
3012	pos_T pos = curwin->w_cursor;
3013	char_u *frompat;
3014	int addlen;
3015	char_u *line;
3016	char_u *p;
3017	bool save_ws = p_ws;
3018	linenr_T prev_lnum = `0`;
3019
3020	if (repl_from == NULL \|\| repl_to == NULL) {
3021	EMSG(_("E752: No previous spell replacement"));
3022	return;
3023	}
3024	addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
3025
3026	frompat = xmalloc(STRLEN(repl_from) + `7`);
3027	sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
3028	p_ws = false;
3029
3030	sub_nsubs = `0`;
3031	sub_nlines = `0`;
3032	curwin->w_cursor.lnum = `0`;
3033	while (!got_int) {
3034	if (do_search(NULL, `'/'`, frompat, `1L`, SEARCH_KEEP, NULL, NULL) == `0`
3035	\|\| u_save_cursor() == FAIL) {
3036	break;
3037	}
3038
3039	// Only replace when the right word isn't there yet. This happens
3040	// when changing "etc" to "etc.".
3041	line = get_cursor_line_ptr();
3042	if (addlen <= `0` \|\| STRNCMP(line + curwin->w_cursor.col,
3043	repl_to, STRLEN(repl_to)) != `0`) {
3044	p = xmalloc(STRLEN(line) + addlen + `1`);
3045	memmove(p, line, curwin->w_cursor.col);
3046	STRCPY(p + curwin->w_cursor.col, repl_to);
3047	STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
3048	ml_replace(curwin->w_cursor.lnum, p, false);
3049	changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
3050
3051	if (curwin->w_cursor.lnum != prev_lnum) {
3052	++sub_nlines;
3053	prev_lnum = curwin->w_cursor.lnum;
3054	}
3055	++sub_nsubs;
3056	}
3057	curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
3058	}
3059
3060	p_ws = save_ws;
3061	curwin->w_cursor = pos;
3062	xfree(frompat);
3063
3064	if (sub_nsubs == `0`)
3065	EMSG2(_("E753: Not found: %s"), repl_from);
3066	else
3067	do_sub_msg(false);
3068	}
3069
3070	// Find spell suggestions for "word". Return them in the growarray "gap" as*
3071	// a list of allocated strings.
3072	void
3073	spell_suggest_list (
3074	garray_T *gap,
3075	char_u *word,
3076	int maxcount, // maximum nr of suggestions
3077	bool need_cap, // 'spellcapcheck' matched
3078	bool interactive
3079	)
3080	{
3081	suginfo_T sug;
3082	suggest_T *stp;
3083	char_u *wcopy;
3084
3085	spell_find_suggest(word, `0`, &sug, maxcount, false, need_cap, interactive);
3086
3087	// Make room in "gap".
3088	ga_init(gap, sizeof(char_u *), sug.su_ga.ga_len + `1`);
3089	ga_grow(gap, sug.su_ga.ga_len);
3090	for (int i = `0`; i < sug.su_ga.ga_len; ++i) {
3091	stp = &SUG(sug.su_ga, i);
3092
3093	// The suggested word may replace only part of "word", add the not
3094	// replaced part.
3095	wcopy = xmalloc(stp->st_wordlen
3096	+ STRLEN(sug.su_badptr + stp->st_orglen) + `1`);
3097	STRCPY(wcopy, stp->st_word);
3098	STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
3099	((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
3100	}
3101
3102	spell_find_cleanup(&sug);
3103	}
3104
3105	// Find spell suggestions for the word at the start of "badptr".
3106	// Return the suggestions in "su->su_ga".
3107	// The maximum number of suggestions is "maxcount".
3108	// Note: does use info for the current window.
3109	// This is based on the mechanisms of Aspell, but completely reimplemented.
3110	static void
3111	spell_find_suggest (
3112	char_u *badptr,
3113	int badlen, // length of bad word or 0 if unknown
3114	suginfo_T *su,
3115	int maxcount,
3116	bool banbadword, // don't include badword in suggestions
3117	bool need_cap, // word should start with capital
3118	bool interactive
3119	)
3120	{
3121	hlf_T attr = HLF_COUNT;
3122	char_u buf[MAXPATHL];
3123	char_u *p;
3124	bool do_combine = false;
3125	char_u *sps_copy;
3126	static bool expr_busy = false;
3127	int c;
3128	langp_T *lp;
3129
3130	// Set the info in "su".*
3131	memset(su, `0`, sizeof(suginfo_T));
3132	ga_init(&su->su_ga, (int)sizeof(suggest_T), `10`);
3133	ga_init(&su->su_sga, (int)sizeof(suggest_T), `10`);
3134	if (*badptr == NUL)
3135	return;
3136	hash_init(&su->su_banned);
3137
3138	su->su_badptr = badptr;
3139	if (badlen != `0`)
3140	su->su_badlen = badlen;
3141	else {
3142	size_t tmplen = spell_check(curwin, su->su_badptr, &attr, NULL, false);
3143	assert(tmplen <= INT_MAX);
3144	su->su_badlen = (int)tmplen;
3145	}
3146	su->su_maxcount = maxcount;
3147	su->su_maxscore = SCORE_MAXINIT;
3148
3149	if (su->su_badlen >= MAXWLEN)
3150	su->su_badlen = MAXWLEN - `1`; // just in case
3151	STRLCPY(su->su_badword, su->su_badptr, su->su_badlen + `1`);
3152	(void)spell_casefold(su->su_badptr, su->su_badlen, su->su_fbadword, MAXWLEN);
3153
3154	// TODO(vim): make this work if the case-folded text is longer than the
3155	// original text. Currently an illegal byte causes wrong pointer
3156	// computations.
3157	su->su_fbadword[su->su_badlen] = NUL;
3158
3159	// get caps flags for bad word
3160	su->su_badflags = badword_captype(su->su_badptr,
3161	su->su_badptr + su->su_badlen);
3162	if (need_cap)
3163	su->su_badflags \|= WF_ONECAP;
3164
3165	// Find the default language for sound folding. We simply use the first
3166	// one in 'spelllang' that supports sound folding. That's good for when
3167	// using multiple files for one language, it's not that bad when mixing
3168	// languages (e.g., "pl,en").
3169	for (int i = `0`; i < curbuf->b_s.b_langp.ga_len; ++i) {
3170	lp = LANGP_ENTRY(curbuf->b_s.b_langp, i);
3171	if (lp->lp_sallang != NULL) {
3172	su->su_sallang = lp->lp_sallang;
3173	break;
3174	}
3175	}
3176
3177	// Soundfold the bad word with the default sound folding, so that we don't
3178	// have to do this many times.
3179	if (su->su_sallang != NULL)
3180	spell_soundfold(su->su_sallang, su->su_fbadword, true,
3181	su->su_sal_badword);
3182
3183	// If the word is not capitalised and spell_check() doesn't consider the
3184	// word to be bad then it might need to be capitalised. Add a suggestion
3185	// for that.
3186	c = PTR2CHAR(su->su_badptr);
3187	if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) {
3188	make_case_word(su->su_badword, buf, WF_ONECAP);
3189	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
3190	`0`, true, su->su_sallang, false);
3191	}
3192
3193	// Ban the bad word itself. It may appear in another region.
3194	if (banbadword)
3195	add_banned(su, su->su_badword);
3196
3197	// Make a copy of 'spellsuggest', because the expression may change it.
3198	sps_copy = vim_strsave(p_sps);
3199
3200	// Loop over the items in 'spellsuggest'.
3201	for (p = sps_copy; *p != NUL; ) {
3202	copy_option_part(&p, buf, MAXPATHL, ",");
3203
3204	if (STRNCMP(buf, "expr:", `5`) == `0`) {
3205	// Evaluate an expression. Skip this when called recursively,
3206	// when using spellsuggest() in the expression.
3207	if (!expr_busy) {
3208	expr_busy = true;
3209	spell_suggest_expr(su, buf + `5`);
3210	expr_busy = false;
3211	}
3212	} else if (STRNCMP(buf, "file:", `5`) == `0`)
3213	// Use list of suggestions in a file.
3214	spell_suggest_file(su, buf + `5`);
3215	else {
3216	// Use internal method.
3217	spell_suggest_intern(su, interactive);
3218	if (sps_flags & SPS_DOUBLE)
3219	do_combine = true;
3220	}
3221	}
3222
3223	xfree(sps_copy);
3224
3225	if (do_combine)
3226	// Combine the two list of suggestions. This must be done last,
3227	// because sorting changes the order again.
3228	score_combine(su);
3229	}
3230
3231	// Find suggestions by evaluating expression "expr".
3232	static void spell_suggest_expr(suginfo_T su, char_u expr)
3233	{
3234	int score;
3235	const char *p;
3236
3237	// The work is split up in a few parts to avoid having to export
3238	// suginfo_T.
3239	// First evaluate the expression and get the resulting list.
3240	list_T *const list = eval_spell_expr(su->su_badword, expr);
3241	if (list != NULL) {
3242	// Loop over the items in the list.
3243	TV_LIST_ITER(list, li, {
3244	if (TV_LIST_ITEM_TV(li)->v_type == VAR_LIST) {
3245	// Get the word and the score from the items.
3246	score = get_spellword(TV_LIST_ITEM_TV(li)->vval.v_list, &p);
3247	if (score >= `0` && score <= su->su_maxscore) {
3248	add_suggestion(su, &su->su_ga, (const char_u *)p, su->su_badlen,
3249	score, `0`, true, su->su_sallang, false);
3250	}
3251	}
3252	});
3253	tv_list_unref(list);
3254	}
3255
3256	// Remove bogus suggestions, sort and truncate at "maxcount".
3257	check_suggestions(su, &su->su_ga);
3258	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3259	}
3260
3261	// Find suggestions in file "fname". Used for "file:" in 'spellsuggest'.
3262	static void spell_suggest_file(suginfo_T su, char_u fname)
3263	{
3264	FILE *fd;
3265	char_u line[MAXWLEN * `2`];
3266	char_u *p;
3267	int len;
3268	char_u cword[MAXWLEN];
3269
3270	// Open the file.
3271	fd = os_fopen((char *)fname, "r");
3272	if (fd == NULL) {
3273	EMSG2(_(e_notopen), fname);
3274	return;
3275	}
3276
3277	// Read it line by line.
3278	while (!vim_fgets(line, MAXWLEN * `2`, fd) && !got_int) {
3279	line_breakcheck();
3280
3281	p = vim_strchr(line, `'/'`);
3282	if (p == NULL)
3283	continue; // No Tab found, just skip the line.
3284	*p++ = NUL;
3285	if (STRICMP(su->su_badword, line) == `0`) {
3286	// Match! Isolate the good word, until CR or NL.
3287	for (len = `0`; p[len] >= `' '`; ++len)
3288	;
3289	p[len] = NUL;
3290
3291	// If the suggestion doesn't have specific case duplicate the case
3292	// of the bad word.
3293	if (captype(p, NULL) == `0`) {
3294	make_case_word(p, cword, su->su_badflags);
3295	p = cword;
3296	}
3297
3298	add_suggestion(su, &su->su_ga, p, su->su_badlen,
3299	SCORE_FILE, `0`, true, su->su_sallang, false);
3300	}
3301	}
3302
3303	fclose(fd);
3304
3305	// Remove bogus suggestions, sort and truncate at "maxcount".
3306	check_suggestions(su, &su->su_ga);
3307	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3308	}
3309
3310	// Find suggestions for the internal method indicated by "sps_flags".
3311	static void spell_suggest_intern(suginfo_T *su, bool interactive)
3312	{
3313	// Load the .sug file(s) that are available and not done yet.
3314	suggest_load_files();
3315
3316	// 1. Try special cases, such as repeating a word: "the the" -> "the".
3317	//
3318	// Set a maximum score to limit the combination of operations that is
3319	// tried.
3320	suggest_try_special(su);
3321
3322	// 2. Try inserting/deleting/swapping/changing a letter, use REP entries
3323	// from the .aff file and inserting a space (split the word).
3324	suggest_try_change(su);
3325
3326	// For the resulting top-scorers compute the sound-a-like score.
3327	if (sps_flags & SPS_DOUBLE)
3328	score_comp_sal(su);
3329
3330	// 3. Try finding sound-a-like words.
3331	if ((sps_flags & SPS_FAST) == `0`) {
3332	if (sps_flags & SPS_BEST)
3333	// Adjust the word score for the suggestions found so far for how
3334	// they sounds like.
3335	rescore_suggestions(su);
3336
3337	// While going through the soundfold tree "su_maxscore" is the score
3338	// for the soundfold word, limits the changes that are being tried,
3339	// and "su_sfmaxscore" the rescored score, which is set by
3340	// cleanup_suggestions().
3341	// First find words with a small edit distance, because this is much
3342	// faster and often already finds the top-N suggestions. If we didn't
3343	// find many suggestions try again with a higher edit distance.
3344	// "sl_sounddone" is used to avoid doing the same word twice.
3345	suggest_try_soundalike_prep();
3346	su->su_maxscore = SCORE_SFMAX1;
3347	su->su_sfmaxscore = SCORE_MAXINIT * `3`;
3348	suggest_try_soundalike(su);
3349	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) {
3350	// We didn't find enough matches, try again, allowing more
3351	// changes to the soundfold word.
3352	su->su_maxscore = SCORE_SFMAX2;
3353	suggest_try_soundalike(su);
3354	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) {
3355	// Still didn't find enough matches, try again, allowing even
3356	// more changes to the soundfold word.
3357	su->su_maxscore = SCORE_SFMAX3;
3358	suggest_try_soundalike(su);
3359	}
3360	}
3361	su->su_maxscore = su->su_sfmaxscore;
3362	suggest_try_soundalike_finish();
3363	}
3364
3365	// When CTRL-C was hit while searching do show the results. Only clear
3366	// got_int when using a command, not for spellsuggest().
3367	os_breakcheck();
3368	if (interactive && got_int) {
3369	(void)vgetc();
3370	got_int = FALSE;
3371	}
3372
3373	if ((sps_flags & SPS_DOUBLE) == `0` && su->su_ga.ga_len != `0`) {
3374	if (sps_flags & SPS_BEST)
3375	// Adjust the word score for how it sounds like.
3376	rescore_suggestions(su);
3377
3378	// Remove bogus suggestions, sort and truncate at "maxcount".
3379	check_suggestions(su, &su->su_ga);
3380	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3381	}
3382	}
3383
3384	// Free the info put in "su" by spell_find_suggest().*
3385	static void spell_find_cleanup(suginfo_T *su)
3386	{
3387	# define FREE_SUG_WORD(sug) xfree(sug->st_word)
3388	// Free the suggestions.
3389	GA_DEEP_CLEAR(&su->su_ga, suggest_T, FREE_SUG_WORD);
3390	GA_DEEP_CLEAR(&su->su_sga, suggest_T, FREE_SUG_WORD);
3391
3392	// Free the banned words.
3393	hash_clear_all(&su->su_banned, `0`);
3394	}
3395
3396	/// Make a copy of "word", with the first letter upper or lower cased, to
3397	/// "wcopy[MAXWLEN]". "word" must not be empty.
3398	/// The result is NUL terminated.
3399	///
3400	/// @param[in] word source string to copy
3401	/// @param[in,out] wcopy copied string, with case of first letter changed
3402	/// @param[in] upper True to upper case, otherwise lower case
3403	void onecap_copy(char_u word, char_u wcopy, bool upper)
3404	{
3405	char_u *p;
3406	int c;
3407	int l;
3408
3409	p = word;
3410	if (has_mbyte) {
3411	c = mb_cptr2char_adv((const char_u **)&p);
3412	} else {
3413	c = *p++;
3414	}
3415	if (upper) {
3416	c = SPELL_TOUPPER(c);
3417	} else {
3418	c = SPELL_TOFOLD(c);
3419	}
3420	l = utf_char2bytes(c, wcopy);
3421	STRLCPY(wcopy + l, p, MAXWLEN - l);
3422	}
3423
3424	// Make a copy of "word" with all the letters upper cased into
3425	// "wcopy[MAXWLEN]". The result is NUL terminated.
3426	static void allcap_copy(char_u word, char_u wcopy)
3427	{
3428	char_u *s;
3429	char_u *d;
3430	int c;
3431
3432	d = wcopy;
3433	for (s = word; *s != NUL; ) {
3434	if (has_mbyte) {
3435	c = mb_cptr2char_adv((const char_u **)&s);
3436	} else {
3437	c = *s++;
3438	}
3439
3440	if (c == `0xdf`) {
3441	c = `'S'`;
3442	if (d - wcopy >= MAXWLEN - `1`)
3443	break;
3444	*d++ = c;
3445	} else
3446	c = SPELL_TOUPPER(c);
3447
3448	if (d - wcopy >= MAXWLEN - MB_MAXBYTES) {
3449	break;
3450	}
3451	d += utf_char2bytes(c, d);
3452	}
3453	*d = NUL;
3454	}
3455
3456	// Try finding suggestions by recognizing specific situations.
3457	static void suggest_try_special(suginfo_T *su)
3458	{
3459	char_u *p;
3460	size_t len;
3461	int c;
3462	char_u word[MAXWLEN];
3463
3464	// Recognize a word that is repeated: "the the".
3465	p = skiptowhite(su->su_fbadword);
3466	len = p - su->su_fbadword;
3467	p = skipwhite(p);
3468	if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == `0`) {
3469	// Include badflags: if the badword is onecap or allcap
3470	// use that for the goodword too: "The the" -> "The".
3471	c = su->su_fbadword[len];
3472	su->su_fbadword[len] = NUL;
3473	make_case_word(su->su_fbadword, word, su->su_badflags);
3474	su->su_fbadword[len] = c;
3475
3476	// Give a soundalike score of 0, compute the score as if deleting one
3477	// character.
3478	add_suggestion(su, &su->su_ga, word, su->su_badlen,
3479	RESCORE(SCORE_REP, `0`), `0`, true, su->su_sallang, false);
3480	}
3481	}
3482
// Measure how much time is spent in each state.
// Output is dumped in "suggestprof".

#ifdef SUGGEST_PROFILE
proftime_T current;
proftime_T total;
proftime_T times[STATE_FINAL + 1];
long counts[STATE_FINAL + 1];

// Reset the time and count of every state and start both timers.
static void prof_init(void)
{
  for (int i = 0; i <= STATE_FINAL; i++) {
    profile_zero(&times[i]);
    counts[i] = 0;
  }
  profile_start(&current);
  profile_start(&total);
}

// call before changing state
static void prof_store(state_T state)
{
  profile_end(&current);
  profile_add(&times[state], &current);
  counts[state]++;
  profile_start(&current);
}
# define PROF_STORE(state) prof_store(state);

// Append the total time and the time and count of each state to the file
// "suggestprof".  "name" is used as the label of the report.
static void prof_report(char *name)
{
  FILE *fd = fopen("suggestprof", "a");

  profile_end(&total);
  if (fd == NULL) {
    // Profiling output is best effort: without the file there is nothing
    // to report to.
    return;
  }
  fprintf(fd, "-----------------------\n");
  fprintf(fd, "%s: %s\n", name, profile_msg(&total));
  for (int i = 0; i <= STATE_FINAL; i++) {
    // "counts" is a long; cast so that the argument matches PRId64.
    fprintf(fd, "%d: %s (%" PRId64 ")\n", i, profile_msg(&times[i]),
            (int64_t)counts[i]);
  }
  fclose(fd);
}
#else
# define PROF_STORE(state)
#endif
3530
3531	// Try finding suggestions by adding/removing/swapping letters.
3532
3533	static void suggest_try_change(suginfo_T *su)
3534	{
3535	char_u fword[MAXWLEN]; // copy of the bad word, case-folded
3536	int n;
3537	char_u *p;
3538	langp_T *lp;
3539
3540	// We make a copy of the case-folded bad word, so that we can modify it
3541	// to find matches (esp. REP items). Append some more text, changing
3542	// chars after the bad word may help.
3543	STRCPY(fword, su->su_fbadword);
3544	n = (int)STRLEN(fword);
3545	p = su->su_badptr + su->su_badlen;
3546	(void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
3547
3548	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
3549	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
3550
3551	// If reloading a spell file fails it's still in the list but
3552	// everything has been cleared.
3553	if (lp->lp_slang->sl_fbyts == NULL)
3554	continue;
3555
3556	// Try it for this language. Will add possible suggestions.
3557	//
3558	#ifdef SUGGEST_PROFILE
3559	prof_init();
3560	#endif
3561	suggest_trie_walk(su, lp, fword, false);
3562	#ifdef SUGGEST_PROFILE
3563	prof_report("try_change");
3564	#endif
3565	}
3566	}
3567
// Check the maximum score, if we go over it we won't try this change.
// Evaluates to non-zero when the score stored at stack[depth] plus "add" is
// still below "su->su_maxscore".  The arguments are parenthesised so that
// expressions such as "&info" or "base + 1" expand correctly.
#define TRY_DEEPER(su, stack, depth, add) \
  ((stack)[depth].ts_score + (add) < (su)->su_maxscore)
3571
3572	// Try finding suggestions by adding/removing/swapping letters.
3573	//
3574	// This uses a state machine. At each node in the tree we try various
3575	// operations. When trying if an operation works "depth" is increased and the
3576	// stack[] is used to store info. This allows combinations, thus insert one
3577	// character, replace one and delete another. The number of changes is
3578	// limited by su->su_maxscore.
3579	//
3580	// After implementing this I noticed an article by Kemal Oflazer that
3581	// describes something similar: "Error-tolerant Finite State Recognition with
3582	// Applications to Morphological Analysis and Spelling Correction" (1996).
3583	// The implementation in the article is simplified and requires a stack of
3584	// unknown depth. The implementation here only needs a stack depth equal to
3585	// the length of the word.
3586	//
3587	// This is also used for the sound-folded word, "soundfold" is true then.
3588	// The mechanism is the same, but we find a match with a sound-folded word
3589	// that comes from one or more original words. Each of these words may be
3590	// added, this is done by add_sound_suggest().
3591	// Don't use:
3592	// the prefix tree or the keep-case tree
3593	// "su->su_badlen"
3594	// anything to do with upper and lower case
3595	// anything to do with word or non-word characters ("spell_iswordp()")
3596	// banned words
3597	// word flags (rare, region, compounding)
3598	// word splitting for now
3599	// "similar_chars()"
3600	// use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
3601	static void suggest_trie_walk(suginfo_T su, langp_T lp, char_u *fword, bool soundfold)
3602	{
3603	char_u tword[MAXWLEN]; // good word collected so far
3604	trystate_T stack[MAXWLEN];
3605	char_u preword[MAXWLEN * `3`]; // word found with proper case;
3606	// concatenation of prefix compound
3607	// words and split word. NUL terminated
3608	// when going deeper but not when coming
3609	// back.
3610	char_u compflags[MAXWLEN]; // compound flags, one for each word
3611	trystate_T *sp;
3612	int newscore;
3613	int score;
3614	char_u byts, fbyts, *pbyts;
3615	idx_T idxs, fidxs, *pidxs;
3616	int depth;
3617	int c, c2, c3;
3618	int n = `0`;
3619	int flags;
3620	garray_T *gap;
3621	idx_T arridx;
3622	int len;
3623	char_u *p;
3624	fromto_T *ftp;
3625	int fl = `0`, tl;
3626	int repextra = `0`; // extra bytes in fword[] from REP item
3627	slang_T *slang = lp->lp_slang;
3628	int fword_ends;
3629	bool goodword_ends;
3630	#ifdef DEBUG_TRIEWALK
3631	// Stores the name of the change made at each level.
3632	char_u changename[MAXWLEN][`80`];
3633	#endif
3634	int breakcheckcount = `1000`;
3635	bool compound_ok;
3636
3637	// Go through the whole case-fold tree, try changes at each node.
3638	// "tword[]" contains the word collected from nodes in the tree.
3639	// "fword[]" the word we are trying to match with (initially the bad
3640	// word).
3641	depth = `0`;
3642	sp = &stack[`0`];
3643	memset(sp, `0`, sizeof(trystate_T)); // -V512
3644	sp->ts_curi = `1`;
3645
3646	if (soundfold) {
3647	// Going through the soundfold tree.
3648	byts = fbyts = slang->sl_sbyts;
3649	idxs = fidxs = slang->sl_sidxs;
3650	pbyts = NULL;
3651	pidxs = NULL;
3652	sp->ts_prefixdepth = PFD_NOPREFIX;
3653	sp->ts_state = STATE_START;
3654	} else {
3655	// When there are postponed prefixes we need to use these first. At
3656	// the end of the prefix we continue in the case-fold tree.
3657	fbyts = slang->sl_fbyts;
3658	fidxs = slang->sl_fidxs;
3659	pbyts = slang->sl_pbyts;
3660	pidxs = slang->sl_pidxs;
3661	if (pbyts != NULL) {
3662	byts = pbyts;
3663	idxs = pidxs;
3664	sp->ts_prefixdepth = PFD_PREFIXTREE;
3665	sp->ts_state = STATE_NOPREFIX; // try without prefix first
3666	} else {
3667	byts = fbyts;
3668	idxs = fidxs;
3669	sp->ts_prefixdepth = PFD_NOPREFIX;
3670	sp->ts_state = STATE_START;
3671	}
3672	}
3673
3674	// Loop to find all suggestions. At each round we either:
3675	// - For the current state try one operation, advance "ts_curi",
3676	// increase "depth".
3677	// - When a state is done go to the next, set "ts_state".
3678	// - When all states are tried decrease "depth".
3679	while (depth >= `0` && !got_int) {
3680	sp = &stack[depth];
3681	switch (sp->ts_state) {
3682	case STATE_START:
3683	case STATE_NOPREFIX:
3684	// Start of node: Deal with NUL bytes, which means
3685	// tword[] may end here.
3686	arridx = sp->ts_arridx; // current node in the tree
3687	len = byts[arridx]; // bytes in this node
3688	arridx += sp->ts_curi; // index of current byte
3689
3690	if (sp->ts_prefixdepth == PFD_PREFIXTREE) {
3691	// Skip over the NUL bytes, we use them later.
3692	for (n = `0`; n < len && byts[arridx + n] == `0`; ++n)
3693	;
3694	sp->ts_curi += n;
3695
3696	// Always past NUL bytes now.
3697	n = (int)sp->ts_state;
3698	PROF_STORE(sp->ts_state)
3699	sp->ts_state = STATE_ENDNUL;
3700	sp->ts_save_badflags = su->su_badflags;
3701
3702	// At end of a prefix or at start of prefixtree: check for
3703	// following word.
3704	if (byts[arridx] == `0` \|\| n == (int)STATE_NOPREFIX) {
3705	// Set su->su_badflags to the caps type at this position.
3706	// Use the caps type until here for the prefix itself.
3707	if (has_mbyte)
3708	n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
3709	else
3710	n = sp->ts_fidx;
3711	flags = badword_captype(su->su_badptr, su->su_badptr + n);
3712	su->su_badflags = badword_captype(su->su_badptr + n,
3713	su->su_badptr + su->su_badlen);
3714	#ifdef DEBUG_TRIEWALK
3715	sprintf(changename[depth], "prefix");
3716	#endif
3717	go_deeper(stack, depth, `0`);
3718	++depth;
3719	sp = &stack[depth];
3720	sp->ts_prefixdepth = depth - `1`;
3721	byts = fbyts;
3722	idxs = fidxs;
3723	sp->ts_arridx = `0`;
3724
3725	// Move the prefix to preword[] with the right case
3726	// and make find_keepcap_word() works.
3727	tword[sp->ts_twordlen] = NUL;
3728	make_case_word(tword + sp->ts_splitoff,
3729	preword + sp->ts_prewordlen, flags);
3730	sp->ts_prewordlen = (char_u)STRLEN(preword);
3731	sp->ts_splitoff = sp->ts_twordlen;
3732	}
3733	break;
3734	}
3735
3736	if (sp->ts_curi > len \|\| byts[arridx] != `0`) {
3737	// Past bytes in node and/or past NUL bytes.
3738	PROF_STORE(sp->ts_state)
3739	sp->ts_state = STATE_ENDNUL;
3740	sp->ts_save_badflags = su->su_badflags;
3741	break;
3742	}
3743
3744	// End of word in tree.
3745	++sp->ts_curi; // eat one NUL byte
3746
3747	flags = (int)idxs[arridx];
3748
3749	// Skip words with the NOSUGGEST flag.
3750	if (flags & WF_NOSUGGEST)
3751	break;
3752
3753	fword_ends = (fword[sp->ts_fidx] == NUL
3754	\|\| (soundfold
3755	? ascii_iswhite(fword[sp->ts_fidx])
3756	: !spell_iswordp(fword + sp->ts_fidx, curwin)));
3757	tword[sp->ts_twordlen] = NUL;
3758
3759	if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
3760	&& (sp->ts_flags & TSF_PREFIXOK) == `0`) {
3761	// There was a prefix before the word. Check that the prefix
3762	// can be used with this word.
3763	// Count the length of the NULs in the prefix. If there are
3764	// none this must be the first try without a prefix.
3765	n = stack[sp->ts_prefixdepth].ts_arridx;
3766	len = pbyts[n++];
3767	for (c = `0`; c < len && pbyts[n + c] == `0`; ++c)
3768	;
3769	if (c > `0`) {
3770	c = valid_word_prefix(c, n, flags,
3771	tword + sp->ts_splitoff, slang, false);
3772	if (c == `0`)
3773	break;
3774
3775	// Use the WF_RARE flag for a rare prefix.
3776	if (c & WF_RAREPFX)
3777	flags \|= WF_RARE;
3778
3779	// Tricky: when checking for both prefix and compounding
3780	// we run into the prefix flag first.
3781	// Remember that it's OK, so that we accept the prefix
3782	// when arriving at a compound flag.
3783	sp->ts_flags \|= TSF_PREFIXOK;
3784	}
3785	}
3786
3787	// Check NEEDCOMPOUND: can't use word without compounding. Do try
3788	// appending another compound word below.
3789	if (sp->ts_complen == sp->ts_compsplit && fword_ends
3790	&& (flags & WF_NEEDCOMP))
3791	goodword_ends = false;
3792	else
3793	goodword_ends = true;
3794
3795	p = NULL;
3796	compound_ok = true;
3797	if (sp->ts_complen > sp->ts_compsplit) {
3798	if (slang->sl_nobreak) {
3799	// There was a word before this word. When there was no
3800	// change in this word (it was correct) add the first word
3801	// as a suggestion. If this word was corrected too, we
3802	// need to check if a correct word follows.
3803	if (sp->ts_fidx - sp->ts_splitfidx
3804	== sp->ts_twordlen - sp->ts_splitoff
3805	&& STRNCMP(fword + sp->ts_splitfidx,
3806	tword + sp->ts_splitoff,
3807	sp->ts_fidx - sp->ts_splitfidx) == `0`) {
3808	preword[sp->ts_prewordlen] = NUL;
3809	newscore = score_wordcount_adj(slang, sp->ts_score,
3810	preword + sp->ts_prewordlen,
3811	sp->ts_prewordlen > `0`);
3812	// Add the suggestion if the score isn't too bad.
3813	if (newscore <= su->su_maxscore)
3814	add_suggestion(su, &su->su_ga, preword,
3815	sp->ts_splitfidx - repextra,
3816	newscore, `0`, false,
3817	lp->lp_sallang, false);
3818	break;
3819	}
3820	} else {
3821	// There was a compound word before this word. If this
3822	// word does not support compounding then give up
3823	// (splitting is tried for the word without compound
3824	// flag).
3825	if (((unsigned)flags >> `24`) == `0`
3826	\|\| sp->ts_twordlen - sp->ts_splitoff
3827	< slang->sl_compminlen)
3828	break;
3829	// For multi-byte chars check character length against
3830	// COMPOUNDMIN.
3831	if (has_mbyte
3832	&& slang->sl_compminlen > `0`
3833	&& mb_charlen(tword + sp->ts_splitoff)
3834	< slang->sl_compminlen)
3835	break;
3836
3837	compflags[sp->ts_complen] = ((unsigned)flags >> `24`);
3838	compflags[sp->ts_complen + `1`] = NUL;
3839	STRLCPY(preword + sp->ts_prewordlen,
3840	tword + sp->ts_splitoff,
3841	sp->ts_twordlen - sp->ts_splitoff + `1`);
3842
3843	// Verify CHECKCOMPOUNDPATTERN rules.
3844	if (match_checkcompoundpattern(preword, sp->ts_prewordlen,
3845	&slang->sl_comppat))
3846	compound_ok = false;
3847
3848	if (compound_ok) {
3849	p = preword;
3850	while (*skiptowhite(p) != NUL)
3851	p = skipwhite(skiptowhite(p));
3852	if (fword_ends && !can_compound(slang, p,
3853	compflags + sp->ts_compsplit))
3854	// Compound is not allowed. But it may still be
3855	// possible if we add another (short) word.
3856	compound_ok = false;
3857	}
3858
3859	// Get pointer to last char of previous word.
3860	p = preword + sp->ts_prewordlen;
3861	MB_PTR_BACK(preword, p);
3862	}
3863	}
3864
3865	// Form the word with proper case in preword.
3866	// If there is a word from a previous split, append.
3867	// For the soundfold tree don't change the case, simply append.
3868	if (soundfold)
3869	STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
3870	else if (flags & WF_KEEPCAP)
3871	// Must find the word in the keep-case tree.
3872	find_keepcap_word(slang, tword + sp->ts_splitoff,
3873	preword + sp->ts_prewordlen);
3874	else {
3875	// Include badflags: If the badword is onecap or allcap
3876	// use that for the goodword too. But if the badword is
3877	// allcap and it's only one char long use onecap.
3878	c = su->su_badflags;
3879	if ((c & WF_ALLCAP)
3880	&& su->su_badlen == (*mb_ptr2len)(su->su_badptr)
3881	)
3882	c = WF_ONECAP;
3883	c \|= flags;
3884
3885	// When appending a compound word after a word character don't
3886	// use Onecap.
3887	if (p != NULL && spell_iswordp_nmw(p, curwin))
3888	c &= ~WF_ONECAP;
3889	make_case_word(tword + sp->ts_splitoff,
3890	preword + sp->ts_prewordlen, c);
3891	}
3892
3893	if (!soundfold) {
3894	// Don't use a banned word. It may appear again as a good
3895	// word, thus remember it.
3896	if (flags & WF_BANNED) {
3897	add_banned(su, preword + sp->ts_prewordlen);
3898	break;
3899	}
3900	if ((sp->ts_complen == sp->ts_compsplit
3901	&& WAS_BANNED(su, preword + sp->ts_prewordlen))
3902	\|\| WAS_BANNED(su, preword)) {
3903	if (slang->sl_compprog == NULL)
3904	break;
3905	// the word so far was banned but we may try compounding
3906	goodword_ends = false;
3907	}
3908	}
3909
3910	newscore = `0`;
3911	if (!soundfold) { // soundfold words don't have flags
3912	if ((flags & WF_REGION)
3913	&& (((unsigned)flags >> `16`) & lp->lp_region) == `0`)
3914	newscore += SCORE_REGION;
3915	if (flags & WF_RARE)
3916	newscore += SCORE_RARE;
3917
3918	if (!spell_valid_case(su->su_badflags,
3919	captype(preword + sp->ts_prewordlen, NULL)))
3920	newscore += SCORE_ICASE;
3921	}
3922
3923	// TODO: how about splitting in the soundfold tree?
3924	if (fword_ends
3925	&& goodword_ends
3926	&& sp->ts_fidx >= sp->ts_fidxtry
3927	&& compound_ok) {
3928	// The badword also ends: add suggestions.
3929	#ifdef DEBUG_TRIEWALK
3930	if (soundfold && STRCMP(preword, "smwrd") == `0`) {
3931	int j;
3932
3933	// print the stack of changes that brought us here
3934	smsg("------ %s -------", fword);
3935	for (j = `0`; j < depth; ++j)
3936	smsg("%s", changename[j]);
3937	}
3938	#endif
3939	if (soundfold) {
3940	// For soundfolded words we need to find the original
3941	// words, the edit distance and then add them.
3942	add_sound_suggest(su, preword, sp->ts_score, lp);
3943	} else if (sp->ts_fidx > `0`) {
3944	// Give a penalty when changing non-word char to word
3945	// char, e.g., "thes," -> "these".
3946	p = fword + sp->ts_fidx;
3947	MB_PTR_BACK(fword, p);
3948	if (!spell_iswordp(p, curwin)) {
3949	p = preword + STRLEN(preword);
3950	MB_PTR_BACK(preword, p);
3951	if (spell_iswordp(p, curwin)) {
3952	newscore += SCORE_NONWORD;
3953	}
3954	}
3955
3956	// Give a bonus to words seen before.
3957	score = score_wordcount_adj(slang,
3958	sp->ts_score + newscore,
3959	preword + sp->ts_prewordlen,
3960	sp->ts_prewordlen > `0`);
3961
3962	// Add the suggestion if the score isn't too bad.
3963	if (score <= su->su_maxscore) {
3964	add_suggestion(su, &su->su_ga, preword,
3965	sp->ts_fidx - repextra,
3966	score, `0`, false, lp->lp_sallang, false);
3967
3968	if (su->su_badflags & WF_MIXCAP) {
3969	// We really don't know if the word should be
3970	// upper or lower case, add both.
3971	c = captype(preword, NULL);
3972	if (c == `0` \|\| c == WF_ALLCAP) {
3973	make_case_word(tword + sp->ts_splitoff,
3974	preword + sp->ts_prewordlen,
3975	c == `0` ? WF_ALLCAP : `0`);
3976
3977	add_suggestion(su, &su->su_ga, preword,
3978	sp->ts_fidx - repextra,
3979	score + SCORE_ICASE, `0`, false,
3980	lp->lp_sallang, false);
3981	}
3982	}
3983	}
3984	}
3985	}
3986
3987	// Try word split and/or compounding.
3988	if ((sp->ts_fidx >= sp->ts_fidxtry \|\| fword_ends)
3989	// Don't split in the middle of a character
3990	&& (!has_mbyte \|\| sp->ts_tcharlen == `0`)
3991	) {
3992	bool try_compound;
3993	int try_split;
3994
3995	// If past the end of the bad word don't try a split.
3996	// Otherwise try changing the next word. E.g., find
3997	// suggestions for "the the" where the second "the" is
3998	// different. It's done like a split.
3999	// TODO: word split for soundfold words
4000	try_split = (sp->ts_fidx - repextra < su->su_badlen)
4001	&& !soundfold;
4002
4003	// Get here in several situations:
4004	// 1. The word in the tree ends:
4005	// If the word allows compounding try that. Otherwise try
4006	// a split by inserting a space. For both check that a
4007	// valid words starts at fword[sp->ts_fidx].
4008	// For NOBREAK do like compounding to be able to check if
4009	// the next word is valid.
4010	// 2. The badword does end, but it was due to a change (e.g.,
4011	// a swap). No need to split, but do check that the
4012	// following word is valid.
4013	// 3. The badword and the word in the tree end. It may still
4014	// be possible to compound another (short) word.
4015	try_compound = false;
4016	if (!soundfold
4017	&& !slang->sl_nocompoundsugs
4018	&& slang->sl_compprog != NULL
4019	&& ((unsigned)flags >> `24`) != `0`
4020	&& sp->ts_twordlen - sp->ts_splitoff
4021	>= slang->sl_compminlen
4022	&& (!has_mbyte
4023	\|\| slang->sl_compminlen == `0`
4024	\|\| mb_charlen(tword + sp->ts_splitoff)
4025	>= slang->sl_compminlen)
4026	&& (slang->sl_compsylmax < MAXWLEN
4027	\|\| sp->ts_complen + `1` - sp->ts_compsplit
4028	< slang->sl_compmax)
4029	&& (can_be_compound(sp, slang,
4030	compflags, ((unsigned)flags >> `24`)))) {
4031	try_compound = true;
4032	compflags[sp->ts_complen] = ((unsigned)flags >> `24`);
4033	compflags[sp->ts_complen + `1`] = NUL;
4034	}
4035
4036	// For NOBREAK we never try splitting, it won't make any word
4037	// valid.
4038	if (slang->sl_nobreak && !slang->sl_nocompoundsugs) {
4039	try_compound = true;
4040	} else if (!fword_ends
4041	&& try_compound
4042	&& (sp->ts_flags & TSF_DIDSPLIT) == `0`) {
4043	// If we could add a compound word, and it's also possible to
4044	// split at this point, do the split first and set
4045	// TSF_DIDSPLIT to avoid doing it again.
4046	try_compound = false;
4047	sp->ts_flags \|= TSF_DIDSPLIT;
4048	--sp->ts_curi; // do the same NUL again
4049	compflags[sp->ts_complen] = NUL;
4050	} else {
4051	sp->ts_flags &= ~TSF_DIDSPLIT;
4052	}
4053
4054	if (try_split \|\| try_compound) {
4055	if (!try_compound && (!fword_ends \|\| !goodword_ends)) {
4056	// If we're going to split need to check that the
4057	// words so far are valid for compounding. If there
4058	// is only one word it must not have the NEEDCOMPOUND
4059	// flag.
4060	if (sp->ts_complen == sp->ts_compsplit
4061	&& (flags & WF_NEEDCOMP))
4062	break;
4063	p = preword;
4064	while (*skiptowhite(p) != NUL)
4065	p = skipwhite(skiptowhite(p));
4066	if (sp->ts_complen > sp->ts_compsplit
4067	&& !can_compound(slang, p,
4068	compflags + sp->ts_compsplit))
4069	break;
4070
4071	if (slang->sl_nosplitsugs)
4072	newscore += SCORE_SPLIT_NO;
4073	else
4074	newscore += SCORE_SPLIT;
4075
4076	// Give a bonus to words seen before.
4077	newscore = score_wordcount_adj(slang, newscore,
4078	preword + sp->ts_prewordlen, true);
4079	}
4080
4081	if (TRY_DEEPER(su, stack, depth, newscore)) {
4082	go_deeper(stack, depth, newscore);
4083	#ifdef DEBUG_TRIEWALK
4084	if (!try_compound && !fword_ends)
4085	sprintf(changename[depth], "%.*s-%s: split",
4086	sp->ts_twordlen, tword, fword + sp->ts_fidx);
4087	else
4088	sprintf(changename[depth], "%.*s-%s: compound",
4089	sp->ts_twordlen, tword, fword + sp->ts_fidx);
4090	#endif
4091	// Save things to be restored at STATE_SPLITUNDO.
4092	sp->ts_save_badflags = su->su_badflags;
4093	PROF_STORE(sp->ts_state)
4094	sp->ts_state = STATE_SPLITUNDO;
4095
4096	++depth;
4097	sp = &stack[depth];
4098
4099	// Append a space to preword when splitting.
4100	if (!try_compound && !fword_ends)
4101	STRCAT(preword, " ");
4102	sp->ts_prewordlen = (char_u)STRLEN(preword);
4103	sp->ts_splitoff = sp->ts_twordlen;
4104	sp->ts_splitfidx = sp->ts_fidx;
4105
4106	// If the badword has a non-word character at this
4107	// position skip it. That means replacing the
4108	// non-word character with a space. Always skip a
4109	// character when the word ends. But only when the
4110	// good word can end.
4111	if (((!try_compound && !spell_iswordp_nmw(fword
4112	+ sp->ts_fidx,
4113	curwin))
4114	\|\| fword_ends)
4115	&& fword[sp->ts_fidx] != NUL
4116	&& goodword_ends) {
4117	int l;
4118
4119	l = MB_PTR2LEN(fword + sp->ts_fidx);
4120	if (fword_ends) {
4121	// Copy the skipped character to preword.
4122	memmove(preword + sp->ts_prewordlen,
4123	fword + sp->ts_fidx, l);
4124	sp->ts_prewordlen += l;
4125	preword[sp->ts_prewordlen] = NUL;
4126	} else
4127	sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
4128	sp->ts_fidx += l;
4129	}
4130
4131	// When compounding include compound flag in
4132	// compflags[] (already set above). When splitting we
4133	// may start compounding over again.
4134	if (try_compound)
4135	++sp->ts_complen;
4136	else
4137	sp->ts_compsplit = sp->ts_complen;
4138	sp->ts_prefixdepth = PFD_NOPREFIX;
4139
4140	// set su->su_badflags to the caps type at this
4141	// position
4142	if (has_mbyte)
4143	n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4144	else
4145	n = sp->ts_fidx;
4146	su->su_badflags = badword_captype(su->su_badptr + n,
4147	su->su_badptr + su->su_badlen);
4148
4149	// Restart at top of the tree.
4150	sp->ts_arridx = `0`;
4151
4152	// If there are postponed prefixes, try these too.
4153	if (pbyts != NULL) {
4154	byts = pbyts;
4155	idxs = pidxs;
4156	sp->ts_prefixdepth = PFD_PREFIXTREE;
4157	PROF_STORE(sp->ts_state)
4158	sp->ts_state = STATE_NOPREFIX;
4159	}
4160	}
4161	}
4162	}
4163	break;
4164
4165	case STATE_SPLITUNDO:
4166	// Undo the changes done for word split or compound word.
4167	su->su_badflags = sp->ts_save_badflags;
4168
4169	// Continue looking for NUL bytes.
4170	PROF_STORE(sp->ts_state)
4171	sp->ts_state = STATE_START;
4172
4173	// In case we went into the prefix tree.
4174	byts = fbyts;
4175	idxs = fidxs;
4176	break;
4177
4178	case STATE_ENDNUL:
4179	// Past the NUL bytes in the node.
4180	su->su_badflags = sp->ts_save_badflags;
4181	if (fword[sp->ts_fidx] == NUL
4182	&& sp->ts_tcharlen == `0`
4183	) {
4184	// The badword ends, can't use STATE_PLAIN.
4185	PROF_STORE(sp->ts_state)
4186	sp->ts_state = STATE_DEL;
4187	break;
4188	}
4189	PROF_STORE(sp->ts_state)
4190	sp->ts_state = STATE_PLAIN;
4191	FALLTHROUGH;
4192
4193	case STATE_PLAIN:
4194	// Go over all possible bytes at this node, add each to tword[]
4195	// and use child node. "ts_curi" is the index.
4196	arridx = sp->ts_arridx;
4197	if (sp->ts_curi > byts[arridx]) {
4198	// Done all bytes at this node, do next state. When still at
4199	// already changed bytes skip the other tricks.
4200	PROF_STORE(sp->ts_state)
4201	if (sp->ts_fidx >= sp->ts_fidxtry) {
4202	sp->ts_state = STATE_DEL;
4203	} else {
4204	sp->ts_state = STATE_FINAL;
4205	}
4206	} else {
4207	arridx += sp->ts_curi++;
4208	c = byts[arridx];
4209
4210	// Normal byte, go one level deeper. If it's not equal to the
4211	// byte in the bad word adjust the score. But don't even try
4212	// when the byte was already changed. And don't try when we
4213	// just deleted this byte, accepting it is always cheaper than
4214	// delete + substitute.
4215	if (c == fword[sp->ts_fidx]
4216	\|\| (sp->ts_tcharlen > `0` && sp->ts_isdiff != DIFF_NONE)
4217	)
4218	newscore = `0`;
4219	else
4220	newscore = SCORE_SUBST;
4221	if ((newscore == `0`
4222	\|\| (sp->ts_fidx >= sp->ts_fidxtry
4223	&& ((sp->ts_flags & TSF_DIDDEL) == `0`
4224	\|\| c != fword[sp->ts_delidx])))
4225	&& TRY_DEEPER(su, stack, depth, newscore)) {
4226	go_deeper(stack, depth, newscore);
4227	#ifdef DEBUG_TRIEWALK
4228	if (newscore > `0`)
4229	sprintf(changename[depth], "%.*s-%s: subst %c to %c",
4230	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4231	fword[sp->ts_fidx], c);
4232	else
4233	sprintf(changename[depth], "%.*s-%s: accept %c",
4234	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4235	fword[sp->ts_fidx]);
4236	#endif
4237	++depth;
4238	sp = &stack[depth];
4239	++sp->ts_fidx;
4240	tword[sp->ts_twordlen++] = c;
4241	sp->ts_arridx = idxs[arridx];
4242	if (newscore == SCORE_SUBST)
4243	sp->ts_isdiff = DIFF_YES;
4244	if (has_mbyte) {
4245	// Multi-byte characters are a bit complicated to
4246	// handle: They differ when any of the bytes differ
4247	// and then their length may also differ.
4248	if (sp->ts_tcharlen == `0`) {
4249	// First byte.
4250	sp->ts_tcharidx = `0`;
4251	sp->ts_tcharlen = MB_BYTE2LEN(c);
4252	sp->ts_fcharstart = sp->ts_fidx - `1`;
4253	sp->ts_isdiff = (newscore != `0`)
4254	? DIFF_YES : DIFF_NONE;
4255	} else if (sp->ts_isdiff == DIFF_INSERT)
4256	// When inserting trail bytes don't advance in the
4257	// bad word.
4258	--sp->ts_fidx;
4259	if (++sp->ts_tcharidx == sp->ts_tcharlen) {
4260	// Last byte of character.
4261	if (sp->ts_isdiff == DIFF_YES) {
4262	// Correct ts_fidx for the byte length of the
4263	// character (we didn't check that before).
4264	sp->ts_fidx = sp->ts_fcharstart
4265	+ MB_PTR2LEN(fword + sp->ts_fcharstart);
4266
4267	// For changing a composing character adjust
4268	// the score from SCORE_SUBST to
4269	// SCORE_SUBCOMP.
4270	if (enc_utf8
4271	&& utf_iscomposing(utf_ptr2char(tword + sp->ts_twordlen
4272	- sp->ts_tcharlen))
4273	&& utf_iscomposing(utf_ptr2char(fword
4274	+ sp->ts_fcharstart))) {
4275	sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP;
4276	} else if (
4277	!soundfold
4278	&& slang->sl_has_map
4279	&& similar_chars(
4280	slang,
4281	utf_ptr2char(tword + sp->ts_twordlen - sp->ts_tcharlen),
4282	utf_ptr2char(fword + sp->ts_fcharstart))) {
4283	// For a similar character adjust score from
4284	// SCORE_SUBST to SCORE_SIMILAR.
4285	sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
4286	}
4287	} else if (sp->ts_isdiff == DIFF_INSERT
4288	&& sp->ts_twordlen > sp->ts_tcharlen) {
4289	p = tword + sp->ts_twordlen - sp->ts_tcharlen;
4290	c = utf_ptr2char(p);
4291	if (utf_iscomposing(c)) {
4292	// Inserting a composing char doesn't
4293	// count that much.
4294	sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
4295	} else {
4296	// If the previous character was the same,
4297	// thus doubling a character, give a bonus
4298	// to the score. Also for the soundfold
4299	// tree (might seem illogical but does
4300	// give better scores).
4301	MB_PTR_BACK(tword, p);
4302	if (c == utf_ptr2char(p)) {
4303	sp->ts_score -= SCORE_INS - SCORE_INSDUP;
4304	}
4305	}
4306	}
4307
4308	// Starting a new char, reset the length.
4309	sp->ts_tcharlen = `0`;
4310	}
4311	} else {
4312	// If we found a similar char adjust the score.
4313	// We do this after calling go_deeper() because
4314	// it's slow.
4315	if (newscore != `0`
4316	&& !soundfold
4317	&& slang->sl_has_map
4318	&& similar_chars(slang,
4319	c, fword[sp->ts_fidx - `1`]))
4320	sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
4321	}
4322	}
4323	}
4324	break;
4325
4326	case STATE_DEL:
4327	// When past the first byte of a multi-byte char don't try
4328	// delete/insert/swap a character.
4329	if (has_mbyte && sp->ts_tcharlen > `0`) {
4330	PROF_STORE(sp->ts_state)
4331	sp->ts_state = STATE_FINAL;
4332	break;
4333	}
4334	// Try skipping one character in the bad word (delete it).
4335	PROF_STORE(sp->ts_state)
4336	sp->ts_state = STATE_INS_PREP;
4337	sp->ts_curi = `1`;
4338	if (soundfold && sp->ts_fidx == `0` && fword[sp->ts_fidx] == `'*'`)
4339	// Deleting a vowel at the start of a word counts less, see
4340	// soundalike_score().
4341	newscore = `2` * SCORE_DEL / `3`;
4342	else
4343	newscore = SCORE_DEL;
4344	if (fword[sp->ts_fidx] != NUL
4345	&& TRY_DEEPER(su, stack, depth, newscore)) {
4346	go_deeper(stack, depth, newscore);
4347	#ifdef DEBUG_TRIEWALK
4348	sprintf(changename[depth], "%.*s-%s: delete %c",
4349	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4350	fword[sp->ts_fidx]);
4351	#endif
4352	++depth;
4353
4354	// Remember what character we deleted, so that we can avoid
4355	// inserting it again.
4356	stack[depth].ts_flags \|= TSF_DIDDEL;
4357	stack[depth].ts_delidx = sp->ts_fidx;
4358
4359	// Advance over the character in fword[]. Give a bonus to the
4360	// score if the same character is following "nn" -> "n". It's
4361	// a bit illogical for soundfold tree but it does give better
4362	// results.
4363	c = utf_ptr2char(fword + sp->ts_fidx);
4364	stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx);
4365	if (utf_iscomposing(c)) {
4366	stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
4367	} else if (c == utf_ptr2char(fword + stack[depth].ts_fidx)) {
4368	stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
4369	}
4370
4371	break;
4372	}
4373	FALLTHROUGH;
4374
4375	case STATE_INS_PREP:
4376	if (sp->ts_flags & TSF_DIDDEL) {
4377	// If we just deleted a byte then inserting won't make sense,
4378	// a substitute is always cheaper.
4379	PROF_STORE(sp->ts_state)
4380	sp->ts_state = STATE_SWAP;
4381	break;
4382	}
4383
4384	// skip over NUL bytes
4385	n = sp->ts_arridx;
4386	for (;; ) {
4387	if (sp->ts_curi > byts[n]) {
4388	// Only NUL bytes at this node, go to next state.
4389	PROF_STORE(sp->ts_state)
4390	sp->ts_state = STATE_SWAP;
4391	break;
4392	}
4393	if (byts[n + sp->ts_curi] != NUL) {
4394	// Found a byte to insert.
4395	PROF_STORE(sp->ts_state)
4396	sp->ts_state = STATE_INS;
4397	break;
4398	}
4399	++sp->ts_curi;
4400	}
4401	break;
4402
4403	FALLTHROUGH;
4404
4405	case STATE_INS:
4406	// Insert one byte. Repeat this for each possible byte at this
4407	// node.
4408	n = sp->ts_arridx;
4409	if (sp->ts_curi > byts[n]) {
4410	// Done all bytes at this node, go to next state.
4411	PROF_STORE(sp->ts_state)
4412	sp->ts_state = STATE_SWAP;
4413	break;
4414	}
4415
4416	// Do one more byte at this node, but:
4417	// - Skip NUL bytes.
4418	// - Skip the byte if it's equal to the byte in the word,
4419	// accepting that byte is always better.
4420	n += sp->ts_curi++;
4421	c = byts[n];
4422	if (soundfold && sp->ts_twordlen == `0` && c == `'*'`)
4423	// Inserting a vowel at the start of a word counts less,
4424	// see soundalike_score().
4425	newscore = `2` * SCORE_INS / `3`;
4426	else
4427	newscore = SCORE_INS;
4428	if (c != fword[sp->ts_fidx]
4429	&& TRY_DEEPER(su, stack, depth, newscore)) {
4430	go_deeper(stack, depth, newscore);
4431	#ifdef DEBUG_TRIEWALK
4432	sprintf(changename[depth], "%.*s-%s: insert %c",
4433	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4434	c);
4435	#endif
4436	++depth;
4437	sp = &stack[depth];
4438	tword[sp->ts_twordlen++] = c;
4439	sp->ts_arridx = idxs[n];
4440	if (has_mbyte) {
4441	fl = MB_BYTE2LEN(c);
4442	if (fl > `1`) {
4443	// There are following bytes for the same character.
4444	// We must find all bytes before trying
4445	// delete/insert/swap/etc.
4446	sp->ts_tcharlen = fl;
4447	sp->ts_tcharidx = `1`;
4448	sp->ts_isdiff = DIFF_INSERT;
4449	}
4450	} else
4451	fl = `1`;
4452	if (fl == `1`) {
4453	// If the previous character was the same, thus doubling a
4454	// character, give a bonus to the score. Also for
4455	// soundfold words (illogical but does give a better
4456	// score).
4457	if (sp->ts_twordlen >= `2`
4458	&& tword[sp->ts_twordlen - `2`] == c)
4459	sp->ts_score -= SCORE_INS - SCORE_INSDUP;
4460	}
4461	}
4462	break;
4463
4464	case STATE_SWAP:
4465	// Swap two bytes in the bad word: "12" -> "21".
4466	// We change "fword" here, it's changed back afterwards at
4467	// STATE_UNSWAP.
4468	p = fword + sp->ts_fidx;
4469	c = *p;
4470	if (c == NUL) {
4471	// End of word, can't swap or replace.
4472	PROF_STORE(sp->ts_state)
4473	sp->ts_state = STATE_FINAL;
4474	break;
4475	}
4476
4477	// Don't swap if the first character is not a word character.
4478	// SWAP3 etc. also don't make sense then.
4479	if (!soundfold && !spell_iswordp(p, curwin)) {
4480	PROF_STORE(sp->ts_state)
4481	sp->ts_state = STATE_REP_INI;
4482	break;
4483	}
4484
4485	n = MB_CPTR2LEN(p);
4486	c = utf_ptr2char(p);
4487	if (p[n] == NUL) {
4488	c2 = NUL;
4489	} else if (!soundfold && !spell_iswordp(p + n, curwin)) {
4490	c2 = c; // don't swap non-word char
4491	} else {
4492	c2 = utf_ptr2char(p + n);
4493	}
4494
4495	// When the second character is NUL we can't swap.
4496	if (c2 == NUL) {
4497	PROF_STORE(sp->ts_state)
4498	sp->ts_state = STATE_REP_INI;
4499	break;
4500	}
4501
4502	// When characters are identical, swap won't do anything.
4503	// Also get here if the second char is not a word character.
4504	if (c == c2) {
4505	PROF_STORE(sp->ts_state)
4506	sp->ts_state = STATE_SWAP3;
4507	break;
4508	}
4509	if (TRY_DEEPER(su, stack, depth, SCORE_SWAP)) {
4510	go_deeper(stack, depth, SCORE_SWAP);
4511	#ifdef DEBUG_TRIEWALK
4512	snprintf(changename[depth], sizeof(changename[`0`]),
4513	"%.*s-%s: swap %c and %c",
4514	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4515	c, c2);
4516	#endif
4517	PROF_STORE(sp->ts_state)
4518	sp->ts_state = STATE_UNSWAP;
4519	depth++;
4520	fl = mb_char2len(c2);
4521	memmove(p, p + n, fl);
4522	utf_char2bytes(c, p + fl);
4523	stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
4524	} else {
4525	// If this swap doesn't work then SWAP3 won't either.
4526	PROF_STORE(sp->ts_state)
4527	sp->ts_state = STATE_REP_INI;
4528	}
4529	break;
4530
4531	case STATE_UNSWAP:
4532	// Undo the STATE_SWAP swap: "21" -> "12".
4533	p = fword + sp->ts_fidx;
4534	n = MB_PTR2LEN(p);
4535	c = utf_ptr2char(p + n);
4536	memmove(p + MB_PTR2LEN(p + n), p, n);
4537	utf_char2bytes(c, p);
4538
4539	FALLTHROUGH;
4540
4541	case STATE_SWAP3:
4542	// Swap two bytes, skipping one: "123" -> "321". We change
4543	// "fword" here, it's changed back afterwards at STATE_UNSWAP3.
4544	p = fword + sp->ts_fidx;
4545	n = MB_CPTR2LEN(p);
4546	c = utf_ptr2char(p);
4547	fl = MB_CPTR2LEN(p + n);
4548	c2 = utf_ptr2char(p + n);
4549	if (!soundfold && !spell_iswordp(p + n + fl, curwin)) {
4550	c3 = c; // don't swap non-word char
4551	} else {
4552	c3 = utf_ptr2char(p + n + fl);
4553	}
4554
4555	// When characters are identical: "121" then SWAP3 result is
4556	// identical, ROT3L result is same as SWAP: "211", ROT3L result is
4557	// same as SWAP on next char: "112". Thus skip all swapping.
4558	// Also skip when c3 is NUL.
4559	// Also get here when the third character is not a word character.
4560	// Second character may any char: "a.b" -> "b.a"
4561	if (c == c3 \|\| c3 == NUL) {
4562	PROF_STORE(sp->ts_state)
4563	sp->ts_state = STATE_REP_INI;
4564	break;
4565	}
4566	if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) {
4567	go_deeper(stack, depth, SCORE_SWAP3);
4568	#ifdef DEBUG_TRIEWALK
4569	sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
4570	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4571	c, c3);
4572	#endif
4573	PROF_STORE(sp->ts_state)
4574	sp->ts_state = STATE_UNSWAP3;
4575	depth++;
4576	tl = mb_char2len(c3);
4577	memmove(p, p + n + fl, tl);
4578	utf_char2bytes(c2, p + tl);
4579	utf_char2bytes(c, p + fl + tl);
4580	stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
4581	} else {
4582	PROF_STORE(sp->ts_state)
4583	sp->ts_state = STATE_REP_INI;
4584	}
4585	break;
4586
4587	case STATE_UNSWAP3:
4588	// Undo STATE_SWAP3: "321" -> "123"
4589	p = fword + sp->ts_fidx;
4590	n = MB_PTR2LEN(p);
4591	c2 = utf_ptr2char(p + n);
4592	fl = MB_PTR2LEN(p + n);
4593	c = utf_ptr2char(p + n + fl);
4594	tl = MB_PTR2LEN(p + n + fl);
4595	memmove(p + fl + tl, p, n);
4596	utf_char2bytes(c, p);
4597	utf_char2bytes(c2, p + tl);
4598	p = p + tl;
4599
4600	if (!soundfold && !spell_iswordp(p, curwin)) {
4601	// Middle char is not a word char, skip the rotate. First and
4602	// third char were already checked at swap and swap3.
4603	PROF_STORE(sp->ts_state)
4604	sp->ts_state = STATE_REP_INI;
4605	break;
4606	}
4607
4608	// Rotate three characters left: "123" -> "231". We change
4609	// "fword" here, it's changed back afterwards at STATE_UNROT3L.
4610	if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) {
4611	go_deeper(stack, depth, SCORE_SWAP3);
4612	#ifdef DEBUG_TRIEWALK
4613	p = fword + sp->ts_fidx;
4614	sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
4615	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4616	p[`0`], p[`1`], p[`2`]);
4617	#endif
4618	PROF_STORE(sp->ts_state)
4619	sp->ts_state = STATE_UNROT3L;
4620	++depth;
4621	p = fword + sp->ts_fidx;
4622	n = MB_CPTR2LEN(p);
4623	c = utf_ptr2char(p);
4624	fl = MB_CPTR2LEN(p + n);
4625	fl += MB_CPTR2LEN(p + n + fl);
4626	memmove(p, p + n, fl);
4627	utf_char2bytes(c, p + fl);
4628	stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
4629	} else {
4630	PROF_STORE(sp->ts_state)
4631	sp->ts_state = STATE_REP_INI;
4632	}
4633	break;
4634
4635	case STATE_UNROT3L:
4636	// Undo ROT3L: "231" -> "123"
4637	p = fword + sp->ts_fidx;
4638	n = MB_PTR2LEN(p);
4639	n += MB_PTR2LEN(p + n);
4640	c = utf_ptr2char(p + n);
4641	tl = MB_PTR2LEN(p + n);
4642	memmove(p + tl, p, n);
4643	utf_char2bytes(c, p);
4644
4645	// Rotate three bytes right: "123" -> "312". We change "fword"
4646	// here, it's changed back afterwards at STATE_UNROT3R.
4647	if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) {
4648	go_deeper(stack, depth, SCORE_SWAP3);
4649	#ifdef DEBUG_TRIEWALK
4650	p = fword + sp->ts_fidx;
4651	sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
4652	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4653	p[`0`], p[`1`], p[`2`]);
4654	#endif
4655	PROF_STORE(sp->ts_state)
4656	sp->ts_state = STATE_UNROT3R;
4657	++depth;
4658	p = fword + sp->ts_fidx;
4659	n = MB_CPTR2LEN(p);
4660	n += MB_CPTR2LEN(p + n);
4661	c = utf_ptr2char(p + n);
4662	tl = MB_CPTR2LEN(p + n);
4663	memmove(p + tl, p, n);
4664	utf_char2bytes(c, p);
4665	stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
4666	} else {
4667	PROF_STORE(sp->ts_state)
4668	sp->ts_state = STATE_REP_INI;
4669	}
4670	break;
4671
4672	case STATE_UNROT3R:
4673	// Undo ROT3R: "312" -> "123"
4674	p = fword + sp->ts_fidx;
4675	c = utf_ptr2char(p);
4676	tl = MB_PTR2LEN(p);
4677	n = MB_PTR2LEN(p + tl);
4678	n += MB_PTR2LEN(p + tl + n);
4679	memmove(p, p + tl, n);
4680	utf_char2bytes(c, p + n);
4681
4682	FALLTHROUGH;
4683
4684	case STATE_REP_INI:
4685	// Check if matching with REP items from the .aff file would work.
4686	// Quickly skip if:
4687	// - there are no REP items and we are not in the soundfold trie
4688	// - the score is going to be too high anyway
4689	// - already applied a REP item or swapped here
4690	if ((lp->lp_replang == NULL && !soundfold)
4691	\|\| sp->ts_score + SCORE_REP >= su->su_maxscore
4692	\|\| sp->ts_fidx < sp->ts_fidxtry) {
4693	PROF_STORE(sp->ts_state)
4694	sp->ts_state = STATE_FINAL;
4695	break;
4696	}
4697
4698	// Use the first byte to quickly find the first entry that may
4699	// match. If the index is -1 there is none.
4700	if (soundfold)
4701	sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
4702	else
4703	sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
4704
4705	if (sp->ts_curi < `0`) {
4706	PROF_STORE(sp->ts_state)
4707	sp->ts_state = STATE_FINAL;
4708	break;
4709	}
4710
4711	PROF_STORE(sp->ts_state)
4712	sp->ts_state = STATE_REP;
4713	FALLTHROUGH;
4714
4715	case STATE_REP:
4716	// Try matching with REP items from the .aff file. For each match
4717	// replace the characters and check if the resulting word is
4718	// valid.
4719	p = fword + sp->ts_fidx;
4720
4721	if (soundfold)
4722	gap = &slang->sl_repsal;
4723	else
4724	gap = &lp->lp_replang->sl_rep;
4725	while (sp->ts_curi < gap->ga_len) {
4726	ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
4727	if (ftp->ft_from != p) {
4728	// past possible matching entries
4729	sp->ts_curi = gap->ga_len;
4730	break;
4731	}
4732	if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == `0`
4733	&& TRY_DEEPER(su, stack, depth, SCORE_REP)) {
4734	go_deeper(stack, depth, SCORE_REP);
4735	#ifdef DEBUG_TRIEWALK
4736	sprintf(changename[depth], "%.*s-%s: replace %s with %s",
4737	sp->ts_twordlen, tword, fword + sp->ts_fidx,
4738	ftp->ft_from, ftp->ft_to);
4739	#endif
4740	// Need to undo this afterwards.
4741	PROF_STORE(sp->ts_state)
4742	sp->ts_state = STATE_REP_UNDO;
4743
4744	// Change the "from" to the "to" string.
4745	++depth;
4746	fl = (int)STRLEN(ftp->ft_from);
4747	tl = (int)STRLEN(ftp->ft_to);
4748	if (fl != tl) {
4749	STRMOVE(p + tl, p + fl);
4750	repextra += tl - fl;
4751	}
4752	memmove(p, ftp->ft_to, tl);
4753	stack[depth].ts_fidxtry = sp->ts_fidx + tl;
4754	stack[depth].ts_tcharlen = `0`;
4755	break;
4756	}
4757	}
4758
4759	if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
4760	// No (more) matches.
4761	PROF_STORE(sp->ts_state)
4762	sp->ts_state = STATE_FINAL;
4763
4764	break;
4765
4766	case STATE_REP_UNDO:
4767	// Undo a REP replacement and continue with the next one.
4768	if (soundfold)
4769	gap = &slang->sl_repsal;
4770	else
4771	gap = &lp->lp_replang->sl_rep;
4772	ftp = (fromto_T *)gap->ga_data + sp->ts_curi - `1`;
4773	fl = (int)STRLEN(ftp->ft_from);
4774	tl = (int)STRLEN(ftp->ft_to);
4775	p = fword + sp->ts_fidx;
4776	if (fl != tl) {
4777	STRMOVE(p + fl, p + tl);
4778	repextra -= tl - fl;
4779	}
4780	memmove(p, ftp->ft_from, fl);
4781	PROF_STORE(sp->ts_state)
4782	sp->ts_state = STATE_REP;
4783	break;
4784
4785	default:
4786	// Did all possible states at this level, go up one level.
4787	--depth;
4788
4789	if (depth >= `0` && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) {
4790	// Continue in or go back to the prefix tree.
4791	byts = pbyts;
4792	idxs = pidxs;
4793	}
4794
4795	// Don't check for CTRL-C too often, it takes time.
4796	if (--breakcheckcount == `0`) {
4797	os_breakcheck();
4798	breakcheckcount = `1000`;
4799	}
4800	}
4801	}
4802	}
4803
4804
4805	// Go one level deeper in the tree.
4806	static void go_deeper(trystate_T stack, int* depth, int score_add)
4807	{
4808	stack[depth + `1`] = stack[depth];
4809	stack[depth + `1`].ts_state = STATE_START;
4810	stack[depth + `1`].ts_score = stack[depth].ts_score + score_add;
4811	stack[depth + `1`].ts_curi = `1`; // start just after length byte
4812	stack[depth + `1`].ts_flags = `0`;
4813	}
4814
4815	// Case-folding may change the number of bytes: Count nr of chars in
4816	// fword[flen] and return the byte length of that many chars in "word".
4817	static int nofold_len(char_u fword, int* flen, char_u *word)
4818	{
4819	char_u *p;
4820	int i = `0`;
4821
4822	for (p = fword; p < fword + flen; MB_PTR_ADV(p)) {
4823	i++;
4824	}
4825	for (p = word; i > `0`; MB_PTR_ADV(p)) {
4826	i--;
4827	}
4828	return (int)(p - word);
4829	}
4830
4831	// "fword" is a good word with case folded. Find the matching keep-case
4832	// words and put it in "kword".
4833	// Theoretically there could be several keep-case words that result in the
4834	// same case-folded word, but we only find one...
4835	static void find_keepcap_word(slang_T slang, char_u fword, char_u *kword)
4836	{
4837	char_u uword[MAXWLEN]; // "fword" in upper-case
4838	int depth;
4839	idx_T tryidx;
4840
4841	// The following arrays are used at each depth in the tree.
4842	idx_T arridx[MAXWLEN];
4843	int round[MAXWLEN];
4844	int fwordidx[MAXWLEN];
4845	int uwordidx[MAXWLEN];
4846	int kwordlen[MAXWLEN];
4847
4848	int flen, ulen;
4849	int l;
4850	int len;
4851	int c;
4852	idx_T lo, hi, m;
4853	char_u *p;
4854	char_u byts = slang->sl_kbyts; // array with bytes of the words*
4855	idx_T idxs = slang->sl_kidxs; // array with indexes*
4856
4857	if (byts == NULL) {
4858	// array is empty: "cannot happen"
4859	*kword = NUL;
4860	return;
4861	}
4862
4863	// Make an all-cap version of "fword".
4864	allcap_copy(fword, uword);
4865
4866	// Each character needs to be tried both case-folded and upper-case.
4867	// All this gets very complicated if we keep in mind that changing case
4868	// may change the byte length of a multi-byte character...
4869	depth = `0`;
4870	arridx[`0`] = `0`;
4871	round[`0`] = `0`;
4872	fwordidx[`0`] = `0`;
4873	uwordidx[`0`] = `0`;
4874	kwordlen[`0`] = `0`;
4875	while (depth >= `0`) {
4876	if (fword[fwordidx[depth]] == NUL) {
4877	// We are at the end of "fword". If the tree allows a word to end
4878	// here we have found a match.
4879	if (byts[arridx[depth] + `1`] == `0`) {
4880	kword[kwordlen[depth]] = NUL;
4881	return;
4882	}
4883
4884	// kword is getting too long, continue one level up
4885	--depth;
4886	} else if (++round[depth] > `2`) {
4887	// tried both fold-case and upper-case character, continue one
4888	// level up
4889	--depth;
4890	} else {
4891	// round[depth] == 1: Try using the folded-case character.
4892	// round[depth] == 2: Try using the upper-case character.
4893	if (has_mbyte) {
4894	flen = MB_CPTR2LEN(fword + fwordidx[depth]);
4895	ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
4896	} else {
4897	ulen = flen = `1`;
4898	}
4899	if (round[depth] == `1`) {
4900	p = fword + fwordidx[depth];
4901	l = flen;
4902	} else {
4903	p = uword + uwordidx[depth];
4904	l = ulen;
4905	}
4906
4907	for (tryidx = arridx[depth]; l > `0`; --l) {
4908	// Perform a binary search in the list of accepted bytes.
4909	len = byts[tryidx++];
4910	c = *p++;
4911	lo = tryidx;
4912	hi = tryidx + len - `1`;
4913	while (lo < hi) {
4914	m = (lo + hi) / `2`;
4915	if (byts[m] > c)
4916	hi = m - `1`;
4917	else if (byts[m] < c)
4918	lo = m + `1`;
4919	else {
4920	lo = hi = m;
4921	break;
4922	}
4923	}
4924
4925	// Stop if there is no matching byte.
4926	if (hi < lo \|\| byts[lo] != c)
4927	break;
4928
4929	// Continue at the child (if there is one).
4930	tryidx = idxs[lo];
4931	}
4932
4933	if (l == `0`) {
4934	// Found the matching char. Copy it to "kword" and go a
4935	// level deeper.
4936	if (round[depth] == `1`) {
4937	STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
4938	flen);
4939	kwordlen[depth + `1`] = kwordlen[depth] + flen;
4940	} else {
4941	STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
4942	ulen);
4943	kwordlen[depth + `1`] = kwordlen[depth] + ulen;
4944	}
4945	fwordidx[depth + `1`] = fwordidx[depth] + flen;
4946	uwordidx[depth + `1`] = uwordidx[depth] + ulen;
4947
4948	++depth;
4949	arridx[depth] = tryidx;
4950	round[depth] = `0`;
4951	}
4952	}
4953	}
4954
4955	// Didn't find it: "cannot happen".
4956	*kword = NUL;
4957	}
4958
4959	// Compute the sound-a-like score for suggestions in su->su_ga and add them to
4960	// su->su_sga.
4961	static void score_comp_sal(suginfo_T *su)
4962	{
4963	langp_T *lp;
4964	char_u badsound[MAXWLEN];
4965	int i;
4966	suggest_T *stp;
4967	suggest_T *sstp;
4968	int score;
4969
4970	ga_grow(&su->su_sga, su->su_ga.ga_len);
4971
4972	// Use the sound-folding of the first language that supports it.
4973	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
4974	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
4975	if (!GA_EMPTY(&lp->lp_slang->sl_sal)) {
4976	// soundfold the bad word
4977	spell_soundfold(lp->lp_slang, su->su_fbadword, true, badsound);
4978
4979	for (i = `0`; i < su->su_ga.ga_len; ++i) {
4980	stp = &SUG(su->su_ga, i);
4981
4982	// Case-fold the suggested word, sound-fold it and compute the
4983	// sound-a-like score.
4984	score = stp_sal_score(stp, su, lp->lp_slang, badsound);
4985	if (score < SCORE_MAXMAX) {
4986	// Add the suggestion.
4987	sstp = &SUG(su->su_sga, su->su_sga.ga_len);
4988	sstp->st_word = vim_strsave(stp->st_word);
4989	sstp->st_wordlen = stp->st_wordlen;
4990	sstp->st_score = score;
4991	sstp->st_altscore = `0`;
4992	sstp->st_orglen = stp->st_orglen;
4993	++su->su_sga.ga_len;
4994	}
4995	}
4996	break;
4997	}
4998	}
4999	}
5000
5001	// Combine the list of suggestions in su->su_ga and su->su_sga.
5002	// They are entwined.
5003	static void score_combine(suginfo_T *su)
5004	{
5005	garray_T ga;
5006	garray_T *gap;
5007	langp_T *lp;
5008	suggest_T *stp;
5009	char_u *p;
5010	char_u badsound[MAXWLEN];
5011	int round;
5012	slang_T *slang = NULL;
5013
5014	// Add the alternate score to su_ga.
5015	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
5016	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5017	if (!GA_EMPTY(&lp->lp_slang->sl_sal)) {
5018	// soundfold the bad word
5019	slang = lp->lp_slang;
5020	spell_soundfold(slang, su->su_fbadword, true, badsound);
5021
5022	for (int i = `0`; i < su->su_ga.ga_len; ++i) {
5023	stp = &SUG(su->su_ga, i);
5024	stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
5025	if (stp->st_altscore == SCORE_MAXMAX)
5026	stp->st_score = (stp->st_score * `3` + SCORE_BIG) / `4`;
5027	else
5028	stp->st_score = (stp->st_score * `3`
5029	+ stp->st_altscore) / `4`;
5030	stp->st_salscore = false;
5031	}
5032	break;
5033	}
5034	}
5035
5036	if (slang == NULL) { // Using "double" without sound folding.
5037	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
5038	su->su_maxcount);
5039	return;
5040	}
5041
5042	// Add the alternate score to su_sga.
5043	for (int i = `0`; i < su->su_sga.ga_len; ++i) {
5044	stp = &SUG(su->su_sga, i);
5045	stp->st_altscore = spell_edit_score(slang,
5046	su->su_badword, stp->st_word);
5047	if (stp->st_score == SCORE_MAXMAX)
5048	stp->st_score = (SCORE_BIG * `7` + stp->st_altscore) / `8`;
5049	else
5050	stp->st_score = (stp->st_score * `7` + stp->st_altscore) / `8`;
5051	stp->st_salscore = true;
5052	}
5053
5054	// Remove bad suggestions, sort the suggestions and truncate at "maxcount"
5055	// for both lists.
5056	check_suggestions(su, &su->su_ga);
5057	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
5058	check_suggestions(su, &su->su_sga);
5059	(void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
5060
5061	ga_init(&ga, (int)sizeof(suginfo_T), `1`);
5062	ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len);
5063
5064	stp = &SUG(ga, `0`);
5065	for (int i = `0`; i < su->su_ga.ga_len \|\| i < su->su_sga.ga_len; ++i) {
5066	// round 1: get a suggestion from su_ga
5067	// round 2: get a suggestion from su_sga
5068	for (round = `1`; round <= `2`; ++round) {
5069	gap = round == `1` ? &su->su_ga : &su->su_sga;
5070	if (i < gap->ga_len) {
5071	// Don't add a word if it's already there.
5072	p = SUG(*gap, i).st_word;
5073	int j;
5074	for (j = `0`; j < ga.ga_len; ++j)
5075	if (STRCMP(stp[j].st_word, p) == `0`)
5076	break;
5077	if (j == ga.ga_len)
5078	stp[ga.ga_len++] = SUG(*gap, i);
5079	else
5080	xfree(p);
5081	}
5082	}
5083	}
5084
5085	ga_clear(&su->su_ga);
5086	ga_clear(&su->su_sga);
5087
5088	// Truncate the list to the number of suggestions that will be displayed.
5089	if (ga.ga_len > su->su_maxcount) {
5090	for (int i = su->su_maxcount; i < ga.ga_len; ++i) {
5091	xfree(stp[i].st_word);
5092	}
5093	ga.ga_len = su->su_maxcount;
5094	}
5095
5096	su->su_ga = ga;
5097	}
5098
5099	// For the goodword in "stp" compute the soundalike score compared to the
5100	// badword.
5101	static int
5102	stp_sal_score (
5103	suggest_T *stp,
5104	suginfo_T *su,
5105	slang_T *slang,
5106	char_u badsound // sound-folded badword*
5107	)
5108	{
5109	char_u *p;
5110	char_u *pbad;
5111	char_u *pgood;
5112	char_u badsound2[MAXWLEN];
5113	char_u fword[MAXWLEN];
5114	char_u goodsound[MAXWLEN];
5115	char_u goodword[MAXWLEN];
5116	int lendiff;
5117
5118	lendiff = su->su_badlen - stp->st_orglen;
5119	if (lendiff >= `0`)
5120	pbad = badsound;
5121	else {
5122	// soundfold the bad word with more characters following
5123	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
5124
5125	// When joining two words the sound often changes a lot. E.g., "t he"
5126	// sounds like "t h" while "the" sounds like "@". Avoid that by
5127	// removing the space. Don't do it when the good word also contains a
5128	// space.
5129	if (ascii_iswhite(su->su_badptr[su->su_badlen])
5130	&& *skiptowhite(stp->st_word) == NUL)
5131	for (p = fword; *(p = skiptowhite(p)) != NUL; )
5132	STRMOVE(p, p + `1`);
5133
5134	spell_soundfold(slang, fword, true, badsound2);
5135	pbad = badsound2;
5136	}
5137
5138	if (lendiff > `0` && stp->st_wordlen + lendiff < MAXWLEN) {
5139	// Add part of the bad word to the good word, so that we soundfold
5140	// what replaces the bad word.
5141	STRCPY(goodword, stp->st_word);
5142	STRLCPY(goodword + stp->st_wordlen,
5143	su->su_badptr + su->su_badlen - lendiff, lendiff + `1`);
5144	pgood = goodword;
5145	} else
5146	pgood = stp->st_word;
5147
5148	// Sound-fold the word and compute the score for the difference.
5149	spell_soundfold(slang, pgood, false, goodsound);
5150
5151	return soundalike_score(goodsound, pbad);
5152	}
5153
5154	static sftword_T dumsft;
5155	#define HIKEY2SFT(p) ((sftword_T )(p - (dumsft.sft_word - (char_u )&dumsft)))
5156	#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key)
5157
5158	// Prepare for calling suggest_try_soundalike().
5159	static void suggest_try_soundalike_prep(void)
5160	{
5161	langp_T *lp;
5162	slang_T *slang;
5163
5164	// Do this for all languages that support sound folding and for which a
5165	// .sug file has been loaded.
5166	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
5167	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5168	slang = lp->lp_slang;
5169	if (!GA_EMPTY(&slang->sl_sal) && slang->sl_sbyts != NULL)
5170	// prepare the hashtable used by add_sound_suggest()
5171	hash_init(&slang->sl_sounddone);
5172	}
5173	}
5174
5175	// Find suggestions by comparing the word in a sound-a-like form.
5176	// Note: This doesn't support postponed prefixes.
5177	static void suggest_try_soundalike(suginfo_T *su)
5178	{
5179	char_u salword[MAXWLEN];
5180	langp_T *lp;
5181	slang_T *slang;
5182
5183	// Do this for all languages that support sound folding and for which a
5184	// .sug file has been loaded.
5185	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
5186	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5187	slang = lp->lp_slang;
5188	if (!GA_EMPTY(&slang->sl_sal) && slang->sl_sbyts != NULL) {
5189	// soundfold the bad word
5190	spell_soundfold(slang, su->su_fbadword, true, salword);
5191
5192	// try all kinds of inserts/deletes/swaps/etc.
5193	// TODO: also soundfold the next words, so that we can try joining
5194	// and splitting
5195	#ifdef SUGGEST_PROFILE
5196	prof_init();
5197	#endif
5198	suggest_trie_walk(su, lp, salword, true);
5199	#ifdef SUGGEST_PROFILE
5200	prof_report("soundalike");
5201	#endif
5202	}
5203	}
5204	}
5205
5206	// Finish up after calling suggest_try_soundalike().
5207	static void suggest_try_soundalike_finish(void)
5208	{
5209	langp_T *lp;
5210	slang_T *slang;
5211	int todo;
5212	hashitem_T *hi;
5213
5214	// Do this for all languages that support sound folding and for which a
5215	// .sug file has been loaded.
5216	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
5217	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5218	slang = lp->lp_slang;
5219	if (!GA_EMPTY(&slang->sl_sal) && slang->sl_sbyts != NULL) {
5220	// Free the info about handled words.
5221	todo = (int)slang->sl_sounddone.ht_used;
5222	for (hi = slang->sl_sounddone.ht_array; todo > `0`; ++hi)
5223	if (!HASHITEM_EMPTY(hi)) {
5224	xfree(HI2SFT(hi));
5225	--todo;
5226	}
5227
5228	// Clear the hashtable, it may also be used by another region.
5229	hash_clear(&slang->sl_sounddone);
5230	hash_init(&slang->sl_sounddone);
5231	}
5232	}
5233	}
5234
5235	// A match with a soundfolded word is found. Add the good word(s) that
5236	// produce this soundfolded word.
5237	static void
5238	add_sound_suggest (
5239	suginfo_T *su,
5240	char_u *goodword,
5241	int score, // soundfold score
5242	langp_T *lp
5243	)
5244	{
5245	slang_T slang = lp->lp_slang; // language for sound folding*
5246	int sfwordnr;
5247	char_u *nrline;
5248	int orgnr;
5249	char_u theword[MAXWLEN];
5250	int i;
5251	int wlen;
5252	char_u *byts;
5253	idx_T *idxs;
5254	int n;
5255	int wordcount;
5256	int wc;
5257	int goodscore;
5258	hash_T hash;
5259	hashitem_T *hi;
5260	sftword_T *sft;
5261	int bc, gc;
5262	int limit;
5263
5264	// It's very well possible that the same soundfold word is found several
5265	// times with different scores. Since the following is quite slow only do
5266	// the words that have a better score than before. Use a hashtable to
5267	// remember the words that have been done.
5268	hash = hash_hash(goodword);
5269	const size_t goodword_len = STRLEN(goodword);
5270	hi = hash_lookup(&slang->sl_sounddone, (const char *)goodword, goodword_len,
5271	hash);
5272	if (HASHITEM_EMPTY(hi)) {
5273	sft = xmalloc(sizeof(sftword_T) + goodword_len);
5274	sft->sft_score = score;
5275	memcpy(sft->sft_word, goodword, goodword_len + `1`);
5276	hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
5277	} else {
5278	sft = HI2SFT(hi);
5279	if (score >= sft->sft_score)
5280	return;
5281	sft->sft_score = score;
5282	}
5283
5284	// Find the word nr in the soundfold tree.
5285	sfwordnr = soundfold_find(slang, goodword);
5286	if (sfwordnr < `0`) {
5287	internal_error("add_sound_suggest()");
5288	return;
5289	}
5290
5291	// Go over the list of good words that produce this soundfold word
5292	nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)sfwordnr + `1`, false);
5293	orgnr = `0`;
5294	while (*nrline != NUL) {
5295	// The wordnr was stored in a minimal nr of bytes as an offset to the
5296	// previous wordnr.
5297	orgnr += bytes2offset(&nrline);
5298
5299	byts = slang->sl_fbyts;
5300	idxs = slang->sl_fidxs;
5301
5302	// Lookup the word "orgnr" one of the two tries.
5303	n = `0`;
5304	wordcount = `0`;
5305	for (wlen = `0`; wlen < MAXWLEN - `3`; ++wlen) {
5306	i = `1`;
5307	if (wordcount == orgnr && byts[n + `1`] == NUL)
5308	break; // found end of word
5309
5310	if (byts[n + `1`] == NUL)
5311	++wordcount;
5312
5313	// skip over the NUL bytes
5314	for (; byts[n + i] == NUL; ++i)
5315	if (i > byts[n]) { // safety check
5316	STRCPY(theword + wlen, "BAD");
5317	wlen += `3`;
5318	goto badword;
5319	}
5320
5321	// One of the siblings must have the word.
5322	for (; i < byts[n]; ++i) {
5323	wc = idxs[idxs[n + i]]; // nr of words under this byte
5324	if (wordcount + wc > orgnr)
5325	break;
5326	wordcount += wc;
5327	}
5328
5329	theword[wlen] = byts[n + i];
5330	n = idxs[n + i];
5331	}
5332	badword:
5333	theword[wlen] = NUL;
5334
5335	// Go over the possible flags and regions.
5336	for (; i <= byts[n] && byts[n + i] == NUL; ++i) {
5337	char_u cword[MAXWLEN];
5338	char_u *p;
5339	int flags = (int)idxs[n + i];
5340
5341	// Skip words with the NOSUGGEST flag
5342	if (flags & WF_NOSUGGEST)
5343	continue;
5344
5345	if (flags & WF_KEEPCAP) {
5346	// Must find the word in the keep-case tree.
5347	find_keepcap_word(slang, theword, cword);
5348	p = cword;
5349	} else {
5350	flags \|= su->su_badflags;
5351	if ((flags & WF_CAPMASK) != `0`) {
5352	// Need to fix case according to "flags".
5353	make_case_word(theword, cword, flags);
5354	p = cword;
5355	} else
5356	p = theword;
5357	}
5358
5359	// Add the suggestion.
5360	if (sps_flags & SPS_DOUBLE) {
5361	// Add the suggestion if the score isn't too bad.
5362	if (score <= su->su_maxscore)
5363	add_suggestion(su, &su->su_sga, p, su->su_badlen,
5364	score, `0`, false, slang, false);
5365	} else {
5366	// Add a penalty for words in another region.
5367	if ((flags & WF_REGION)
5368	&& (((unsigned)flags >> `16`) & lp->lp_region) == `0`)
5369	goodscore = SCORE_REGION;
5370	else
5371	goodscore = `0`;
5372
5373	// Add a small penalty for changing the first letter from
5374	// lower to upper case. Helps for "tath" -> "Kath", which is
5375	// less common than "tath" -> "path". Don't do it when the
5376	// letter is the same, that has already been counted.
5377	gc = PTR2CHAR(p);
5378	if (SPELL_ISUPPER(gc)) {
5379	bc = PTR2CHAR(su->su_badword);
5380	if (!SPELL_ISUPPER(bc)
5381	&& SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
5382	goodscore += SCORE_ICASE / `2`;
5383	}
5384
5385	// Compute the score for the good word. This only does letter
5386	// insert/delete/swap/replace. REP items are not considered,
5387	// which may make the score a bit higher.
5388	// Use a limit for the score to make it work faster. Use
5389	// MAXSCORE(), because RESCORE() will change the score.
5390	// If the limit is very high then the iterative method is
5391	// inefficient, using an array is quicker.
5392	limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
5393	if (limit > SCORE_LIMITMAX)
5394	goodscore += spell_edit_score(slang, su->su_badword, p);
5395	else
5396	goodscore += spell_edit_score_limit(slang, su->su_badword,
5397	p, limit);
5398
5399	// When going over the limit don't bother to do the rest.
5400	if (goodscore < SCORE_MAXMAX) {
5401	// Give a bonus to words seen before.
5402	goodscore = score_wordcount_adj(slang, goodscore, p, false);
5403
5404	// Add the suggestion if the score isn't too bad.
5405	goodscore = RESCORE(goodscore, score);
5406	if (goodscore <= su->su_sfmaxscore)
5407	add_suggestion(su, &su->su_ga, p, su->su_badlen,
5408	goodscore, score, true, slang, true);
5409	}
5410	}
5411	}
5412	}
5413	}
5414
5415	// Find word "word" in fold-case tree for "slang" and return the word number.
5416	static int soundfold_find(slang_T slang, char_u word)
5417	{
5418	idx_T arridx = `0`;
5419	int len;
5420	int wlen = `0`;
5421	int c;
5422	char_u *ptr = word;
5423	char_u *byts;
5424	idx_T *idxs;
5425	int wordnr = `0`;
5426
5427	byts = slang->sl_sbyts;
5428	idxs = slang->sl_sidxs;
5429
5430	for (;; ) {
5431	// First byte is the number of possible bytes.
5432	len = byts[arridx++];
5433
5434	// If the first possible byte is a zero the word could end here.
5435	// If the word ends we found the word. If not skip the NUL bytes.
5436	c = ptr[wlen];
5437	if (byts[arridx] == NUL) {
5438	if (c == NUL)
5439	break;
5440
5441	// Skip over the zeros, there can be several.
5442	while (len > `0` && byts[arridx] == NUL) {
5443	++arridx;
5444	--len;
5445	}
5446	if (len == `0`)
5447	return -`1`; // no children, word should have ended here
5448	++wordnr;
5449	}
5450
5451	// If the word ends we didn't find it.
5452	if (c == NUL)
5453	return -`1`;
5454
5455	// Perform a binary search in the list of accepted bytes.
5456	if (c == TAB) // <Tab> is handled like <Space>
5457	c = `' '`;
5458	while (byts[arridx] < c) {
5459	// The word count is in the first idxs[] entry of the child.
5460	wordnr += idxs[idxs[arridx]];
5461	++arridx;
5462	if (--len == `0`) // end of the bytes, didn't find it
5463	return -`1`;
5464	}
5465	if (byts[arridx] != c) // didn't find the byte
5466	return -`1`;
5467
5468	// Continue at the child (if there is one).
5469	arridx = idxs[arridx];
5470	++wlen;
5471
5472	// One space in the good word may stand for several spaces in the
5473	// checked word.
5474	if (c == `' '`)
5475	while (ptr[wlen] == `' '` \|\| ptr[wlen] == TAB)
5476	++wlen;
5477	}
5478
5479	return wordnr;
5480	}
5481
5482	// Copy "fword" to "cword", fixing case according to "flags".
5483	static void make_case_word(char_u fword, char_u cword, int flags)
5484	{
5485	if (flags & WF_ALLCAP)
5486	// Make it all upper-case
5487	allcap_copy(fword, cword);
5488	else if (flags & WF_ONECAP)
5489	// Make the first letter upper-case
5490	onecap_copy(fword, cword, true);
5491	else
5492	// Use goodword as-is.
5493	STRCPY(cword, fword);
5494	}
5495
5496	// Returns true if "c1" and "c2" are similar characters according to the MAP
5497	// lines in the .aff file.
5498	static bool similar_chars(slang_T slang, int* c1, int c2)
5499	{
5500	int m1, m2;
5501	char_u buf[MB_MAXBYTES + `1`];
5502	hashitem_T *hi;
5503
5504	if (c1 >= `256`) {
5505	buf[utf_char2bytes(c1, buf)] = `0`;
5506	hi = hash_find(&slang->sl_map_hash, buf);
5507	if (HASHITEM_EMPTY(hi)) {
5508	m1 = `0`;
5509	} else {
5510	m1 = utf_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + `1`);
5511	}
5512	} else {
5513	m1 = slang->sl_map_array[c1];
5514	}
5515	if (m1 == `0`) {
5516	return false;
5517	}
5518
5519	if (c2 >= `256`) {
5520	buf[utf_char2bytes(c2, buf)] = `0`;
5521	hi = hash_find(&slang->sl_map_hash, buf);
5522	if (HASHITEM_EMPTY(hi)) {
5523	m2 = `0`;
5524	} else {
5525	m2 = utf_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + `1`);
5526	}
5527	} else {
5528	m2 = slang->sl_map_array[c2];
5529	}
5530
5531	return m1 == m2;
5532	}
5533
5534	// Adds a suggestion to the list of suggestions.
5535	// For a suggestion that is already in the list the lowest score is remembered.
5536	static void
5537	add_suggestion (
5538	suginfo_T *su,
5539	garray_T gap, // either su_ga or su_sga*
5540	const char_u *goodword,
5541	int badlenarg, // len of bad word replaced with "goodword"
5542	int score,
5543	int altscore,
5544	bool had_bonus, // value for st_had_bonus
5545	slang_T slang, // language for sound folding*
5546	bool maxsf // su_maxscore applies to soundfold score,
5547	// su_sfmaxscore to the total score.
5548	)
5549	{
5550	int goodlen; // len of goodword changed
5551	int badlen; // len of bad word changed
5552	suggest_T *stp;
5553	suggest_T new_sug;
5554
5555	// Minimize "badlen" for consistency. Avoids that changing "the the" to
5556	// "thee the" is added next to changing the first "the" the "thee".
5557	const char_u *pgood = goodword + STRLEN(goodword);
5558	char_u *pbad = su->su_badptr + badlenarg;
5559	for (;; ) {
5560	goodlen = (int)(pgood - goodword);
5561	badlen = (int)(pbad - su->su_badptr);
5562	if (goodlen <= `0` \|\| badlen <= `0`)
5563	break;
5564	MB_PTR_BACK(goodword, pgood);
5565	MB_PTR_BACK(su->su_badptr, pbad);
5566	if (utf_ptr2char(pgood) != utf_ptr2char(pbad)) {
5567	break;
5568	}
5569	}
5570
5571	if (badlen == `0` && goodlen == `0`)
5572	// goodword doesn't change anything; may happen for "the the" changing
5573	// the first "the" to itself.
5574	return;
5575
5576	int i;
5577	if (GA_EMPTY(gap)) {
5578	i = -`1`;
5579	} else {
5580	// Check if the word is already there. Also check the length that is
5581	// being replaced "thes," -> "these" is a different suggestion from
5582	// "thes" -> "these".
5583	stp = &SUG(*gap, `0`);
5584	for (i = gap->ga_len; --i >= `0`; ++stp) {
5585	if (stp->st_wordlen == goodlen
5586	&& stp->st_orglen == badlen
5587	&& STRNCMP(stp->st_word, goodword, goodlen) == `0`) {
5588	// Found it. Remember the word with the lowest score.
5589	if (stp->st_slang == NULL)
5590	stp->st_slang = slang;
5591
5592	new_sug.st_score = score;
5593	new_sug.st_altscore = altscore;
5594	new_sug.st_had_bonus = had_bonus;
5595
5596	if (stp->st_had_bonus != had_bonus) {
5597	// Only one of the two had the soundalike score computed.
5598	// Need to do that for the other one now, otherwise the
5599	// scores can't be compared. This happens because
5600	// suggest_try_change() doesn't compute the soundalike
5601	// word to keep it fast, while some special methods set
5602	// the soundalike score to zero.
5603	if (had_bonus)
5604	rescore_one(su, stp);
5605	else {
5606	new_sug.st_word = stp->st_word;
5607	new_sug.st_wordlen = stp->st_wordlen;
5608	new_sug.st_slang = stp->st_slang;
5609	new_sug.st_orglen = badlen;
5610	rescore_one(su, &new_sug);
5611	}
5612	}
5613
5614	if (stp->st_score > new_sug.st_score) {
5615	stp->st_score = new_sug.st_score;
5616	stp->st_altscore = new_sug.st_altscore;
5617	stp->st_had_bonus = new_sug.st_had_bonus;
5618	}
5619	break;
5620	}
5621	}
5622	}
5623
5624	if (i < `0`) {
5625	// Add a suggestion.
5626	stp = GA_APPEND_VIA_PTR(suggest_T, gap);
5627	stp->st_word = vim_strnsave(goodword, goodlen);
5628	stp->st_wordlen = goodlen;
5629	stp->st_score = score;
5630	stp->st_altscore = altscore;
5631	stp->st_had_bonus = had_bonus;
5632	stp->st_orglen = badlen;
5633	stp->st_slang = slang;
5634
5635	// If we have too many suggestions now, sort the list and keep
5636	// the best suggestions.
5637	if (gap->ga_len > SUG_MAX_COUNT(su)) {
5638	if (maxsf)
5639	su->su_sfmaxscore = cleanup_suggestions(gap,
5640	su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
5641	else
5642	su->su_maxscore = cleanup_suggestions(gap,
5643	su->su_maxscore, SUG_CLEAN_COUNT(su));
5644	}
5645	}
5646	}
5647
5648	// Suggestions may in fact be flagged as errors. Esp. for banned words and
5649	// for split words, such as "the the". Remove these from the list here.
5650	static void
5651	check_suggestions (
5652	suginfo_T *su,
5653	garray_T gap // either su_ga or su_sga*
5654	)
5655	{
5656	suggest_T *stp;
5657	char_u longword[MAXWLEN + `1`];
5658	int len;
5659	hlf_T attr;
5660
5661	stp = &SUG(*gap, `0`);
5662	for (int i = gap->ga_len - `1`; i >= `0`; --i) {
5663	// Need to append what follows to check for "the the".
5664	STRLCPY(longword, stp[i].st_word, MAXWLEN + `1`);
5665	len = stp[i].st_wordlen;
5666	STRLCPY(longword + len, su->su_badptr + stp[i].st_orglen,
5667	MAXWLEN - len + `1`);
5668	attr = HLF_COUNT;
5669	(void)spell_check(curwin, longword, &attr, NULL, false);
5670	if (attr != HLF_COUNT) {
5671	// Remove this entry.
5672	xfree(stp[i].st_word);
5673	--gap->ga_len;
5674	if (i < gap->ga_len)
5675	memmove(stp + i, stp + i + `1`,
5676	sizeof(suggest_T) * (gap->ga_len - i));
5677	}
5678	}
5679	}
5680
5681
5682	// Add a word to be banned.
5683	static void add_banned(suginfo_T su, char_u word)
5684	{
5685	char_u *s;
5686	hash_T hash;
5687	hashitem_T *hi;
5688
5689	hash = hash_hash(word);
5690	const size_t word_len = STRLEN(word);
5691	hi = hash_lookup(&su->su_banned, (const char *)word, word_len, hash);
5692	if (HASHITEM_EMPTY(hi)) {
5693	s = xmemdupz(word, word_len);
5694	hash_add_item(&su->su_banned, hi, s, hash);
5695	}
5696	}
5697
5698	// Recompute the score for all suggestions if sound-folding is possible. This
5699	// is slow, thus only done for the final results.
5700	static void rescore_suggestions(suginfo_T *su)
5701	{
5702	if (su->su_sallang != NULL) {
5703	for (int i = `0`; i < su->su_ga.ga_len; ++i) {
5704	rescore_one(su, &SUG(su->su_ga, i));
5705	}
5706	}
5707	}
5708
5709	// Recompute the score for one suggestion if sound-folding is possible.
5710	static void rescore_one(suginfo_T su, suggest_T stp)
5711	{
5712	slang_T *slang = stp->st_slang;
5713	char_u sal_badword[MAXWLEN];
5714	char_u *p;
5715
5716	// Only rescore suggestions that have no sal score yet and do have a
5717	// language.
5718	if (slang != NULL && !GA_EMPTY(&slang->sl_sal) && !stp->st_had_bonus) {
5719	if (slang == su->su_sallang)
5720	p = su->su_sal_badword;
5721	else {
5722	spell_soundfold(slang, su->su_fbadword, true, sal_badword);
5723	p = sal_badword;
5724	}
5725
5726	stp->st_altscore = stp_sal_score(stp, su, slang, p);
5727	if (stp->st_altscore == SCORE_MAXMAX)
5728	stp->st_altscore = SCORE_BIG;
5729	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
5730	stp->st_had_bonus = true;
5731	}
5732	}
5733
5734
5735	// Function given to qsort() to sort the suggestions on st_score.
5736	// First on "st_score", then "st_altscore" then alphabetically.
5737	static int sug_compare(const void s1, const* void *s2)
5738	{
5739	suggest_T p1 = (suggest_T )s1;
5740	suggest_T p2 = (suggest_T )s2;
5741	int n = p1->st_score - p2->st_score;
5742
5743	if (n == `0`) {
5744	n = p1->st_altscore - p2->st_altscore;
5745	if (n == `0`)
5746	n = STRICMP(p1->st_word, p2->st_word);
5747	}
5748	return n;
5749	}
5750
5751	// Cleanup the suggestions:
5752	// - Sort on score.
5753	// - Remove words that won't be displayed.
5754	// Returns the maximum score in the list or "maxscore" unmodified.
5755	static int
5756	cleanup_suggestions (
5757	garray_T *gap,
5758	int maxscore,
5759	int keep // nr of suggestions to keep
5760	)
5761	{
5762	suggest_T stp = &SUG(gap, `0`);
5763
5764	// Sort the list.
5765	qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
5766
5767	// Truncate the list to the number of suggestions that will be displayed.
5768	if (gap->ga_len > keep) {
5769	for (int i = keep; i < gap->ga_len; ++i) {
5770	xfree(stp[i].st_word);
5771	}
5772	gap->ga_len = keep;
5773	return stp[keep - `1`].st_score;
5774	}
5775	return maxscore;
5776	}
5777
5778	/// Soundfold a string, for soundfold()
5779	///
5780	/// @param[in] word Word to soundfold.
5781	///
5782	/// @return [allocated] soundfolded string or NULL in case of error. May return
5783	/// copy of the input string if soundfolding is not
5784	/// supported by any of the languages in &spellang.
5785	char eval_soundfold(const* char *const word)
5786	FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
5787	{
5788	if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL) {
5789	// Use the sound-folding of the first language that supports it.
5790	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; lpi++) {
5791	langp_T *const lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5792	if (!GA_EMPTY(&lp->lp_slang->sl_sal)) {
5793	// soundfold the word
5794	char_u sound[MAXWLEN];
5795	spell_soundfold(lp->lp_slang, (char_u *)word, false, sound);
5796	return xstrdup((const char *)sound);
5797	}
5798	}
5799	}
5800
5801	// No language with sound folding, return word as-is.
5802	return xstrdup(word);
5803	}
5804
5805	/// Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
5806	///
5807	/// There are many ways to turn a word into a sound-a-like representation. The
5808	/// oldest is Soundex (1918!). A nice overview can be found in "Approximate
5809	/// swedish name matching - survey and test of different algorithms" by Klas
5810	/// Erikson.
5811	///
5812	/// We support two methods:
5813	/// 1. SOFOFROM/SOFOTO do a simple character mapping.
5814	/// 2. SAL items define a more advanced sound-folding (and much slower).
5815	///
5816	/// @param[in] slang
5817	/// @param[in] inword word to soundfold
5818	/// @param[in] folded whether inword is already case-folded
5819	/// @param[in,out] res destination for soundfolded word
5820	void spell_soundfold(slang_T slang, char_u inword, bool folded, char_u *res)
5821	{
5822	char_u fword[MAXWLEN];
5823	char_u *word;
5824
5825	if (slang->sl_sofo)
5826	// SOFOFROM and SOFOTO used
5827	spell_soundfold_sofo(slang, inword, res);
5828	else {
5829	// SAL items used. Requires the word to be case-folded.
5830	if (folded)
5831	word = inword;
5832	else {
5833	(void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
5834	word = fword;
5835	}
5836
5837	if (has_mbyte)
5838	spell_soundfold_wsal(slang, word, res);
5839	else
5840	spell_soundfold_sal(slang, word, res);
5841	}
5842	}
5843
5844	// Perform sound folding of "inword" into "res" according to SOFOFROM and
5845	// SOFOTO lines.
5846	static void spell_soundfold_sofo(slang_T slang, char_u inword, char_u *res)
5847	{
5848	char_u *s;
5849	int ri = `0`;
5850	int c;
5851
5852	if (has_mbyte) {
5853	int prevc = `0`;
5854	int *ip;
5855
5856	// The sl_sal_first[] table contains the translation for chars up to
5857	// 255, sl_sal the rest.
5858	for (s = inword; *s != NUL; ) {
5859	c = mb_cptr2char_adv((const char_u **)&s);
5860	if (enc_utf8 ? utf_class(c) == `0` : ascii_iswhite(c)) {
5861	c = `' '`;
5862	} else if (c < `256`) {
5863	c = slang->sl_sal_first[c];
5864	} else {
5865	ip = ((int **)slang->sl_sal.ga_data)[c & `0xff`];
5866	if (ip == NULL) // empty list, can't match
5867	c = NUL;
5868	else
5869	for (;; ) { // find "c" in the list
5870	if (ip == `0`) { // not found*
5871	c = NUL;
5872	break;
5873	}
5874	if (ip == c) { // match!*
5875	c = ip[`1`];
5876	break;
5877	}
5878	ip += `2`;
5879	}
5880	}
5881
5882	if (c != NUL && c != prevc) {
5883	ri += utf_char2bytes(c, res + ri);
5884	if (ri + MB_MAXBYTES > MAXWLEN) {
5885	break;
5886	}
5887	prevc = c;
5888	}
5889	}
5890	} else {
5891	// The sl_sal_first[] table contains the translation.
5892	for (s = inword; (c = *s) != NUL; ++s) {
5893	if (ascii_iswhite(c))
5894	c = `' '`;
5895	else
5896	c = slang->sl_sal_first[c];
5897	if (c != NUL && (ri == `0` \|\| res[ri - `1`] != c))
5898	res[ri++] = c;
5899	}
5900	}
5901
5902	res[ri] = NUL;
5903	}
5904
5905	static void spell_soundfold_sal(slang_T slang, char_u inword, char_u *res)
5906	{
5907	salitem_T *smp;
5908	char_u word[MAXWLEN];
5909	char_u *s = inword;
5910	char_u *t;
5911	char_u *pf;
5912	int i, j, z;
5913	int reslen;
5914	int n, k = `0`;
5915	int z0;
5916	int k0;
5917	int n0;
5918	int c;
5919	int pri;
5920	int p0 = -`333`;
5921	int c0;
5922
5923	// Remove accents, if wanted. We actually remove all non-word characters.
5924	// But keep white space. We need a copy, the word may be changed here.
5925	if (slang->sl_rem_accents) {
5926	t = word;
5927	while (*s != NUL) {
5928	if (ascii_iswhite(*s)) {
5929	*t++ = `' '`;
5930	s = skipwhite(s);
5931	} else {
5932	if (spell_iswordp_nmw(s, curwin))
5933	t++ = s;
5934	++s;
5935	}
5936	}
5937	*t = NUL;
5938	} else
5939	STRLCPY(word, s, MAXWLEN);
5940
5941	smp = (salitem_T *)slang->sl_sal.ga_data;
5942
5943	// This comes from Aspell phonet.cpp. Converted from C++ to C.
5944	// Changed to keep spaces.
5945	i = reslen = z = `0`;
5946	while ((c = word[i]) != NUL) {
5947	// Start with the first rule that has the character in the word.
5948	n = slang->sl_sal_first[c];
5949	z0 = `0`;
5950
5951	if (n >= `0`) {
5952	// check all rules for the same letter
5953	for (; (s = smp[n].sm_lead)[`0`] == c; ++n) {
5954	// Quickly skip entries that don't match the word. Most
5955	// entries are less then three chars, optimize for that.
5956	k = smp[n].sm_leadlen;
5957	if (k > `1`) {
5958	if (word[i + `1`] != s[`1`])
5959	continue;
5960	if (k > `2`) {
5961	for (j = `2`; j < k; ++j)
5962	if (word[i + j] != s[j])
5963	break;
5964	if (j < k)
5965	continue;
5966	}
5967	}
5968
5969	if ((pf = smp[n].sm_oneof) != NULL) {
5970	// Check for match with one of the chars in "sm_oneof".
5971	while (pf != NUL && pf != word[i + k])
5972	++pf;
5973	if (*pf == NUL)
5974	continue;
5975	++k;
5976	}
5977	s = smp[n].sm_rules;
5978	pri = `5`; // default priority
5979
5980	p0 = *s;
5981	k0 = k;
5982	while (*s == `'-'` && k > `1`) {
5983	k--;
5984	s++;
5985	}
5986	if (*s == `'<'`)
5987	s++;
5988	if (ascii_isdigit(*s)) {
5989	// determine priority
5990	pri = *s - `'0'`;
5991	s++;
5992	}
5993	if (s == `'^'` && (s + `1`) == `'^'`)
5994	s++;
5995
5996	if (*s == NUL
5997	\|\| (*s == `'^'`
5998	&& (i == `0` \|\| !(word[i - `1`] == `' '`
5999	\|\| spell_iswordp(word + i - `1`, curwin)))
6000	&& (*(s + `1`) != `'$'`
6001	\|\| (!spell_iswordp(word + i + k0, curwin))))
6002	\|\| (*s == `'$'` && i > `0`
6003	&& spell_iswordp(word + i - `1`, curwin)
6004	&& (!spell_iswordp(word + i + k0, curwin)))) {
6005	// search for followup rules, if:
6006	// followup and k > 1 and NO '-' in searchstring
6007	c0 = word[i + k - `1`];
6008	n0 = slang->sl_sal_first[c0];
6009
6010	if (slang->sl_followup && k > `1` && n0 >= `0`
6011	&& p0 != `'-'` && word[i + k] != NUL) {
6012	// test follow-up rule for "word[i + k]"
6013	for (; (s = smp[n0].sm_lead)[`0`] == c0; ++n0) {
6014	// Quickly skip entries that don't match the word.
6015	k0 = smp[n0].sm_leadlen;
6016	if (k0 > `1`) {
6017	if (word[i + k] != s[`1`])
6018	continue;
6019	if (k0 > `2`) {
6020	pf = word + i + k + `1`;
6021	for (j = `2`; j < k0; ++j)
6022	if (*pf++ != s[j])
6023	break;
6024	if (j < k0)
6025	continue;
6026	}
6027	}
6028	k0 += k - `1`;
6029
6030	if ((pf = smp[n0].sm_oneof) != NULL) {
6031	// Check for match with one of the chars in
6032	// "sm_oneof".
6033	while (pf != NUL && pf != word[i + k0])
6034	++pf;
6035	if (*pf == NUL)
6036	continue;
6037	++k0;
6038	}
6039
6040	p0 = `5`;
6041	s = smp[n0].sm_rules;
6042	while (*s == `'-'`) {
6043	// "k0" gets NOT reduced because
6044	// "if (k0 == k)"
6045	s++;
6046	}
6047	if (*s == `'<'`)
6048	s++;
6049	if (ascii_isdigit(*s)) {
6050	p0 = *s - `'0'`;
6051	s++;
6052	}
6053
6054	if (*s == NUL
6055	// s == '^' cuts*
6056	\|\| (*s == `'$'`
6057	&& !spell_iswordp(word + i + k0,
6058	curwin))) {
6059	if (k0 == k)
6060	// this is just a piece of the string
6061	continue;
6062
6063	if (p0 < pri)
6064	// priority too low
6065	continue;
6066	// rule fits; stop search
6067	break;
6068	}
6069	}
6070
6071	if (p0 >= pri && smp[n0].sm_lead[`0`] == c0)
6072	continue;
6073	}
6074
6075	// replace string
6076	s = smp[n].sm_to;
6077	if (s == NULL)
6078	s = (char_u *)"";
6079	pf = smp[n].sm_rules;
6080	p0 = (vim_strchr(pf, `'<'`) != NULL) ? `1` : `0`;
6081	if (p0 == `1` && z == `0`) {
6082	// rule with '<' is used
6083	if (reslen > `0` && *s != NUL && (res[reslen - `1`] == c
6084	\|\| res[reslen - `1`] == *s))
6085	reslen--;
6086	z0 = `1`;
6087	z = `1`;
6088	k0 = `0`;
6089	while (*s != NUL && word[i + k0] != NUL) {
6090	word[i + k0] = *s;
6091	k0++;
6092	s++;
6093	}
6094	if (k > k0)
6095	STRMOVE(word + i + k0, word + i + k);
6096
6097	// new "actual letter"
6098	c = word[i];
6099	} else {
6100	// no '<' rule used
6101	i += k - `1`;
6102	z = `0`;
6103	while (*s != NUL && s[`1`] != NUL && reslen < MAXWLEN) {
6104	if (reslen == `0` \|\| res[reslen - `1`] != *s)
6105	res[reslen++] = *s;
6106	s++;
6107	}
6108	// new "actual letter"
6109	c = *s;
6110	if (strstr((char *)pf, "^^") != NULL) {
6111	if (c != NUL)
6112	res[reslen++] = c;
6113	STRMOVE(word, word + i + `1`);
6114	i = `0`;
6115	z0 = `1`;
6116	}
6117	}
6118	break;
6119	}
6120	}
6121	} else if (ascii_iswhite(c)) {
6122	c = `' '`;
6123	k = `1`;
6124	}
6125
6126	if (z0 == `0`) {
6127	if (k && !p0 && reslen < MAXWLEN && c != NUL
6128	&& (!slang->sl_collapse \|\| reslen == `0`
6129	\|\| res[reslen - `1`] != c))
6130	// condense only double letters
6131	res[reslen++] = c;
6132
6133	i++;
6134	z = `0`;
6135	k = `0`;
6136	}
6137	}
6138
6139	res[reslen] = NUL;
6140	}
6141
6142	// Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
6143	// Multi-byte version of spell_soundfold().
6144	static void spell_soundfold_wsal(slang_T slang, char_u inword, char_u *res)
6145	{
6146	salitem_T smp = (salitem_T )slang->sl_sal.ga_data;
6147	int word[MAXWLEN];
6148	int wres[MAXWLEN];
6149	int l;
6150	int *ws;
6151	int *pf;
6152	int i, j, z;
6153	int reslen;
6154	int n, k = `0`;
6155	int z0;
6156	int k0;
6157	int n0;
6158	int c;
6159	int pri;
6160	int p0 = -`333`;
6161	int c0;
6162	bool did_white = false;
6163	int wordlen;
6164
6165
6166	// Convert the multi-byte string to a wide-character string.
6167	// Remove accents, if wanted. We actually remove all non-word characters.
6168	// But keep white space.
6169	wordlen = `0`;
6170	for (const char_u s = inword; s != NUL; ) {
6171	const char_u *t = s;
6172	c = mb_cptr2char_adv((const char_u **)&s);
6173	if (slang->sl_rem_accents) {
6174	if (enc_utf8 ? utf_class(c) == `0` : ascii_iswhite(c)) {
6175	if (did_white)
6176	continue;
6177	c = `' '`;
6178	did_white = true;
6179	} else {
6180	did_white = false;
6181	if (!spell_iswordp_nmw(t, curwin)) {
6182	continue;
6183	}
6184	}
6185	}
6186	word[wordlen++] = c;
6187	}
6188	word[wordlen] = NUL;
6189
6190	// This algorithm comes from Aspell phonet.cpp.
6191	// Converted from C++ to C. Added support for multi-byte chars.
6192	// Changed to keep spaces.
6193	i = reslen = z = `0`;
6194	while ((c = word[i]) != NUL) {
6195	// Start with the first rule that has the character in the word.
6196	n = slang->sl_sal_first[c & `0xff`];
6197	z0 = `0`;
6198
6199	if (n >= `0`) {
6200	// Check all rules for the same index byte.
6201	// If c is 0x300 need extra check for the end of the array, as
6202	// (c & 0xff) is NUL.
6203	for (; ((ws = smp[n].sm_lead_w)[`0`] & `0xff`) == (c & `0xff`)
6204	&& ws[`0`] != NUL; ++n) {
6205	// Quickly skip entries that don't match the word. Most
6206	// entries are less then three chars, optimize for that.
6207	if (c != ws[`0`])
6208	continue;
6209	k = smp[n].sm_leadlen;
6210	if (k > `1`) {
6211	if (word[i + `1`] != ws[`1`])
6212	continue;
6213	if (k > `2`) {
6214	for (j = `2`; j < k; ++j)
6215	if (word[i + j] != ws[j])
6216	break;
6217	if (j < k)
6218	continue;
6219	}
6220	}
6221
6222	if ((pf = smp[n].sm_oneof_w) != NULL) {
6223	// Check for match with one of the chars in "sm_oneof".
6224	while (pf != NUL && pf != word[i + k])
6225	++pf;
6226	if (*pf == NUL)
6227	continue;
6228	++k;
6229	}
6230	char_u *s = smp[n].sm_rules;
6231	pri = `5`; // default priority
6232
6233	p0 = *s;
6234	k0 = k;
6235	while (*s == `'-'` && k > `1`) {
6236	k--;
6237	s++;
6238	}
6239	if (*s == `'<'`)
6240	s++;
6241	if (ascii_isdigit(*s)) {
6242	// determine priority
6243	pri = *s - `'0'`;
6244	s++;
6245	}
6246	if (s == `'^'` && (s + `1`) == `'^'`)
6247	s++;
6248
6249	if (*s == NUL
6250	\|\| (*s == `'^'`
6251	&& (i == `0` \|\| !(word[i - `1`] == `' '`
6252	\|\| spell_iswordp_w(word + i - `1`, curwin)))
6253	&& (*(s + `1`) != `'$'`
6254	\|\| (!spell_iswordp_w(word + i + k0, curwin))))
6255	\|\| (*s == `'$'` && i > `0`
6256	&& spell_iswordp_w(word + i - `1`, curwin)
6257	&& (!spell_iswordp_w(word + i + k0, curwin)))) {
6258	// search for followup rules, if:
6259	// followup and k > 1 and NO '-' in searchstring
6260	c0 = word[i + k - `1`];
6261	n0 = slang->sl_sal_first[c0 & `0xff`];
6262
6263	if (slang->sl_followup && k > `1` && n0 >= `0`
6264	&& p0 != `'-'` && word[i + k] != NUL) {
6265	// Test follow-up rule for "word[i + k]"; loop over
6266	// all entries with the same index byte.
6267	for (; ((ws = smp[n0].sm_lead_w)[`0`] & `0xff`)
6268	== (c0 & `0xff`); ++n0) {
6269	// Quickly skip entries that don't match the word.
6270	if (c0 != ws[`0`])
6271	continue;
6272	k0 = smp[n0].sm_leadlen;
6273	if (k0 > `1`) {
6274	if (word[i + k] != ws[`1`])
6275	continue;
6276	if (k0 > `2`) {
6277	pf = word + i + k + `1`;
6278	for (j = `2`; j < k0; ++j)
6279	if (*pf++ != ws[j])
6280	break;
6281	if (j < k0)
6282	continue;
6283	}
6284	}
6285	k0 += k - `1`;
6286
6287	if ((pf = smp[n0].sm_oneof_w) != NULL) {
6288	// Check for match with one of the chars in
6289	// "sm_oneof".
6290	while (pf != NUL && pf != word[i + k0])
6291	++pf;
6292	if (*pf == NUL)
6293	continue;
6294	++k0;
6295	}
6296
6297	p0 = `5`;
6298	s = smp[n0].sm_rules;
6299	while (*s == `'-'`) {
6300	// "k0" gets NOT reduced because
6301	// "if (k0 == k)"
6302	s++;
6303	}
6304	if (*s == `'<'`)
6305	s++;
6306	if (ascii_isdigit(*s)) {
6307	p0 = *s - `'0'`;
6308	s++;
6309	}
6310
6311	if (*s == NUL
6312	// s == '^' cuts*
6313	\|\| (*s == `'$'`
6314	&& !spell_iswordp_w(word + i + k0,
6315	curwin))) {
6316	if (k0 == k)
6317	// this is just a piece of the string
6318	continue;
6319
6320	if (p0 < pri)
6321	// priority too low
6322	continue;
6323	// rule fits; stop search
6324	break;
6325	}
6326	}
6327
6328	if (p0 >= pri && (smp[n0].sm_lead_w[`0`] & `0xff`)
6329	== (c0 & `0xff`))
6330	continue;
6331	}
6332
6333	// replace string
6334	ws = smp[n].sm_to_w;
6335	s = smp[n].sm_rules;
6336	p0 = (vim_strchr(s, `'<'`) != NULL) ? `1` : `0`;
6337	if (p0 == `1` && z == `0`) {
6338	// rule with '<' is used
6339	if (reslen > `0` && ws != NULL && *ws != NUL
6340	&& (wres[reslen - `1`] == c
6341	\|\| wres[reslen - `1`] == *ws))
6342	reslen--;
6343	z0 = `1`;
6344	z = `1`;
6345	k0 = `0`;
6346	if (ws != NULL)
6347	while (*ws != NUL && word[i + k0] != NUL) {
6348	word[i + k0] = *ws;
6349	k0++;
6350	ws++;
6351	}
6352	if (k > k0)
6353	memmove(word + i + k0, word + i + k,
6354	sizeof(int) * (wordlen - (i + k) + `1`));
6355
6356	// new "actual letter"
6357	c = word[i];
6358	} else {
6359	// no '<' rule used
6360	i += k - `1`;
6361	z = `0`;
6362	if (ws != NULL)
6363	while (*ws != NUL && ws[`1`] != NUL
6364	&& reslen < MAXWLEN) {
6365	if (reslen == `0` \|\| wres[reslen - `1`] != *ws)
6366	wres[reslen++] = *ws;
6367	ws++;
6368	}
6369	// new "actual letter"
6370	if (ws == NULL)
6371	c = NUL;
6372	else
6373	c = *ws;
6374	if (strstr((char *)s, "^^") != NULL) {
6375	if (c != NUL)
6376	wres[reslen++] = c;
6377	memmove(word, word + i + `1`,
6378	sizeof(int) * (wordlen - (i + `1`) + `1`));
6379	i = `0`;
6380	z0 = `1`;
6381	}
6382	}
6383	break;
6384	}
6385	}
6386	} else if (ascii_iswhite(c)) {
6387	c = `' '`;
6388	k = `1`;
6389	}
6390
6391	if (z0 == `0`) {
6392	if (k && !p0 && reslen < MAXWLEN && c != NUL
6393	&& (!slang->sl_collapse \|\| reslen == `0`
6394	\|\| wres[reslen - `1`] != c))
6395	// condense only double letters
6396	wres[reslen++] = c;
6397
6398	i++;
6399	z = `0`;
6400	k = `0`;
6401	}
6402	}
6403
6404	// Convert wide characters in "wres" to a multi-byte string in "res".
6405	l = `0`;
6406	for (n = `0`; n < reslen; n++) {
6407	l += utf_char2bytes(wres[n], res + l);
6408	if (l + MB_MAXBYTES > MAXWLEN) {
6409	break;
6410	}
6411	}
6412	res[l] = NUL;
6413	}
6414
6415	// Compute a score for two sound-a-like words.
6416	// This permits up to two inserts/deletes/swaps/etc. to keep things fast.
6417	// Instead of a generic loop we write out the code. That keeps it fast by
6418	// avoiding checks that will not be possible.
6419	static int
6420	soundalike_score (
6421	char_u goodstart, // sound-folded good word*
6422	char_u badstart // sound-folded bad word*
6423	)
6424	{
6425	char_u *goodsound = goodstart;
6426	char_u *badsound = badstart;
6427	int goodlen;
6428	int badlen;
6429	int n;
6430	char_u pl, ps;
6431	char_u pl2, ps2;
6432	int score = `0`;
6433
6434	// Adding/inserting "" at the start (word starts with vowel) shouldn't be*
6435	// counted so much, vowels in the middle of the word aren't counted at all.
6436	if ((badsound == `''` \|\| goodsound == `''`) && badsound != goodsound) {
6437	if ((badsound[`0`] == NUL && goodsound[`1`] == NUL)
6438	\|\| (goodsound[`0`] == NUL && badsound[`1`] == NUL))
6439	// changing word with vowel to word without a sound
6440	return SCORE_DEL;
6441	if (badsound[`0`] == NUL \|\| goodsound[`0`] == NUL)
6442	// more than two changes
6443	return SCORE_MAXMAX;
6444
6445	if (badsound[`1`] == goodsound[`1`]
6446	\|\| (badsound[`1`] != NUL
6447	&& goodsound[`1`] != NUL
6448	&& badsound[`2`] == goodsound[`2`])) {
6449	// handle like a substitute
6450	} else {
6451	score = `2` * SCORE_DEL / `3`;
6452	if (badsound == `''`)
6453	++badsound;
6454	else
6455	++goodsound;
6456	}
6457	}
6458
6459	goodlen = (int)STRLEN(goodsound);
6460	badlen = (int)STRLEN(badsound);
6461
6462	// Return quickly if the lengths are too different to be fixed by two
6463	// changes.
6464	n = goodlen - badlen;
6465	if (n < -`2` \|\| n > `2`)
6466	return SCORE_MAXMAX;
6467
6468	if (n > `0`) {
6469	pl = goodsound; // goodsound is longest
6470	ps = badsound;
6471	} else {
6472	pl = badsound; // badsound is longest
6473	ps = goodsound;
6474	}
6475
6476	// Skip over the identical part.
6477	while (pl == ps && *pl != NUL) {
6478	++pl;
6479	++ps;
6480	}
6481
6482	switch (n) {
6483	case -`2`:
6484	case `2`:
6485	// Must delete two characters from "pl".
6486	++pl; // first delete
6487	while (pl == ps) {
6488	++pl;
6489	++ps;
6490	}
6491	// strings must be equal after second delete
6492	if (STRCMP(pl + `1`, ps) == `0`)
6493	return score + SCORE_DEL * `2`;
6494
6495	// Failed to compare.
6496	break;
6497
6498	case -`1`:
6499	case `1`:
6500	// Minimal one delete from "pl" required.
6501
6502	// 1: delete
6503	pl2 = pl + `1`;
6504	ps2 = ps;
6505	while (pl2 == ps2) {
6506	if (pl2 == NUL) // reached the end*
6507	return score + SCORE_DEL;
6508	++pl2;
6509	++ps2;
6510	}
6511
6512	// 2: delete then swap, then rest must be equal
6513	if (pl2[`0`] == ps2[`1`] && pl2[`1`] == ps2[`0`]
6514	&& STRCMP(pl2 + `2`, ps2 + `2`) == `0`)
6515	return score + SCORE_DEL + SCORE_SWAP;
6516
6517	// 3: delete then substitute, then the rest must be equal
6518	if (STRCMP(pl2 + `1`, ps2 + `1`) == `0`)
6519	return score + SCORE_DEL + SCORE_SUBST;
6520
6521	// 4: first swap then delete
6522	if (pl[`0`] == ps[`1`] && pl[`1`] == ps[`0`]) {
6523	pl2 = pl + `2`; // swap, skip two chars
6524	ps2 = ps + `2`;
6525	while (pl2 == ps2) {
6526	++pl2;
6527	++ps2;
6528	}
6529	// delete a char and then strings must be equal
6530	if (STRCMP(pl2 + `1`, ps2) == `0`)
6531	return score + SCORE_SWAP + SCORE_DEL;
6532	}
6533
6534	// 5: first substitute then delete
6535	pl2 = pl + `1`; // substitute, skip one char
6536	ps2 = ps + `1`;
6537	while (pl2 == ps2) {
6538	++pl2;
6539	++ps2;
6540	}
6541	// delete a char and then strings must be equal
6542	if (STRCMP(pl2 + `1`, ps2) == `0`)
6543	return score + SCORE_SUBST + SCORE_DEL;
6544
6545	// Failed to compare.
6546	break;
6547
6548	case `0`:
6549	// Lengths are equal, thus changes must result in same length: An
6550	// insert is only possible in combination with a delete.
6551	// 1: check if for identical strings
6552	if (*pl == NUL)
6553	return score;
6554
6555	// 2: swap
6556	if (pl[`0`] == ps[`1`] && pl[`1`] == ps[`0`]) {
6557	pl2 = pl + `2`; // swap, skip two chars
6558	ps2 = ps + `2`;
6559	while (pl2 == ps2) {
6560	if (pl2 == NUL) // reached the end*
6561	return score + SCORE_SWAP;
6562	++pl2;
6563	++ps2;
6564	}
6565	// 3: swap and swap again
6566	if (pl2[`0`] == ps2[`1`] && pl2[`1`] == ps2[`0`]
6567	&& STRCMP(pl2 + `2`, ps2 + `2`) == `0`)
6568	return score + SCORE_SWAP + SCORE_SWAP;
6569
6570	// 4: swap and substitute
6571	if (STRCMP(pl2 + `1`, ps2 + `1`) == `0`)
6572	return score + SCORE_SWAP + SCORE_SUBST;
6573	}
6574
6575	// 5: substitute
6576	pl2 = pl + `1`;
6577	ps2 = ps + `1`;
6578	while (pl2 == ps2) {
6579	if (pl2 == NUL) // reached the end*
6580	return score + SCORE_SUBST;
6581	++pl2;
6582	++ps2;
6583	}
6584
6585	// 6: substitute and swap
6586	if (pl2[`0`] == ps2[`1`] && pl2[`1`] == ps2[`0`]
6587	&& STRCMP(pl2 + `2`, ps2 + `2`) == `0`)
6588	return score + SCORE_SUBST + SCORE_SWAP;
6589
6590	// 7: substitute and substitute
6591	if (STRCMP(pl2 + `1`, ps2 + `1`) == `0`)
6592	return score + SCORE_SUBST + SCORE_SUBST;
6593
6594	// 8: insert then delete
6595	pl2 = pl;
6596	ps2 = ps + `1`;
6597	while (pl2 == ps2) {
6598	++pl2;
6599	++ps2;
6600	}
6601	if (STRCMP(pl2 + `1`, ps2) == `0`)
6602	return score + SCORE_INS + SCORE_DEL;
6603
6604	// 9: delete then insert
6605	pl2 = pl + `1`;
6606	ps2 = ps;
6607	while (pl2 == ps2) {
6608	++pl2;
6609	++ps2;
6610	}
6611	if (STRCMP(pl2, ps2 + `1`) == `0`)
6612	return score + SCORE_INS + SCORE_DEL;
6613
6614	// Failed to compare.
6615	break;
6616	}
6617
6618	return SCORE_MAXMAX;
6619	}
6620
6621	// Compute the "edit distance" to turn "badword" into "goodword". The less
6622	// deletes/inserts/substitutes/swaps are required the lower the score.
6623	//
6624	// The algorithm is described by Du and Chang, 1992.
6625	// The implementation of the algorithm comes from Aspell editdist.cpp,
6626	// edit_distance(). It has been converted from C++ to C and modified to
6627	// support multi-byte characters.
6628	static int spell_edit_score(slang_T slang, char_u badword, char_u *goodword)
6629	{
6630	int *cnt;
6631	int j, i;
6632	int t;
6633	int bc, gc;
6634	int pbc, pgc;
6635	int wbadword[MAXWLEN];
6636	int wgoodword[MAXWLEN];
6637	const bool l_has_mbyte = has_mbyte;
6638
6639	// Lengths with NUL.
6640	int badlen;
6641	int goodlen;
6642	if (l_has_mbyte) {
6643	// Get the characters from the multi-byte strings and put them in an
6644	// int array for easy access.
6645	badlen = `0`;
6646	for (const char_u p = badword; p != NUL; ) {
6647	wbadword[badlen++] = mb_cptr2char_adv(&p);
6648	}
6649	wbadword[badlen++] = `0`;
6650	goodlen = `0`;
6651	for (const char_u p = goodword; p != NUL; ) {
6652	wgoodword[goodlen++] = mb_cptr2char_adv(&p);
6653	}
6654	wgoodword[goodlen++] = `0`;
6655	} else {
6656	badlen = (int)STRLEN(badword) + `1`;
6657	goodlen = (int)STRLEN(goodword) + `1`;
6658	}
6659
6660	// We use "cnt" as an array: CNT(badword_idx, goodword_idx).
6661	#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
6662	cnt = xmalloc(sizeof(int) * (badlen + `1`) * (goodlen + `1`));
6663
6664	CNT(`0`, `0`) = `0`;
6665	for (j = `1`; j <= goodlen; ++j)
6666	CNT(`0`, j) = CNT(`0`, j - `1`) + SCORE_INS;
6667
6668	for (i = `1`; i <= badlen; ++i) {
6669	CNT(i, `0`) = CNT(i - `1`, `0`) + SCORE_DEL;
6670	for (j = `1`; j <= goodlen; ++j) {
6671	if (l_has_mbyte) {
6672	bc = wbadword[i - `1`];
6673	gc = wgoodword[j - `1`];
6674	} else {
6675	bc = badword[i - `1`];
6676	gc = goodword[j - `1`];
6677	}
6678	if (bc == gc)
6679	CNT(i, j) = CNT(i - `1`, j - `1`);
6680	else {
6681	// Use a better score when there is only a case difference.
6682	if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
6683	CNT(i, j) = SCORE_ICASE + CNT(i - `1`, j - `1`);
6684	else {
6685	// For a similar character use SCORE_SIMILAR.
6686	if (slang != NULL
6687	&& slang->sl_has_map
6688	&& similar_chars(slang, gc, bc))
6689	CNT(i, j) = SCORE_SIMILAR + CNT(i - `1`, j - `1`);
6690	else
6691	CNT(i, j) = SCORE_SUBST + CNT(i - `1`, j - `1`);
6692	}
6693
6694	if (i > `1` && j > `1`) {
6695	if (l_has_mbyte) {
6696	pbc = wbadword[i - `2`];
6697	pgc = wgoodword[j - `2`];
6698	} else {
6699	pbc = badword[i - `2`];
6700	pgc = goodword[j - `2`];
6701	}
6702	if (bc == pgc && pbc == gc) {
6703	t = SCORE_SWAP + CNT(i - `2`, j - `2`);
6704	if (t < CNT(i, j))
6705	CNT(i, j) = t;
6706	}
6707	}
6708	t = SCORE_DEL + CNT(i - `1`, j);
6709	if (t < CNT(i, j))
6710	CNT(i, j) = t;
6711	t = SCORE_INS + CNT(i, j - `1`);
6712	if (t < CNT(i, j))
6713	CNT(i, j) = t;
6714	}
6715	}
6716	}
6717
6718	i = CNT(badlen - `1`, goodlen - `1`);
6719	xfree(cnt);
6720	return i;
6721	}
6722
6723	// Like spell_edit_score(), but with a limit on the score to make it faster.
6724	// May return SCORE_MAXMAX when the score is higher than "limit".
6725	//
6726	// This uses a stack for the edits still to be tried.
6727	// The idea comes from Aspell leditdist.cpp. Rewritten in C and added support
6728	// for multi-byte characters.
6729	static int spell_edit_score_limit(slang_T slang, char_u badword, char_u goodword, int* limit)
6730	{
6731	limitscore_T stack[`10`]; // allow for over 3 2 edits*
6732	int stackidx;
6733	int bi, gi;
6734	int bi2, gi2;
6735	int bc, gc;
6736	int score;
6737	int score_off;
6738	int minscore;
6739	int round;
6740
6741	// Multi-byte characters require a bit more work, use a different function
6742	// to avoid testing "has_mbyte" quite often.
6743	if (has_mbyte)
6744	return spell_edit_score_limit_w(slang, badword, goodword, limit);
6745
6746	// The idea is to go from start to end over the words. So long as
6747	// characters are equal just continue, this always gives the lowest score.
6748	// When there is a difference try several alternatives. Each alternative
6749	// increases "score" for the edit distance. Some of the alternatives are
6750	// pushed unto a stack and tried later, some are tried right away. At the
6751	// end of the word the score for one alternative is known. The lowest
6752	// possible score is stored in "minscore".
6753	stackidx = `0`;
6754	bi = `0`;
6755	gi = `0`;
6756	score = `0`;
6757	minscore = limit + `1`;
6758
6759	for (;; ) {
6760	// Skip over an equal part, score remains the same.
6761	for (;; ) {
6762	bc = badword[bi];
6763	gc = goodword[gi];
6764	if (bc != gc) // stop at a char that's different
6765	break;
6766	if (bc == NUL) { // both words end
6767	if (score < minscore)
6768	minscore = score;
6769	goto pop; // do next alternative
6770	}
6771	++bi;
6772	++gi;
6773	}
6774
6775	if (gc == NUL) { // goodword ends, delete badword chars
6776	do {
6777	if ((score += SCORE_DEL) >= minscore)
6778	goto pop; // do next alternative
6779	} while (badword[++bi] != NUL);
6780	minscore = score;
6781	} else if (bc == NUL) { // badword ends, insert badword chars
6782	do {
6783	if ((score += SCORE_INS) >= minscore)
6784	goto pop; // do next alternative
6785	} while (goodword[++gi] != NUL);
6786	minscore = score;
6787	} else { // both words continue
6788	// If not close to the limit, perform a change. Only try changes
6789	// that may lead to a lower score than "minscore".
6790	// round 0: try deleting a char from badword
6791	// round 1: try inserting a char in badword
6792	for (round = `0`; round <= `1`; ++round) {
6793	score_off = score + (round == `0` ? SCORE_DEL : SCORE_INS);
6794	if (score_off < minscore) {
6795	if (score_off + SCORE_EDIT_MIN >= minscore) {
6796	// Near the limit, rest of the words must match. We
6797	// can check that right now, no need to push an item
6798	// onto the stack.
6799	bi2 = bi + `1` - round;
6800	gi2 = gi + round;
6801	while (goodword[gi2] == badword[bi2]) {
6802	if (goodword[gi2] == NUL) {
6803	minscore = score_off;
6804	break;
6805	}
6806	++bi2;
6807	++gi2;
6808	}
6809	} else {
6810	// try deleting/inserting a character later
6811	stack[stackidx].badi = bi + `1` - round;
6812	stack[stackidx].goodi = gi + round;
6813	stack[stackidx].score = score_off;
6814	++stackidx;
6815	}
6816	}
6817	}
6818
6819	if (score + SCORE_SWAP < minscore) {
6820	// If swapping two characters makes a match then the
6821	// substitution is more expensive, thus there is no need to
6822	// try both.
6823	if (gc == badword[bi + `1`] && bc == goodword[gi + `1`]) {
6824	// Swap two characters, that is: skip them.
6825	gi += `2`;
6826	bi += `2`;
6827	score += SCORE_SWAP;
6828	continue;
6829	}
6830	}
6831
6832	// Substitute one character for another which is the same
6833	// thing as deleting a character from both goodword and badword.
6834	// Use a better score when there is only a case difference.
6835	if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
6836	score += SCORE_ICASE;
6837	else {
6838	// For a similar character use SCORE_SIMILAR.
6839	if (slang != NULL
6840	&& slang->sl_has_map
6841	&& similar_chars(slang, gc, bc))
6842	score += SCORE_SIMILAR;
6843	else
6844	score += SCORE_SUBST;
6845	}
6846
6847	if (score < minscore) {
6848	// Do the substitution.
6849	++gi;
6850	++bi;
6851	continue;
6852	}
6853	}
6854	pop:
6855	// Get here to try the next alternative, pop it from the stack.
6856	if (stackidx == `0`) // stack is empty, finished
6857	break;
6858
6859	// pop an item from the stack
6860	--stackidx;
6861	gi = stack[stackidx].goodi;
6862	bi = stack[stackidx].badi;
6863	score = stack[stackidx].score;
6864	}
6865
6866	// When the score goes over "limit" it may actually be much higher.
6867	// Return a very large number to avoid going below the limit when giving a
6868	// bonus.
6869	if (minscore > limit)
6870	return SCORE_MAXMAX;
6871	return minscore;
6872	}
6873
6874	// Multi-byte version of spell_edit_score_limit().
6875	// Keep it in sync with the above!
6876	static int spell_edit_score_limit_w(slang_T slang, char_u badword, char_u goodword, int* limit)
6877	{
6878	limitscore_T stack[`10`]; // allow for over 3 2 edits*
6879	int stackidx;
6880	int bi, gi;
6881	int bi2, gi2;
6882	int bc, gc;
6883	int score;
6884	int score_off;
6885	int minscore;
6886	int round;
6887	int wbadword[MAXWLEN];
6888	int wgoodword[MAXWLEN];
6889
6890	// Get the characters from the multi-byte strings and put them in an
6891	// int array for easy access.
6892	bi = `0`;
6893	for (const char_u p = badword; p != NUL; ) {
6894	wbadword[bi++] = mb_cptr2char_adv(&p);
6895	}
6896	wbadword[bi++] = `0`;
6897	gi = `0`;
6898	for (const char_u p = goodword; p != NUL; ) {
6899	wgoodword[gi++] = mb_cptr2char_adv(&p);
6900	}
6901	wgoodword[gi++] = `0`;
6902
6903	// The idea is to go from start to end over the words. So long as
6904	// characters are equal just continue, this always gives the lowest score.
6905	// When there is a difference try several alternatives. Each alternative
6906	// increases "score" for the edit distance. Some of the alternatives are
6907	// pushed unto a stack and tried later, some are tried right away. At the
6908	// end of the word the score for one alternative is known. The lowest
6909	// possible score is stored in "minscore".
6910	stackidx = `0`;
6911	bi = `0`;
6912	gi = `0`;
6913	score = `0`;
6914	minscore = limit + `1`;
6915
6916	for (;; ) {
6917	// Skip over an equal part, score remains the same.
6918	for (;; ) {
6919	bc = wbadword[bi];
6920	gc = wgoodword[gi];
6921
6922	if (bc != gc) // stop at a char that's different
6923	break;
6924	if (bc == NUL) { // both words end
6925	if (score < minscore)
6926	minscore = score;
6927	goto pop; // do next alternative
6928	}
6929	++bi;
6930	++gi;
6931	}
6932
6933	if (gc == NUL) { // goodword ends, delete badword chars
6934	do {
6935	if ((score += SCORE_DEL) >= minscore)
6936	goto pop; // do next alternative
6937	} while (wbadword[++bi] != NUL);
6938	minscore = score;
6939	} else if (bc == NUL) { // badword ends, insert badword chars
6940	do {
6941	if ((score += SCORE_INS) >= minscore)
6942	goto pop; // do next alternative
6943	} while (wgoodword[++gi] != NUL);
6944	minscore = score;
6945	} else { // both words continue
6946	// If not close to the limit, perform a change. Only try changes
6947	// that may lead to a lower score than "minscore".
6948	// round 0: try deleting a char from badword
6949	// round 1: try inserting a char in badword
6950	for (round = `0`; round <= `1`; ++round) {
6951	score_off = score + (round == `0` ? SCORE_DEL : SCORE_INS);
6952	if (score_off < minscore) {
6953	if (score_off + SCORE_EDIT_MIN >= minscore) {
6954	// Near the limit, rest of the words must match. We
6955	// can check that right now, no need to push an item
6956	// onto the stack.
6957	bi2 = bi + `1` - round;
6958	gi2 = gi + round;
6959	while (wgoodword[gi2] == wbadword[bi2]) {
6960	if (wgoodword[gi2] == NUL) {
6961	minscore = score_off;
6962	break;
6963	}
6964	++bi2;
6965	++gi2;
6966	}
6967	} else {
6968	// try deleting a character from badword later
6969	stack[stackidx].badi = bi + `1` - round;
6970	stack[stackidx].goodi = gi + round;
6971	stack[stackidx].score = score_off;
6972	++stackidx;
6973	}
6974	}
6975	}
6976
6977	if (score + SCORE_SWAP < minscore) {
6978	// If swapping two characters makes a match then the
6979	// substitution is more expensive, thus there is no need to
6980	// try both.
6981	if (gc == wbadword[bi + `1`] && bc == wgoodword[gi + `1`]) {
6982	// Swap two characters, that is: skip them.
6983	gi += `2`;
6984	bi += `2`;
6985	score += SCORE_SWAP;
6986	continue;
6987	}
6988	}
6989
6990	// Substitute one character for another which is the same
6991	// thing as deleting a character from both goodword and badword.
6992	// Use a better score when there is only a case difference.
6993	if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
6994	score += SCORE_ICASE;
6995	else {
6996	// For a similar character use SCORE_SIMILAR.
6997	if (slang != NULL
6998	&& slang->sl_has_map
6999	&& similar_chars(slang, gc, bc))
7000	score += SCORE_SIMILAR;
7001	else
7002	score += SCORE_SUBST;
7003	}
7004
7005	if (score < minscore) {
7006	// Do the substitution.
7007	++gi;
7008	++bi;
7009	continue;
7010	}
7011	}
7012	pop:
7013	// Get here to try the next alternative, pop it from the stack.
7014	if (stackidx == `0`) // stack is empty, finished
7015	break;
7016
7017	// pop an item from the stack
7018	--stackidx;
7019	gi = stack[stackidx].goodi;
7020	bi = stack[stackidx].badi;
7021	score = stack[stackidx].score;
7022	}
7023
7024	// When the score goes over "limit" it may actually be much higher.
7025	// Return a very large number to avoid going below the limit when giving a
7026	// bonus.
7027	if (minscore > limit)
7028	return SCORE_MAXMAX;
7029	return minscore;
7030	}
7031
7032	// ":spellinfo"
7033	void ex_spellinfo(exarg_T *eap)
7034	{
7035	if (no_spell_checking(curwin)) {
7036	return;
7037	}
7038
7039	msg_start();
7040	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len && !got_int; lpi++) {
7041	langp_T *const lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
7042	msg_puts("file: ");
7043	msg_puts((const char *)lp->lp_slang->sl_fname);
7044	msg_putchar(`'\n'`);
7045	const char *const p = (const char *)lp->lp_slang->sl_info;
7046	if (p != NULL) {
7047	msg_puts(p);
7048	msg_putchar(`'\n'`);
7049	}
7050	}
7051	msg_end();
7052	}
7053
// Bit flags controlling how words are dumped or matched.
#define DUMPFLAG_KEEPCASE   1   // round 2: keep-case tree
#define DUMPFLAG_COUNT      2   // include word count
#define DUMPFLAG_ICASE      4   // ignore case when finding matches
#define DUMPFLAG_ONECAP     8   // pattern starts with capital
#define DUMPFLAG_ALLCAP     16  // pattern is all capitals
7059
7060	// ":spelldump"
7061	void ex_spelldump(exarg_T *eap)
7062	{
7063	char_u *spl;
7064	long dummy;
7065
7066	if (no_spell_checking(curwin)) {
7067	return;
7068	}
7069	get_option_value((char_u *)"spl", &dummy, &spl, OPT_LOCAL);
7070
7071	// Create a new empty buffer in a new window.
7072	do_cmdline_cmd("new");
7073
7074	// enable spelling locally in the new window
7075	set_option_value("spell", true, "", OPT_LOCAL);
7076	set_option_value("spl", dummy, (char *)spl, OPT_LOCAL);
7077	xfree(spl);
7078
7079	if (!BUFEMPTY()) {
7080	return;
7081	}
7082
7083	spell_dump_compl(NULL, `0`, NULL, eap->forceit ? DUMPFLAG_COUNT : `0`);
7084
7085	// Delete the empty line that we started with.
7086	if (curbuf->b_ml.ml_line_count > `1`) {
7087	ml_delete(curbuf->b_ml.ml_line_count, false);
7088	}
7089	redraw_later(NOT_VALID);
7090	}
7091
7092	// Go through all possible words and:
7093	// 1. When "pat" is NULL: dump a list of all words in the current buffer.
7094	// "ic" and "dir" are not used.
7095	// 2. When "pat" is not NULL: add matching words to insert mode completion.
7096	void
7097	spell_dump_compl (
7098	char_u pat, // leading part of the word*
7099	int ic, // ignore case
7100	int dir, // direction for adding matches*
7101	int dumpflags_arg // DUMPFLAG_*
7102	)
7103	{
7104	langp_T *lp;
7105	slang_T *slang;
7106	idx_T arridx[MAXWLEN];
7107	int curi[MAXWLEN];
7108	char_u word[MAXWLEN];
7109	int c;
7110	char_u *byts;
7111	idx_T *idxs;
7112	linenr_T lnum = `0`;
7113	int round;
7114	int depth;
7115	int n;
7116	int flags;
7117	char_u region_names = NULL; // region names being used*
7118	bool do_region = true; // dump region names and numbers
7119	char_u *p;
7120	int dumpflags = dumpflags_arg;
7121	int patlen;
7122
7123	// When ignoring case or when the pattern starts with capital pass this on
7124	// to dump_word().
7125	if (pat != NULL) {
7126	if (ic)
7127	dumpflags \|= DUMPFLAG_ICASE;
7128	else {
7129	n = captype(pat, NULL);
7130	if (n == WF_ONECAP)
7131	dumpflags \|= DUMPFLAG_ONECAP;
7132	else if (n == WF_ALLCAP
7133	&& (int)STRLEN(pat) > mb_ptr2len(pat)
7134	)
7135	dumpflags \|= DUMPFLAG_ALLCAP;
7136	}
7137	}
7138
7139	// Find out if we can support regions: All languages must support the same
7140	// regions or none at all.
7141	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
7142	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
7143	p = lp->lp_slang->sl_regions;
7144	if (p[`0`] != `0`) {
7145	if (region_names == NULL) // first language with regions
7146	region_names = p;
7147	else if (STRCMP(region_names, p) != `0`) {
7148	do_region = false; // region names are different
7149	break;
7150	}
7151	}
7152	}
7153
7154	if (do_region && region_names != NULL) {
7155	if (pat == NULL) {
7156	vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
7157	ml_append(lnum++, IObuff, (colnr_T)`0`, false);
7158	}
7159	} else
7160	do_region = false;
7161
7162	// Loop over all files loaded for the entries in 'spelllang'.
7163	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
7164	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
7165	slang = lp->lp_slang;
7166	if (slang->sl_fbyts == NULL) // reloading failed
7167	continue;
7168
7169	if (pat == NULL) {
7170	vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
7171	ml_append(lnum++, IObuff, (colnr_T)`0`, false);
7172	}
7173
7174	// When matching with a pattern and there are no prefixes only use
7175	// parts of the tree that match "pat".
7176	if (pat != NULL && slang->sl_pbyts == NULL)
7177	patlen = (int)STRLEN(pat);
7178	else
7179	patlen = -`1`;
7180
7181	// round 1: case-folded tree
7182	// round 2: keep-case tree
7183	for (round = `1`; round <= `2`; ++round) {
7184	if (round == `1`) {
7185	dumpflags &= ~DUMPFLAG_KEEPCASE;
7186	byts = slang->sl_fbyts;
7187	idxs = slang->sl_fidxs;
7188	} else {
7189	dumpflags \|= DUMPFLAG_KEEPCASE;
7190	byts = slang->sl_kbyts;
7191	idxs = slang->sl_kidxs;
7192	}
7193	if (byts == NULL)
7194	continue; // array is empty
7195
7196	depth = `0`;
7197	arridx[`0`] = `0`;
7198	curi[`0`] = `1`;
7199	while (depth >= `0` && !got_int
7200	&& (pat == NULL \|\| !compl_interrupted)) {
7201	if (curi[depth] > byts[arridx[depth]]) {
7202	// Done all bytes at this node, go up one level.
7203	--depth;
7204	line_breakcheck();
7205	ins_compl_check_keys(`50`, false);
7206	} else {
7207	// Do one more byte at this node.
7208	n = arridx[depth] + curi[depth];
7209	++curi[depth];
7210	c = byts[n];
7211	if (c == `0`) {
7212	// End of word, deal with the word.
7213	// Don't use keep-case words in the fold-case tree,
7214	// they will appear in the keep-case tree.
7215	// Only use the word when the region matches.
7216	flags = (int)idxs[n];
7217	if ((round == `2` \|\| (flags & WF_KEEPCAP) == `0`)
7218	&& (flags & WF_NEEDCOMP) == `0`
7219	&& (do_region
7220	\|\| (flags & WF_REGION) == `0`
7221	\|\| (((unsigned)flags >> `16`)
7222	& lp->lp_region) != `0`)) {
7223	word[depth] = NUL;
7224	if (!do_region)
7225	flags &= ~WF_REGION;
7226
7227	// Dump the basic word if there is no prefix or
7228	// when it's the first one.
7229	c = (unsigned)flags >> `24`;
7230	if (c == `0` \|\| curi[depth] == `2`) {
7231	dump_word(slang, word, pat, dir,
7232	dumpflags, flags, lnum);
7233	if (pat == NULL)
7234	++lnum;
7235	}
7236
7237	// Apply the prefix, if there is one.
7238	if (c != `0`)
7239	lnum = dump_prefixes(slang, word, pat, dir,
7240	dumpflags, flags, lnum);
7241	}
7242	} else {
7243	// Normal char, go one level deeper.
7244	word[depth++] = c;
7245	arridx[depth] = idxs[n];
7246	curi[depth] = `1`;
7247
7248	// Check if this characters matches with the pattern.
7249	// If not skip the whole tree below it.
7250	// Always ignore case here, dump_word() will check
7251	// proper case later. This isn't exactly right when
7252	// length changes for multi-byte characters with
7253	// ignore case...
7254	assert(depth >= `0`);
7255	if (depth <= patlen
7256	&& mb_strnicmp(word, pat, (size_t)depth) != `0`)
7257	--depth;
7258	}
7259	}
7260	}
7261	}
7262	}
7263	}
7264
7265	// Dumps one word: apply case modifications and append a line to the buffer.
7266	// When "lnum" is zero add insert mode completion.
7267	static void dump_word(slang_T slang, char_u word, char_u pat, int* dir, int* dumpflags, int wordflags, linenr_T lnum)
7268	{
7269	bool keepcap = false;
7270	char_u *p;
7271	char_u *tw;
7272	char_u cword[MAXWLEN];
7273	char_u badword[MAXWLEN + `10`];
7274	int i;
7275	int flags = wordflags;
7276
7277	if (dumpflags & DUMPFLAG_ONECAP)
7278	flags \|= WF_ONECAP;
7279	if (dumpflags & DUMPFLAG_ALLCAP)
7280	flags \|= WF_ALLCAP;
7281
7282	if ((dumpflags & DUMPFLAG_KEEPCASE) == `0` && (flags & WF_CAPMASK) != `0`) {
7283	// Need to fix case according to "flags".
7284	make_case_word(word, cword, flags);
7285	p = cword;
7286	} else {
7287	p = word;
7288	if ((dumpflags & DUMPFLAG_KEEPCASE)
7289	&& ((captype(word, NULL) & WF_KEEPCAP) == `0`
7290	\|\| (flags & WF_FIXCAP) != `0`))
7291	keepcap = true;
7292	}
7293	tw = p;
7294
7295	if (pat == NULL) {
7296	// Add flags and regions after a slash.
7297	if ((flags & (WF_BANNED \| WF_RARE \| WF_REGION)) \|\| keepcap) {
7298	STRCPY(badword, p);
7299	STRCAT(badword, "/");
7300	if (keepcap) {
7301	STRCAT(badword, "=");
7302	}
7303	if (flags & WF_BANNED) {
7304	STRCAT(badword, "!");
7305	} else if (flags & WF_RARE) {
7306	STRCAT(badword, "?");
7307	}
7308	if (flags & WF_REGION) {
7309	for (i = `0`; i < `7`; i++) {
7310	if (flags & (`0x10000` << i)) {
7311	const size_t badword_len = STRLEN(badword);
7312	snprintf((char *)badword + badword_len,
7313	sizeof(badword) - badword_len,
7314	"%d", i + `1`);
7315	}
7316	}
7317	}
7318	p = badword;
7319	}
7320
7321	if (dumpflags & DUMPFLAG_COUNT) {
7322	hashitem_T *hi;
7323
7324	// Include the word count for ":spelldump!".
7325	hi = hash_find(&slang->sl_wordcount, tw);
7326	if (!HASHITEM_EMPTY(hi)) {
7327	vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
7328	tw, HI2WC(hi)->wc_count);
7329	p = IObuff;
7330	}
7331	}
7332
7333	ml_append(lnum, p, (colnr_T)`0`, false);
7334	} else if (((dumpflags & DUMPFLAG_ICASE)
7335	? mb_strnicmp(p, pat, STRLEN(pat)) == `0`
7336	: STRNCMP(p, pat, STRLEN(pat)) == `0`)
7337	&& ins_compl_add_infercase(p, (int)STRLEN(p),
7338	p_ic, NULL, *dir, false) == OK) {
7339	// if dir was BACKWARD then honor it just once
7340	*dir = FORWARD;
7341	}
7342	}
7343
7344	// For ":spelldump": Find matching prefixes for "word". Prepend each to
7345	// "word" and append a line to the buffer.
7346	// When "lnum" is zero add insert mode completion.
7347	// Return the updated line number.
7348	static linenr_T
7349	dump_prefixes (
7350	slang_T *slang,
7351	char_u word, // case-folded word*
7352	char_u *pat,
7353	int *dir,
7354	int dumpflags,
7355	int flags, // flags with prefix ID
7356	linenr_T startlnum
7357	)
7358	{
7359	idx_T arridx[MAXWLEN];
7360	int curi[MAXWLEN];
7361	char_u prefix[MAXWLEN];
7362	char_u word_up[MAXWLEN];
7363	bool has_word_up = false;
7364	int c;
7365	char_u *byts;
7366	idx_T *idxs;
7367	linenr_T lnum = startlnum;
7368	int depth;
7369	int n;
7370	int len;
7371	int i;
7372
7373	// If the word starts with a lower-case letter make the word with an
7374	// upper-case letter in word_up[].
7375	c = PTR2CHAR(word);
7376	if (SPELL_TOUPPER(c) != c) {
7377	onecap_copy(word, word_up, true);
7378	has_word_up = true;
7379	}
7380
7381	byts = slang->sl_pbyts;
7382	idxs = slang->sl_pidxs;
7383	if (byts != NULL) { // array not is empty
7384	// Loop over all prefixes, building them byte-by-byte in prefix[].
7385	// When at the end of a prefix check that it supports "flags".
7386	depth = `0`;
7387	arridx[`0`] = `0`;
7388	curi[`0`] = `1`;
7389	while (depth >= `0` && !got_int) {
7390	n = arridx[depth];
7391	len = byts[n];
7392	if (curi[depth] > len) {
7393	// Done all bytes at this node, go up one level.
7394	--depth;
7395	line_breakcheck();
7396	} else {
7397	// Do one more byte at this node.
7398	n += curi[depth];
7399	++curi[depth];
7400	c = byts[n];
7401	if (c == `0`) {
7402	// End of prefix, find out how many IDs there are.
7403	for (i = `1`; i < len; ++i)
7404	if (byts[n + i] != `0`)
7405	break;
7406	curi[depth] += i - `1`;
7407
7408	c = valid_word_prefix(i, n, flags, word, slang, false);
7409	if (c != `0`) {
7410	STRLCPY(prefix + depth, word, MAXWLEN - depth);
7411	dump_word(slang, prefix, pat, dir, dumpflags,
7412	(c & WF_RAREPFX) ? (flags \| WF_RARE)
7413	: flags, lnum);
7414	if (lnum != `0`)
7415	++lnum;
7416	}
7417
7418	// Check for prefix that matches the word when the
7419	// first letter is upper-case, but only if the prefix has
7420	// a condition.
7421	if (has_word_up) {
7422	c = valid_word_prefix(i, n, flags, word_up, slang,
7423	true);
7424	if (c != `0`) {
7425	STRLCPY(prefix + depth, word_up, MAXWLEN - depth);
7426	dump_word(slang, prefix, pat, dir, dumpflags,
7427	(c & WF_RAREPFX) ? (flags \| WF_RARE)
7428	: flags, lnum);
7429	if (lnum != `0`)
7430	++lnum;
7431	}
7432	}
7433	} else {
7434	// Normal char, go one level deeper.
7435	prefix[depth++] = c;
7436	arridx[depth] = idxs[n];
7437	curi[depth] = `1`;
7438	}
7439	}
7440	}
7441	}
7442
7443	return lnum;
7444	}
7445
7446	// Move "p" to the end of word "start".
7447	// Uses the spell-checking word characters.
7448	char_u spell_to_word_end(char_u start, win_T *win)
7449	{
7450	char_u *p = start;
7451
7452	while (*p != NUL && spell_iswordp(p, win)) {
7453	MB_PTR_ADV(p);
7454	}
7455	return p;
7456	}
7457
7458	// For Insert mode completion CTRL-X s:
7459	// Find start of the word in front of column "startcol".
7460	// We don't check if it is badly spelled, with completion we can only change
7461	// the word in front of the cursor.
7462	// Returns the column number of the word.
7463	int spell_word_start(int startcol)
7464	{
7465	char_u *line;
7466	char_u *p;
7467	int col = `0`;
7468
7469	if (no_spell_checking(curwin)) {
7470	return startcol;
7471	}
7472
7473	// Find a word character before "startcol".
7474	line = get_cursor_line_ptr();
7475	for (p = line + startcol; p > line; ) {
7476	MB_PTR_BACK(line, p);
7477	if (spell_iswordp_nmw(p, curwin)) {
7478	break;
7479	}
7480	}
7481
7482	// Go back to start of the word.
7483	while (p > line) {
7484	col = (int)(p - line);
7485	MB_PTR_BACK(line, p);
7486	if (!spell_iswordp(p, curwin)) {
7487	break;
7488	}
7489	col = `0`;
7490	}
7491
7492	return col;
7493	}
7494
7495	// Need to check for 'spellcapcheck' now, the word is removed before
7496	// expand_spelling() is called. Therefore the ugly global variable.
7497	static bool spell_expand_need_cap;
7498
7499	void spell_expand_check_cap(colnr_T col)
7500	{
7501	spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
7502	}
7503
7504	// Get list of spelling suggestions.
7505	// Used for Insert mode completion CTRL-X ?.
7506	// Returns the number of matches. The matches are in "matchp[]", array of
7507	// allocated strings.
7508	int expand_spelling(linenr_T lnum, char_u pat, char_u **matchp)
7509	{
7510	garray_T ga;
7511
7512	spell_suggest_list(&ga, pat, `100`, spell_expand_need_cap, true);
7513	*matchp = ga.ga_data;
7514	return ga.ga_len;
7515	}
7516

Browse the source code of neovim/src/nvim/spell.c