spellfile.c source code [neovim/src/nvim/spellfile.c]

1	// This is an open source non-commercial project. Dear PVS-Studio, please check
2	// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
3
4	// spellfile.c: code for reading and writing spell files.
5	//
6	// See spell.c for information about spell checking.
7
8	// Vim spell file format: <HEADER>
9	// <SECTIONS>
10	// <LWORDTREE>
11	// <KWORDTREE>
12	// <PREFIXTREE>
13	//
14	// <HEADER>: <fileID> <versionnr>
15	//
16	// <fileID> 8 bytes "VIMspell"
17	// <versionnr> 1 byte VIMSPELLVERSION
18	//
19	//
20	// Sections make it possible to add information to the .spl file without
21	// making it incompatible with previous versions. There are two kinds of
22	// sections:
23	// 1. Not essential for correct spell checking. E.g. for making suggestions.
24	// These are skipped when not supported.
25	// 2. Optional information, but essential for spell checking when present.
26	// E.g. conditions for affixes. When this section is present but not
27	// supported an error message is given.
28	//
29	// <SECTIONS>: <section> ... <sectionend>
30	//
31	// <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
32	//
33	// <sectionID> 1 byte number from 0 to 254 identifying the section
34	//
35	// <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
36	// spell checking
37	//
38	// <sectionlen> 4 bytes length of section contents, MSB first
39	//
40	// <sectionend> 1 byte SN_END
41	//
42	//
43	// sectionID == SN_INFO: <infotext>
44	// <infotext> N bytes free format text with spell file info (version,
45	// website, etc)
46	//
47	// sectionID == SN_REGION: <regionname> ...
48	// <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc.
49	// Lower case.
50	// First <regionname> is region 1.
51	//
52	// sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
53	// <folcharslen> <folchars>
54	// <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
55	// <charflags> N bytes List of flags (first one is for character 128):
56	// 0x01 word character CF_WORD
57	// 0x02 upper-case character CF_UPPER
58	// <folcharslen> 2 bytes Number of bytes in <folchars>.
59	// <folchars> N bytes Folded characters, first one is for character 128.
60	//
61	// sectionID == SN_MIDWORD: <midword>
62	// <midword> N bytes Characters that are word characters only when used
63	// in the middle of a word.
64	//
65	// sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
66	// <prefcondcnt> 2 bytes Number of <prefcond> items following.
67	// <prefcond> : <condlen> <condstr>
68	// <condlen> 1 byte Length of <condstr>.
69	// <condstr> N bytes Condition for the prefix.
70	//
71	// sectionID == SN_REP: <repcount> <rep> ...
72	// <repcount> 2 bytes number of <rep> items, MSB first.
73	// <rep> : <repfromlen> <repfrom> <reptolen> <repto>
74	// <repfromlen> 1 byte length of <repfrom>
75	// <repfrom> N bytes "from" part of replacement
76	// <reptolen> 1 byte length of <repto>
77	// <repto> N bytes "to" part of replacement
78	//
79	// sectionID == SN_REPSAL: <repcount> <rep> ...
80	// just like SN_REP but for soundfolded words
81	//
82	// sectionID == SN_SAL: <salflags> <salcount> <sal> ...
83	// <salflags> 1 byte flags for soundsalike conversion:
84	// SAL_F0LLOWUP
85	// SAL_COLLAPSE
86	// SAL_REM_ACCENTS
87	// <salcount> 2 bytes number of <sal> items following
88	// <sal> : <salfromlen> <salfrom> <saltolen> <salto>
89	// <salfromlen> 1 byte length of <salfrom>
90	// <salfrom> N bytes "from" part of soundsalike
91	// <saltolen> 1 byte length of <salto>
92	// <salto> N bytes "to" part of soundsalike
93	//
94	// sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
95	// <sofofromlen> 2 bytes length of <sofofrom>
96	// <sofofrom> N bytes "from" part of soundfold
97	// <sofotolen> 2 bytes length of <sofoto>
98	// <sofoto> N bytes "to" part of soundfold
99	//
100	// sectionID == SN_SUGFILE: <timestamp>
101	// <timestamp> 8 bytes time in seconds that must match with .sug file
102	//
103	// sectionID == SN_NOSPLITSUGS: nothing
104	//
105	// sectionID == SN_NOCOMPOUNDSUGS: nothing
106	//
107	// sectionID == SN_WORDS: <word> ...
108	// <word> N bytes NUL terminated common word
109	//
110	// sectionID == SN_MAP: <mapstr>
111	// <mapstr> N bytes String with sequences of similar characters,
112	// separated by slashes.
113	//
114	// sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
115	// <comppatcount> <comppattern> ... <compflags>
116	// <compmax> 1 byte Maximum nr of words in compound word.
117	// <compminlen> 1 byte Minimal word length for compounding.
118	// <compsylmax> 1 byte Maximum nr of syllables in compound word.
119	// <compoptions> 2 bytes COMP_ flags.
120	// <comppatcount> 2 bytes number of <comppattern> following
121	// <compflags> N bytes Flags from COMPOUNDRULE items, separated by
122	// slashes.
123	//
124	// <comppattern>: <comppatlen> <comppattext>
125	// <comppatlen> 1 byte length of <comppattext>
126	// <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
127	//
128	// sectionID == SN_NOBREAK: (empty, its presence is what matters)
129	//
130	// sectionID == SN_SYLLABLE: <syllable>
131	// <syllable> N bytes String from SYLLABLE item.
132	//
133	// <LWORDTREE>: <wordtree>
134	//
135	// <KWORDTREE>: <wordtree>
136	//
137	// <PREFIXTREE>: <wordtree>
138	//
139	//
140	// <wordtree>: <nodecount> <nodedata> ...
141	//
142	// <nodecount> 4 bytes Number of nodes following. MSB first.
143	//
144	// <nodedata>: <siblingcount> <sibling> ...
145	//
146	// <siblingcount> 1 byte Number of siblings in this node. The siblings
147	// follow in sorted order.
148	//
// <sibling>: <byte> [ <nodeidx> <xbyte>
//                      | <flags> [<flags2>] [<region>] [<affixID>]
//                      | [<pflags>] <affixID> <prefcondnr> ]
152	//
153	// <byte> 1 byte Byte value of the sibling. Special cases:
154	// BY_NOFLAGS: End of word without flags and for all
155	// regions.
156	// For PREFIXTREE <affixID> and
157	// <prefcondnr> follow.
158	// BY_FLAGS: End of word, <flags> follow.
159	// For PREFIXTREE <pflags>, <affixID>
160	// and <prefcondnr> follow.
161	// BY_FLAGS2: End of word, <flags> and <flags2>
162	// follow. Not used in PREFIXTREE.
163	// BY_INDEX: Child of sibling is shared, <nodeidx>
164	// and <xbyte> follow.
165	//
166	// <nodeidx> 3 bytes Index of child for this sibling, MSB first.
167	//
168	// <xbyte> 1 byte Byte value of the sibling.
169	//
170	// <flags> 1 byte Bitmask of:
171	// WF_ALLCAP word must have only capitals
172	// WF_ONECAP first char of word must be capital
173	// WF_KEEPCAP keep-case word
174	// WF_FIXCAP keep-case word, all caps not allowed
175	// WF_RARE rare word
176	// WF_BANNED bad word
177	// WF_REGION <region> follows
178	// WF_AFX <affixID> follows
179	//
180	// <flags2> 1 byte Bitmask of:
181	// WF_HAS_AFF >> 8 word includes affix
182	// WF_NEEDCOMP >> 8 word only valid in compound
183	// WF_NOSUGGEST >> 8 word not used for suggestions
184	// WF_COMPROOT >> 8 word already a compound
185	// WF_NOCOMPBEF >> 8 no compounding before this word
186	// WF_NOCOMPAFT >> 8 no compounding after this word
187	//
188	// <pflags> 1 byte Bitmask of:
189	// WFP_RARE rare prefix
190	// WFP_NC non-combining prefix
191	// WFP_UP letter after prefix made upper case
192	//
193	// <region> 1 byte Bitmask for regions in which word is valid. When
194	// omitted it's valid in all regions.
195	// Lowest bit is for region 1.
196	//
197	// <affixID> 1 byte ID of affix that can be used with this word. In
198	// PREFIXTREE used for the required prefix ID.
199	//
200	// <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
201	// from HEADER.
202	//
203	// All text characters are in 'encoding', but stored as single bytes.
204
205	// Vim .sug file format: <SUGHEADER>
206	// <SUGWORDTREE>
207	// <SUGTABLE>
208	//
209	// <SUGHEADER>: <fileID> <versionnr> <timestamp>
210	//
211	// <fileID> 6 bytes "VIMsug"
212	// <versionnr> 1 byte VIMSUGVERSION
213	// <timestamp> 8 bytes timestamp that must match with .spl file
214	//
215	//
216	// <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
217	//
218	//
219	// <SUGTABLE>: <sugwcount> <sugline> ...
220	//
221	// <sugwcount> 4 bytes number of <sugline> following
222	//
223	// <sugline>: <sugnr> ... NUL
224	//
225	// <sugnr>: X bytes word number that results in this soundfolded word,
226	// stored as an offset to the previous number in as
227	// few bytes as possible, see offset2bytes())
228
229	#include <stdio.h>
230	#include <stdint.h>
231	#include <wctype.h>
232
233	#include "nvim/vim.h"
234	#include "nvim/spell_defs.h"
235	#include "nvim/ascii.h"
236	#include "nvim/buffer.h"
237	#include "nvim/charset.h"
238	#include "nvim/ex_cmds2.h"
239	#include "nvim/fileio.h"
240	#include "nvim/memory.h"
241	#include "nvim/memline.h"
242	#include "nvim/misc1.h"
243	#include "nvim/option.h"
244	#include "nvim/os/os.h"
245	#include "nvim/path.h"
246	#include "nvim/regexp.h"
247	#include "nvim/screen.h"
248	#include "nvim/spell.h"
249	#include "nvim/spellfile.h"
250	#include "nvim/ui.h"
251	#include "nvim/undo.h"
252
253	#ifndef UNIX // it's in os/unix_defs.h for Unix
254	# include <time.h> // for time_t
255	#endif
256
// Special byte values for <byte>.  Some are only used in the tree for
// postponed prefixes, some only in the other trees.  This is a bit messy...
#define BY_NOFLAGS 0            // end of word without flags or region; for
                                // postponed prefix: no <pflags>
#define BY_INDEX 1              // child is shared, index follows
#define BY_FLAGS 2              // end of word, <flags> byte follows; for
                                // postponed prefix: <pflags> follows
#define BY_FLAGS2 3             // end of word, <flags> and <flags2> bytes
                                // follow; never used in prefix tree
#define BY_SPECIAL BY_FLAGS2    // highest special byte value

// Flags used in .spl file for soundsalike flags (the <salflags> byte).
// Note: "SAL_F0LLOWUP" is spelled with a digit zero; keep it that way, the
// name is used as-is elsewhere.
#define SAL_F0LLOWUP 1
#define SAL_COLLAPSE 2
#define SAL_REM_ACCENTS 4

#define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
#define VIMSPELLMAGICL (sizeof(VIMSPELLMAGIC) - 1)  // length without the NUL
#define VIMSPELLVERSION 50

// Section IDs.  Only renumber them when VIMSPELLVERSION changes!
#define SN_REGION 0             // <regionname> section
#define SN_CHARFLAGS 1          // charflags section
#define SN_MIDWORD 2            // <midword> section
#define SN_PREFCOND 3           // <prefcond> section
#define SN_REP 4                // REP items section
#define SN_SAL 5                // SAL items section
#define SN_SOFO 6               // soundfolding section
#define SN_MAP 7                // MAP items section
#define SN_COMPOUND 8           // compound words section
#define SN_SYLLABLE 9           // syllable section
#define SN_NOBREAK 10           // NOBREAK section
#define SN_SUGFILE 11           // timestamp for .sug file
#define SN_REPSAL 12            // REPSAL items section
#define SN_WORDS 13             // common words
#define SN_NOSPLITSUGS 14       // don't split word for suggestions
#define SN_INFO 15              // info section
#define SN_NOCOMPOUNDSUGS 16    // don't compound for suggestions
#define SN_END 255              // end of sections

#define SNF_REQUIRED 1          // <sectionflags>: required section

// Bits used in the <charflags> bytes of the SN_CHARFLAGS section.
#define CF_WORD 0x01            // word character
#define CF_UPPER 0x02           // upper-case character
302	static char *e_spell_trunc = N_("E758: Truncated spell file");
303	static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
304	static char *e_affname = N_("Affix name too long in %s line %d: %s");
305	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
306	static char *e_affrange = N_(
307	"E762: Character in FOL, LOW or UPP is out of range");
308	static char *msg_compressing = N_("Compressing word tree...");
309
310	#define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff
311	// and .dic file.
312	// Main structure to store the contents of a ".aff" file.
313	typedef struct afffile_S {
314	char_u af_enc; // "SET", normalized, alloc'ed string or NULL*
315	int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG
316	unsigned af_rare; // RARE ID for rare word
317	unsigned af_keepcase; // KEEPCASE ID for keep-case word
318	unsigned af_bad; // BAD ID for banned word
319	unsigned af_needaffix; // NEEDAFFIX ID
320	unsigned af_circumfix; // CIRCUMFIX ID
321	unsigned af_needcomp; // NEEDCOMPOUND ID
322	unsigned af_comproot; // COMPOUNDROOT ID
323	unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID
324	unsigned af_comppermit; // COMPOUNDPERMITFLAG ID
325	unsigned af_nosuggest; // NOSUGGEST ID
326	int af_pfxpostpone; // postpone prefixes without chop string and
327	// without flags
328	bool af_ignoreextra; // IGNOREEXTRA present
329	hashtab_T af_pref; // hashtable for prefixes, affheader_T
330	hashtab_T af_suff; // hashtable for suffixes, affheader_T
331	hashtab_T af_comp; // hashtable for compound flags, compitem_T
332	} afffile_T;
333
334	#define AFT_CHAR 0 // flags are one character
335	#define AFT_LONG 1 // flags are two characters
336	#define AFT_CAPLONG 2 // flags are one or two characters
337	#define AFT_NUM 3 // flags are numbers, comma separated
338
339	typedef struct affentry_S affentry_T;
340	// Affix entry from ".aff" file. Used for prefixes and suffixes.
341	struct affentry_S {
342	affentry_T ae_next; // next affix with same name/number*
343	char_u ae_chop; // text to chop off basic word (can be NULL)*
344	char_u ae_add; // text to add to basic word (can be NULL)*
345	char_u ae_flags; // flags on the affix (can be NULL)*
346	char_u ae_cond; // condition (NULL for ".")*
347	regprog_T ae_prog; // regexp program for ae_cond or NULL*
348	char ae_compforbid; // COMPOUNDFORBIDFLAG found
349	char ae_comppermit; // COMPOUNDPERMITFLAG found
350	};
351
352	# define AH_KEY_LEN 17 // 2 x 8 bytes + NUL
353
354	// Affix header from ".aff" file. Used for af_pref and af_suff.
355	typedef struct affheader_S {
356	char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix
357	unsigned ah_flag; // affix name as number, uses "af_flagtype"
358	int ah_newID; // prefix ID after renumbering; 0 if not used
359	int ah_combine; // suffix may combine with prefix
360	int ah_follows; // another affix block should be following
361	affentry_T ah_first; // first affix entry*
362	} affheader_T;
363
364	#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
365
366	// Flag used in compound items.
367	typedef struct compitem_S {
368	char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound
369	unsigned ci_flag; // affix name as number, uses "af_flagtype"
370	int ci_newID; // affix ID after renumbering.
371	} compitem_T;
372
373	#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
374
375	// Structure that is used to store the items in the word tree. This avoids
376	// the need to keep track of each allocated thing, everything is freed all at
377	// once after ":mkspell" is done.
378	// Note: "sb_next" must be just before "sb_data" to make sure the alignment of
379	// "sb_data" is correct for systems where pointers must be aligned on
380	// pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
381	#define SBLOCKSIZE 16000 // size of sb_data
382	typedef struct sblock_S sblock_T;
383	struct sblock_S {
384	int sb_used; // nr of bytes already in use
385	sblock_T sb_next; // next block in list*
386	char_u sb_data[`1`]; // data, actually longer
387	};
388
389	// A node in the tree.
390	typedef struct wordnode_S wordnode_T;
391	struct wordnode_S {
392	union { // shared to save space
393	char_u hashkey[`6`]; // the hash key, only used while compressing
394	int index; // index in written nodes (valid after first
395	// round)
396	} wn_u1;
397	union { // shared to save space
398	wordnode_T next; // next node with same hash key*
399	wordnode_T wnode; // parent node that will write this node*
400	} wn_u2;
401	wordnode_T wn_child; // child (next byte in word)*
402	wordnode_T wn_sibling; // next sibling (alternate byte in word,*
403	// always sorted)
404	int wn_refs; // Nr. of references to this node. Only
405	// relevant for first node in a list of
406	// siblings, in following siblings it is
407	// always one.
408	char_u wn_byte; // Byte for this node. NUL for word end
409
410	// Info for when "wn_byte" is NUL.
411	// In PREFIXTREE "wn_region" is used for the prefcondnr.
412	// In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
413	// "wn_region" the LSW of the wordnr.
414	char_u wn_affixID; // supported/required prefix ID or 0
415	uint16_t wn_flags; // WF_ flags
416	short wn_region; // region mask
417
418	#ifdef SPELL_PRINTTREE
419	int wn_nr; // sequence nr for printing
420	#endif
421	};
422
423	#define WN_MASK 0xffff // mask relevant bits of "wn_flags"
424
425	#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
426
427	// Info used while reading the spell files.
428	typedef struct spellinfo_S {
429	wordnode_T si_foldroot; // tree with case-folded words*
430	long si_foldwcount; // nr of words in si_foldroot
431
432	wordnode_T si_keeproot; // tree with keep-case words*
433	long si_keepwcount; // nr of words in si_keeproot
434
435	wordnode_T si_prefroot; // tree with postponed prefixes*
436
437	long si_sugtree; // creating the soundfolding trie
438
439	sblock_T si_blocks; // memory blocks used*
440	long si_blocks_cnt; // memory blocks allocated
441	int si_did_emsg; // TRUE when ran out of memory
442
443	long si_compress_cnt; // words to add before lowering
444	// compression limit
445	wordnode_T si_first_free; // List of nodes that have been freed during*
446	// compression, linked by "wn_child" field.
447	long si_free_count; // number of nodes in si_first_free
448	#ifdef SPELL_PRINTTREE
449	int si_wordnode_nr; // sequence nr for nodes
450	#endif
451	buf_T si_spellbuf; // buffer used to store soundfold word table*
452
453	int si_ascii; // handling only ASCII words
454	int si_add; // addition file
455	int si_clear_chartab; // when TRUE clear char tables
456	int si_region; // region mask
457	vimconv_T si_conv; // for conversion to 'encoding'
458	int si_memtot; // runtime memory used
459	int si_verbose; // verbose messages
460	int si_msg_count; // number of words added since last message
461	char_u si_info; // info text chars or NULL*
462	int si_region_count; // number of regions supported (1 when there
463	// are no regions)
464	char_u si_region_name[MAXREGIONS * `2` + `1`];
465	// region names; used only if
466	// si_region_count > 1)
467
468	garray_T si_rep; // list of fromto_T entries from REP lines
469	garray_T si_repsal; // list of fromto_T entries from REPSAL lines
470	garray_T si_sal; // list of fromto_T entries from SAL lines
471	char_u si_sofofr; // SOFOFROM text*
472	char_u si_sofoto; // SOFOTO text*
473	int si_nosugfile; // NOSUGFILE item found
474	int si_nosplitsugs; // NOSPLITSUGS item found
475	int si_nocompoundsugs; // NOCOMPOUNDSUGS item found
476	int si_followup; // soundsalike: ?
477	int si_collapse; // soundsalike: ?
478	hashtab_T si_commonwords; // hashtable for common words
479	time_t si_sugtime; // timestamp for .sug file
480	int si_rem_accents; // soundsalike: remove accents
481	garray_T si_map; // MAP info concatenated
482	char_u si_midword; // MIDWORD chars or NULL*
483	int si_compmax; // max nr of words for compounding
484	int si_compminlen; // minimal length for compounding
485	int si_compsylmax; // max nr of syllables for compounding
486	int si_compoptions; // COMP_ flags
487	garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as
488	// a string
489	char_u si_compflags; // flags used for compounding*
490	char_u si_nobreak; // NOBREAK
491	char_u si_syllable; // syllable string*
492	garray_T si_prefcond; // table with conditions for postponed
493	// prefixes, each stored as a string
494	int si_newprefID; // current value for ah_newID
495	int si_newcompID; // current value for compound ID
496	} spellinfo_T;
497
498	#ifdef INCLUDE_GENERATED_DECLARATIONS
499	# include "spellfile.c.generated.h"
500	#endif
501
/// Read n bytes from fd to buf, returning on errors
///
/// Expands to a statement that `return`s from the *enclosing function* when
/// fewer than n bytes could be read, so it may only be used in functions
/// returning an SP_*ERROR-compatible int.  Each argument is evaluated once.
///
/// @param[out]  buf  Buffer to read to, must be at least n bytes long.
/// @param[in]  n  Amount of bytes to read.
/// @param  fd  FILE* to read from.
/// @param  exit_code  Code to run before returning.
///
/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if
///         there are not enough bytes, returns SP_OTHERERROR if reading failed.
#define SPELL_READ_BYTES(buf, n, fd, exit_code) \
  do { \
    const size_t n__SPRB = (n); \
    FILE *const fd__SPRB = (fd); \
    char *const buf__SPRB = (buf); \
    const size_t read_bytes__SPRB = fread(buf__SPRB, 1, n__SPRB, fd__SPRB); \
    if (read_bytes__SPRB != n__SPRB) { \
      exit_code; \
      return feof(fd__SPRB) ? SP_TRUNCERROR : SP_OTHERERROR; \
    } \
  } while (0)
522
/// Like #SPELL_READ_BYTES, but also error out if NUL byte was read
///
/// As with #SPELL_READ_BYTES, this `return`s from the enclosing function on
/// failure, after running exit_code.
///
/// @param[out]  buf  Buffer to read to, must be at least n bytes long.
/// @param[in]  n  Amount of bytes to read.
/// @param  fd  FILE* to read from.
/// @param  exit_code  Code to run before returning.
///
/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if
///         there are not enough bytes, returns SP_OTHERERROR if reading failed,
///         returns SP_FORMERROR if read out a NUL byte.
#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \
  do { \
    const size_t n__SPRNB = (n); \
    FILE *const fd__SPRNB = (fd); \
    char *const buf__SPRNB = (buf); \
    SPELL_READ_BYTES(buf__SPRNB, n__SPRNB, fd__SPRNB, exit_code); \
    if (memchr(buf__SPRNB, NUL, (size_t)n__SPRNB)) { \
      exit_code; \
      return SP_FORMERROR; \
    } \
  } while (0)
539
540	/// Check that spell file starts with a magic string
541	///
542	/// Does not check for version of the file.
543	///
544	/// @param fd File to check.
545	///
546	/// @return 0 in case of success, SP_TRUNCERROR if file contains not enough
547	/// bytes, SP_FORMERROR if it does not match magic string and
548	/// SP_OTHERERROR if reading file failed.
549	static inline int spell_check_magic_string(FILE *const fd)
550	FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
551	{
552	char buf[VIMSPELLMAGICL];
553	SPELL_READ_BYTES(buf, VIMSPELLMAGICL, fd, ;);
554	if (memcmp(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != `0`) {
555	return SP_FORMERROR;
556	}
557	return `0`;
558	}
559
560	// Load one spell file and store the info into a slang_T.
561	//
562	// This is invoked in three ways:
563	// - From spell_load_cb() to load a spell file for the first time. "lang" is
564	// the language name, "old_lp" is NULL. Will allocate an slang_T.
565	// - To reload a spell file that was changed. "lang" is NULL and "old_lp"
566	// points to the existing slang_T.
567	// - Just after writing a .spl file; it's read back to produce the .sug file.
568	// "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
569	//
570	// Returns the slang_T the spell file was loaded into. NULL for error.
571	slang_T *
572	spell_load_file (
573	char_u *fname,
574	char_u *lang,
575	slang_T *old_lp,
576	bool silent // no error if file doesn't exist
577	)
578	{
579	FILE *fd;
580	char_u *p;
581	int n;
582	int len;
583	char_u *save_sourcing_name = sourcing_name;
584	linenr_T save_sourcing_lnum = sourcing_lnum;
585	slang_T *lp = NULL;
586	int c = `0`;
587	int res;
588
589	fd = os_fopen((char *)fname, "r");
590	if (fd == NULL) {
591	if (!silent)
592	EMSG2(_(e_notopen), fname);
593	else if (p_verbose > `2`) {
594	verbose_enter();
595	smsg((char *)e_notopen, fname);
596	verbose_leave();
597	}
598	goto endFAIL;
599	}
600	if (p_verbose > `2`) {
601	verbose_enter();
602	smsg(_("Reading spell file \"%s\""), fname);
603	verbose_leave();
604	}
605
606	if (old_lp == NULL) {
607	lp = slang_alloc(lang);
608
609	// Remember the file name, used to reload the file when it's updated.
610	lp->sl_fname = vim_strsave(fname);
611
612	// Check for .add.spl.
613	lp->sl_add = strstr((char *)path_tail(fname), SPL_FNAME_ADD) != NULL;
614	} else
615	lp = old_lp;
616
617	// Set sourcing_name, so that error messages mention the file name.
618	sourcing_name = fname;
619	sourcing_lnum = `0`;
620
621	// <HEADER>: <fileID>
622	const int scms_ret = spell_check_magic_string(fd);
623	switch (scms_ret) {
624	case SP_FORMERROR:
625	case SP_TRUNCERROR: {
626	emsgf(_("E757: This does not look like a spell file"));
627	goto endFAIL;
628	}
629	case SP_OTHERERROR: {
630	emsgf(_("E5042: Failed to read spell file %s: %s"),
631	fname, strerror(ferror(fd)));
632	}
633	case `0`: {
634	break;
635	}
636	}
637	c = getc(fd); // <versionnr>
638	if (c < VIMSPELLVERSION) {
639	EMSG(_("E771: Old spell file, needs to be updated"));
640	goto endFAIL;
641	} else if (c > VIMSPELLVERSION) {
642	EMSG(_("E772: Spell file is for newer version of Vim"));
643	goto endFAIL;
644	}
645
646
647	// <SECTIONS>: <section> ... <sectionend>
648	// <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
649	for (;; ) {
650	n = getc(fd); // <sectionID> or <sectionend>
651	if (n == SN_END)
652	break;
653	c = getc(fd); // <sectionflags>
654	len = get4c(fd); // <sectionlen>
655	if (len < `0`)
656	goto truncerr;
657
658	res = `0`;
659	switch (n) {
660	case SN_INFO:
661	lp->sl_info = READ_STRING(fd, len); // <infotext>
662	if (lp->sl_info == NULL)
663	goto endFAIL;
664	break;
665
666	case SN_REGION:
667	res = read_region_section(fd, lp, len);
668	break;
669
670	case SN_CHARFLAGS:
671	res = read_charflags_section(fd);
672	break;
673
674	case SN_MIDWORD:
675	lp->sl_midword = READ_STRING(fd, len); // <midword>
676	if (lp->sl_midword == NULL)
677	goto endFAIL;
678	break;
679
680	case SN_PREFCOND:
681	res = read_prefcond_section(fd, lp);
682	break;
683
684	case SN_REP:
685	res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
686	break;
687
688	case SN_REPSAL:
689	res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
690	break;
691
692	case SN_SAL:
693	res = read_sal_section(fd, lp);
694	break;
695
696	case SN_SOFO:
697	res = read_sofo_section(fd, lp);
698	break;
699
700	case SN_MAP:
701	p = READ_STRING(fd, len); // <mapstr>
702	if (p == NULL)
703	goto endFAIL;
704	set_map_str(lp, p);
705	xfree(p);
706	break;
707
708	case SN_WORDS:
709	res = read_words_section(fd, lp, len);
710	break;
711
712	case SN_SUGFILE:
713	lp->sl_sugtime = get8ctime(fd); // <timestamp>
714	break;
715
716	case SN_NOSPLITSUGS:
717	lp->sl_nosplitsugs = true;
718	break;
719
720	case SN_NOCOMPOUNDSUGS:
721	lp->sl_nocompoundsugs = true;
722	break;
723
724	case SN_COMPOUND:
725	res = read_compound(fd, lp, len);
726	break;
727
728	case SN_NOBREAK:
729	lp->sl_nobreak = true;
730	break;
731
732	case SN_SYLLABLE:
733	lp->sl_syllable = READ_STRING(fd, len); // <syllable>
734	if (lp->sl_syllable == NULL)
735	goto endFAIL;
736	if (init_syl_tab(lp) == FAIL)
737	goto endFAIL;
738	break;
739
740	default:
741	// Unsupported section. When it's required give an error
742	// message. When it's not required skip the contents.
743	if (c & SNF_REQUIRED) {
744	EMSG(_("E770: Unsupported section in spell file"));
745	goto endFAIL;
746	}
747	while (--len >= `0`)
748	if (getc(fd) < `0`)
749	goto truncerr;
750	break;
751	}
752	someerror:
753	if (res == SP_FORMERROR) {
754	EMSG(_(e_format));
755	goto endFAIL;
756	}
757	if (res == SP_TRUNCERROR) {
758	truncerr:
759	EMSG(_(e_spell_trunc));
760	goto endFAIL;
761	}
762	if (res == SP_OTHERERROR)
763	goto endFAIL;
764	}
765
766	// <LWORDTREE>
767	res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, false, `0`);
768	if (res != `0`)
769	goto someerror;
770
771	// <KWORDTREE>
772	res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, false, `0`);
773	if (res != `0`)
774	goto someerror;
775
776	// <PREFIXTREE>
777	res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, true,
778	lp->sl_prefixcnt);
779	if (res != `0`)
780	goto someerror;
781
782	// For a new file link it in the list of spell files.
783	if (old_lp == NULL && lang != NULL) {
784	lp->sl_next = first_lang;
785	first_lang = lp;
786	}
787
788	goto endOK;
789
790	endFAIL:
791	if (lang != NULL)
792	// truncating the name signals the error to spell_load_lang()
793	*lang = NUL;
794	if (lp != NULL && old_lp == NULL)
795	slang_free(lp);
796	lp = NULL;
797
798	endOK:
799	if (fd != NULL)
800	fclose(fd);
801	sourcing_name = save_sourcing_name;
802	sourcing_lnum = save_sourcing_lnum;
803
804	return lp;
805	}
806
807	// Fill in the wordcount fields for a trie.
808	// Returns the total number of words.
809	static void tree_count_words(char_u byts, idx_T idxs)
810	{
811	int depth;
812	idx_T arridx[MAXWLEN];
813	int curi[MAXWLEN];
814	int c;
815	idx_T n;
816	int wordcount[MAXWLEN];
817
818	arridx[`0`] = `0`;
819	curi[`0`] = `1`;
820	wordcount[`0`] = `0`;
821	depth = `0`;
822	while (depth >= `0` && !got_int) {
823	if (curi[depth] > byts[arridx[depth]]) {
824	// Done all bytes at this node, go up one level.
825	idxs[arridx[depth]] = wordcount[depth];
826	if (depth > `0`)
827	wordcount[depth - `1`] += wordcount[depth];
828
829	--depth;
830	fast_breakcheck();
831	} else {
832	// Do one more byte at this node.
833	n = arridx[depth] + curi[depth];
834	++curi[depth];
835
836	c = byts[n];
837	if (c == `0`) {
838	// End of word, count it.
839	++wordcount[depth];
840
841	// Skip over any other NUL bytes (same word with different
842	// flags).
843	while (byts[n + `1`] == `0`) {
844	++n;
845	++curi[depth];
846	}
847	} else {
848	// Normal char, go one level deeper to count the words.
849	++depth;
850	arridx[depth] = idxs[n];
851	curi[depth] = `1`;
852	wordcount[depth] = `0`;
853	}
854	}
855	}
856	}
857
858	// Load the .sug files for languages that have one and weren't loaded yet.
859	void suggest_load_files(void)
860	{
861	langp_T *lp;
862	slang_T *slang;
863	char_u *dotp;
864	FILE *fd;
865	char_u buf[MAXWLEN];
866	int i;
867	time_t timestamp;
868	int wcount;
869	int wordnr;
870	garray_T ga;
871	int c;
872
873	// Do this for all languages that support sound folding.
874	for (int lpi = `0`; lpi < curwin->w_s->b_langp.ga_len; ++lpi) {
875	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
876	slang = lp->lp_slang;
877	if (slang->sl_sugtime != `0` && !slang->sl_sugloaded) {
878	// Change ".spl" to ".sug" and open the file. When the file isn't
879	// found silently skip it. Do set "sl_sugloaded" so that we
880	// don't try again and again.
881	slang->sl_sugloaded = true;
882
883	dotp = STRRCHR(slang->sl_fname, `'.'`);
884	if (dotp == NULL \|\| fnamecmp(dotp, ".spl") != `0`) {
885	continue;
886	}
887	STRCPY(dotp, ".sug");
888	fd = os_fopen((char *)slang->sl_fname, "r");
889	if (fd == NULL) {
890	goto nextone;
891	}
892
893	// <SUGHEADER>: <fileID> <versionnr> <timestamp>
894	for (i = `0`; i < VIMSUGMAGICL; ++i)
895	buf[i] = getc(fd); // <fileID>
896	if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != `0`) {
897	EMSG2(_("E778: This does not look like a .sug file: %s"),
898	slang->sl_fname);
899	goto nextone;
900	}
901	c = getc(fd); // <versionnr>
902	if (c < VIMSUGVERSION) {
903	EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
904	slang->sl_fname);
905	goto nextone;
906	} else if (c > VIMSUGVERSION) {
907	EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
908	slang->sl_fname);
909	goto nextone;
910	}
911
912	// Check the timestamp, it must be exactly the same as the one in
913	// the .spl file. Otherwise the word numbers won't match.
914	timestamp = get8ctime(fd); // <timestamp>
915	if (timestamp != slang->sl_sugtime) {
916	EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
917	slang->sl_fname);
918	goto nextone;
919	}
920
921	// <SUGWORDTREE>: <wordtree>
922	// Read the trie with the soundfolded words.
923	if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
924	false, `0`) != `0`) {
925	someerror:
926	EMSG2(_("E782: error while reading .sug file: %s"),
927	slang->sl_fname);
928	slang_clear_sug(slang);
929	goto nextone;
930	}
931
932	// <SUGTABLE>: <sugwcount> <sugline> ...
933	//
934	// Read the table with word numbers. We use a file buffer for
935	// this, because it's so much like a file with lines. Makes it
936	// possible to swap the info and save on memory use.
937	slang->sl_sugbuf = open_spellbuf();
938
939	// <sugwcount>
940	wcount = get4c(fd);
941	if (wcount < `0`)
942	goto someerror;
943
944	// Read all the wordnr lists into the buffer, one NUL terminated
945	// list per line.
946	ga_init(&ga, `1`, `100`);
947	for (wordnr = `0`; wordnr < wcount; ++wordnr) {
948	ga.ga_len = `0`;
949	for (;; ) {
950	c = getc(fd); // <sugline>
951	if (c < `0`) {
952	goto someerror;
953	}
954	GA_APPEND(char_u, &ga, c);
955	if (c == NUL)
956	break;
957	}
958	if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
959	ga.ga_data, ga.ga_len, true) == FAIL) {
960	goto someerror;
961	}
962	}
963	ga_clear(&ga);
964
965	// Need to put word counts in the word tries, so that we can find
966	// a word by its number.
967	tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
968	tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
969
970	nextone:
971	if (fd != NULL)
972	fclose(fd);
973	STRCPY(dotp, ".spl");
974	}
975	}
976	}
977
978
979	// Read a length field from "fd" in "cnt_bytes" bytes.
980	// Allocate memory, read the string into it and add a NUL at the end.
981	// Returns NULL when the count is zero.
982	// Sets "cntp" to SP_ERROR when there is an error, length of the result
983	// otherwise.
984	static char_u read_cnt_string(FILE fd, int cnt_bytes, int *cntp)
985	{
986	int cnt = `0`;
987	int i;
988	char_u *str;
989
990	// read the length bytes, MSB first
991	for (i = `0`; i < cnt_bytes; ++i)
992	cnt = (cnt << `8`) + getc(fd);
993	if (cnt < `0`) {
994	*cntp = SP_TRUNCERROR;
995	return NULL;
996	}
997	*cntp = cnt;
998	if (cnt == `0`)
999	return NULL; // nothing to read, return NULL
1000
1001	str = READ_STRING(fd, cnt);
1002	if (str == NULL)
1003	*cntp = SP_OTHERERROR;
1004	return str;
1005	}
1006
1007	// Read SN_REGION: <regionname> ...
1008	// Return SP_ERROR flags.*
1009	static int read_region_section(FILE fd, slang_T lp, int len)
1010	{
1011	if (len > MAXREGIONS * `2`) {
1012	return SP_FORMERROR;
1013	}
1014	SPELL_READ_NONNUL_BYTES((char *)lp->sl_regions, (size_t)len, fd, ;);
1015	lp->sl_regions[len] = NUL;
1016	return `0`;
1017	}
1018
1019	// Read SN_CHARFLAGS section: <charflagslen> <charflags>
1020	// <folcharslen> <folchars>
1021	// Return SP_ERROR flags.*
1022	static int read_charflags_section(FILE *fd)
1023	{
1024	char_u *flags;
1025	char_u *fol;
1026	int flagslen, follen;
1027
1028	// <charflagslen> <charflags>
1029	flags = read_cnt_string(fd, `1`, &flagslen);
1030	if (flagslen < `0`)
1031	return flagslen;
1032
1033	// <folcharslen> <folchars>
1034	fol = read_cnt_string(fd, `2`, &follen);
1035	if (follen < `0`) {
1036	xfree(flags);
1037	return follen;
1038	}
1039
1040	// Set the word-char flags and fill SPELL_ISUPPER() table.
1041	if (flags != NULL && fol != NULL)
1042	set_spell_charflags(flags, flagslen, fol);
1043
1044	xfree(flags);
1045	xfree(fol);
1046
1047	// When <charflagslen> is zero then <fcharlen> must also be zero.
1048	if ((flags == NULL) != (fol == NULL))
1049	return SP_FORMERROR;
1050	return `0`;
1051	}
1052
1053	// Read SN_PREFCOND section.
1054	// Return SP_ERROR flags.*
1055	static int read_prefcond_section(FILE fd, slang_T lp)
1056	{
1057	// <prefcondcnt> <prefcond> ...
1058	const int cnt = get2c(fd); // <prefcondcnt>
1059	if (cnt <= `0`) {
1060	return SP_FORMERROR;
1061	}
1062
1063	lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *));
1064	lp->sl_prefixcnt = cnt;
1065
1066	for (int i = `0`; i < cnt; i++) {
1067	// <prefcond> : <condlen> <condstr>
1068	const int n = getc(fd); // <condlen>
1069	if (n < `0` \|\| n >= MAXWLEN) {
1070	return SP_FORMERROR;
1071	}
1072
1073	// When <condlen> is zero we have an empty condition. Otherwise
1074	// compile the regexp program used to check for the condition.
1075	if (n > `0`) {
1076	char buf[MAXWLEN + `1`];
1077	buf[`0`] = `'^'`; // always match at one position only
1078	SPELL_READ_NONNUL_BYTES(buf + `1`, (size_t)n, fd, ;);
1079	buf[n + `1`] = NUL;
1080	lp->sl_prefprog[i] = vim_regcomp((char_u *)buf, RE_MAGIC \| RE_STRING);
1081	}
1082	}
1083	return `0`;
1084	}
1085
1086	// Read REP or REPSAL items section from "fd": <repcount> <rep> ...
1087	// Return SP_ERROR flags.*
1088	static int read_rep_section(FILE fd, garray_T gap, int16_t *first)
1089	{
1090	int cnt;
1091	fromto_T *ftp;
1092
1093	cnt = get2c(fd); // <repcount>
1094	if (cnt < `0`)
1095	return SP_TRUNCERROR;
1096
1097	ga_grow(gap, cnt);
1098
1099	// <rep> : <repfromlen> <repfrom> <reptolen> <repto>
1100	for (; gap->ga_len < cnt; ++gap->ga_len) {
1101	int c;
1102	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
1103	ftp->ft_from = read_cnt_string(fd, `1`, &c);
1104	if (c < `0`)
1105	return c;
1106	if (c == `0`)
1107	return SP_FORMERROR;
1108	ftp->ft_to = read_cnt_string(fd, `1`, &c);
1109	if (c <= `0`) {
1110	xfree(ftp->ft_from);
1111	if (c < `0`)
1112	return c;
1113	return SP_FORMERROR;
1114	}
1115	}
1116
1117	// Fill the first-index table.
1118	for (int i = `0`; i < `256`; ++i) {
1119	first[i] = -`1`;
1120	}
1121	for (int i = `0`; i < gap->ga_len; ++i) {
1122	ftp = &((fromto_T *)gap->ga_data)[i];
1123	if (first[*ftp->ft_from] == -`1`)
1124	first[*ftp->ft_from] = i;
1125	}
1126	return `0`;
1127	}
1128
1129	// Read SN_SAL section: <salflags> <salcount> <sal> ...
1130	// Return SP_ERROR flags.*
1131	static int read_sal_section(FILE fd, slang_T slang)
1132	{
1133	int cnt;
1134	garray_T *gap;
1135	salitem_T *smp;
1136	int ccnt;
1137	char_u *p;
1138	int c = NUL;
1139
1140	slang->sl_sofo = false;
1141
1142	const int flags = getc(fd); // <salflags>
1143	if (flags & SAL_F0LLOWUP) {
1144	slang->sl_followup = true;
1145	}
1146	if (flags & SAL_COLLAPSE) {
1147	slang->sl_collapse = true;
1148	}
1149	if (flags & SAL_REM_ACCENTS) {
1150	slang->sl_rem_accents = true;
1151	}
1152
1153	cnt = get2c(fd); // <salcount>
1154	if (cnt < `0`)
1155	return SP_TRUNCERROR;
1156
1157	gap = &slang->sl_sal;
1158	ga_init(gap, sizeof(salitem_T), `10`);
1159	ga_grow(gap, cnt + `1`);
1160
1161	// <sal> : <salfromlen> <salfrom> <saltolen> <salto>
1162	for (; gap->ga_len < cnt; ++gap->ga_len) {
1163	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1164	ccnt = getc(fd); // <salfromlen>
1165	if (ccnt < `0`)
1166	return SP_TRUNCERROR;
1167	p = xmalloc(ccnt + `2`);
1168	smp->sm_lead = p;
1169
1170	// Read up to the first special char into sm_lead.
1171	int i = `0`;
1172	for (; i < ccnt; ++i) {
1173	c = getc(fd); // <salfrom>
1174	if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
1175	break;
1176	*p++ = c;
1177	}
1178	smp->sm_leadlen = (int)(p - smp->sm_lead);
1179	*p++ = NUL;
1180
1181	// Put (abc) chars in sm_oneof, if any.
1182	if (c == `'('`) {
1183	smp->sm_oneof = p;
1184	for (++i; i < ccnt; ++i) {
1185	c = getc(fd); // <salfrom>
1186	if (c == `')'`)
1187	break;
1188	*p++ = c;
1189	}
1190	*p++ = NUL;
1191	if (++i < ccnt)
1192	c = getc(fd);
1193	} else
1194	smp->sm_oneof = NULL;
1195
1196	// Any following chars go in sm_rules.
1197	smp->sm_rules = p;
1198	if (i < ccnt) {
1199	// store the char we got while checking for end of sm_lead
1200	*p++ = c;
1201	}
1202	i++;
1203	if (i < ccnt) {
1204	SPELL_READ_NONNUL_BYTES( // <salfrom>
1205	(char *)p, (size_t)(ccnt - i), fd, xfree(smp->sm_lead));
1206	p += (ccnt - i);
1207	}
1208	*p++ = NUL;
1209
1210	// <saltolen> <salto>
1211	smp->sm_to = read_cnt_string(fd, `1`, &ccnt);
1212	if (ccnt < `0`) {
1213	xfree(smp->sm_lead);
1214	return ccnt;
1215	}
1216
1217	if (has_mbyte) {
1218	// convert the multi-byte strings to wide char strings
1219	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1220	smp->sm_leadlen = mb_charlen(smp->sm_lead);
1221	if (smp->sm_oneof == NULL)
1222	smp->sm_oneof_w = NULL;
1223	else
1224	smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
1225	if (smp->sm_to == NULL)
1226	smp->sm_to_w = NULL;
1227	else
1228	smp->sm_to_w = mb_str2wide(smp->sm_to);
1229	}
1230	}
1231
1232	if (!GA_EMPTY(gap)) {
1233	// Add one extra entry to mark the end with an empty sm_lead. Avoids
1234	// that we need to check the index every time.
1235	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1236	p = xmalloc(`1`);
1237	p[`0`] = NUL;
1238	smp->sm_lead = p;
1239	smp->sm_leadlen = `0`;
1240	smp->sm_oneof = NULL;
1241	smp->sm_rules = p;
1242	smp->sm_to = NULL;
1243	if (has_mbyte) {
1244	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1245	smp->sm_leadlen = `0`;
1246	smp->sm_oneof_w = NULL;
1247	smp->sm_to_w = NULL;
1248	}
1249	++gap->ga_len;
1250	}
1251
1252	// Fill the first-index table.
1253	set_sal_first(slang);
1254
1255	return `0`;
1256	}
1257
1258	// Read SN_WORDS: <word> ...
1259	// Return SP_ERROR flags.*
1260	static int read_words_section(FILE fd, slang_T lp, int len)
1261	{
1262	int done = `0`;
1263	int i;
1264	int c;
1265	char_u word[MAXWLEN];
1266
1267	while (done < len) {
1268	// Read one word at a time.
1269	for (i = `0`;; ++i) {
1270	c = getc(fd);
1271	if (c == EOF)
1272	return SP_TRUNCERROR;
1273	word[i] = c;
1274	if (word[i] == NUL)
1275	break;
1276	if (i == MAXWLEN - `1`)
1277	return SP_FORMERROR;
1278	}
1279
1280	// Init the count to 10.
1281	count_common_word(lp, word, -`1`, `10`);
1282	done += i + `1`;
1283	}
1284	return `0`;
1285	}
1286
1287	// SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
1288	// Return SP_ERROR flags.*
1289	static int read_sofo_section(FILE fd, slang_T slang)
1290	{
1291	int cnt;
1292	char_u from, to;
1293	int res;
1294
1295	slang->sl_sofo = true;
1296
1297	// <sofofromlen> <sofofrom>
1298	from = read_cnt_string(fd, `2`, &cnt);
1299	if (cnt < `0`)
1300	return cnt;
1301
1302	// <sofotolen> <sofoto>
1303	to = read_cnt_string(fd, `2`, &cnt);
1304	if (cnt < `0`) {
1305	xfree(from);
1306	return cnt;
1307	}
1308
1309	// Store the info in slang->sl_sal and/or slang->sl_sal_first.
1310	if (from != NULL && to != NULL)
1311	res = set_sofo(slang, from, to);
1312	else if (from != NULL \|\| to != NULL)
1313	res = SP_FORMERROR; // only one of two strings is an error
1314	else
1315	res = `0`;
1316
1317	xfree(from);
1318	xfree(to);
1319	return res;
1320	}
1321
1322	// Read the compound section from the .spl file:
1323	// <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
1324	// Returns SP_ERROR flags.*
1325	static int read_compound(FILE fd, slang_T slang, int len)
1326	{
1327	int todo = len;
1328	int c;
1329	int atstart;
1330	char_u *pat;
1331	char_u *pp;
1332	char_u *cp;
1333	char_u *ap;
1334	char_u *crp;
1335	int cnt;
1336	garray_T *gap;
1337
1338	if (todo < `2`)
1339	return SP_FORMERROR; // need at least two bytes
1340
1341	--todo;
1342	c = getc(fd); // <compmax>
1343	if (c < `2`)
1344	c = MAXWLEN;
1345	slang->sl_compmax = c;
1346
1347	--todo;
1348	c = getc(fd); // <compminlen>
1349	if (c < `1`)
1350	c = `0`;
1351	slang->sl_compminlen = c;
1352
1353	--todo;
1354	c = getc(fd); // <compsylmax>
1355	if (c < `1`)
1356	c = MAXWLEN;
1357	slang->sl_compsylmax = c;
1358
1359	c = getc(fd); // <compoptions>
1360	if (c != `0`)
1361	ungetc(c, fd); // be backwards compatible with Vim 7.0b
1362	else {
1363	--todo;
1364	c = getc(fd); // only use the lower byte for now
1365	--todo;
1366	slang->sl_compoptions = c;
1367
1368	gap = &slang->sl_comppat;
1369	c = get2c(fd); // <comppatcount>
1370	todo -= `2`;
1371	ga_init(gap, sizeof(char_u *), c);
1372	ga_grow(gap, c);
1373	while (--c >= `0`) {
1374	((char_u **)(gap->ga_data))[gap->ga_len++] =
1375	read_cnt_string(fd, `1`, &cnt);
1376	// <comppatlen> <comppattext>
1377	if (cnt < `0`)
1378	return cnt;
1379	todo -= cnt + `1`;
1380	}
1381	}
1382	if (todo < `0`)
1383	return SP_FORMERROR;
1384
1385	// Turn the COMPOUNDRULE items into a regexp pattern:
1386	// "a[bc]/ab+" -> "^$a[bc]\\|ab\+$$".
1387	// Inserting backslashes may double the length, "^$<Nul>" is 7 bytes.
1388	// Conversion to utf-8 may double the size.
1389	c = todo * `2` + `7`;
1390	if (enc_utf8)
1391	c += todo * `2`;
1392	pat = xmalloc(c);
1393
1394	// We also need a list of all flags that can appear at the start and one
1395	// for all flags.
1396	cp = xmalloc(todo + `1`);
1397	slang->sl_compstartflags = cp;
1398	*cp = NUL;
1399
1400	ap = xmalloc(todo + `1`);
1401	slang->sl_compallflags = ap;
1402	*ap = NUL;
1403
1404	// And a list of all patterns in their original form, for checking whether
1405	// compounding may work in match_compoundrule(). This is freed when we
1406	// encounter a wildcard, the check doesn't work then.
1407	crp = xmalloc(todo + `1`);
1408	slang->sl_comprules = crp;
1409
1410	pp = pat;
1411	*pp++ = `'^'`;
1412	*pp++ = `'\\'`;
1413	*pp++ = `'('`;
1414
1415	atstart = `1`;
1416	while (todo-- > `0`) {
1417	c = getc(fd); // <compflags>
1418	if (c == EOF) {
1419	xfree(pat);
1420	return SP_TRUNCERROR;
1421	}
1422
1423	// Add all flags to "sl_compallflags".
1424	if (vim_strchr((char_u )"?+[]/", c) == NULL
1425	&& !byte_in_str(slang->sl_compallflags, c)) {
1426	*ap++ = c;
1427	*ap = NUL;
1428	}
1429
1430	if (atstart != `0`) {
1431	// At start of item: copy flags to "sl_compstartflags". For a
1432	// [abc] item set "atstart" to 2 and copy up to the ']'.
1433	if (c == `'['`)
1434	atstart = `2`;
1435	else if (c == `']'`)
1436	atstart = `0`;
1437	else {
1438	if (!byte_in_str(slang->sl_compstartflags, c)) {
1439	*cp++ = c;
1440	*cp = NUL;
1441	}
1442	if (atstart == `1`)
1443	atstart = `0`;
1444	}
1445	}
1446
1447	// Copy flag to "sl_comprules", unless we run into a wildcard.
1448	if (crp != NULL) {
1449	if (c == `'?'` \|\| c == `'+'` \|\| c == `'*'`) {
1450	XFREE_CLEAR(slang->sl_comprules);
1451	crp = NULL;
1452	} else
1453	*crp++ = c;
1454	}
1455
1456	if (c == `'/'`) { // slash separates two items
1457	*pp++ = `'\\'`;
1458	*pp++ = `'\|'`;
1459	atstart = `1`;
1460	} else { // normal char, "[abc]" and '' are copied as-is*
1461	if (c == `'?'` \|\| c == `'+'` \|\| c == `'~'`) {
1462	pp++ = `'\\'`; // "a?" becomes "a\?", "a+" becomes "a\+"*
1463	}
1464	pp += utf_char2bytes(c, pp);
1465	}
1466	}
1467
1468	*pp++ = `'\\'`;
1469	*pp++ = `')'`;
1470	*pp++ = `'$'`;
1471	*pp = NUL;
1472
1473	if (crp != NULL)
1474	*crp = NUL;
1475
1476	slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
1477	xfree(pat);
1478	if (slang->sl_compprog == NULL)
1479	return SP_FORMERROR;
1480
1481	return `0`;
1482	}
1483
1484	// Set the SOFOFROM and SOFOTO items in language "lp".
1485	// Returns SP_ERROR flags when there is something wrong.*
1486	static int set_sofo(slang_T lp, char_u from, char_u *to)
1487	{
1488	int i;
1489
1490	garray_T *gap;
1491	char_u *s;
1492	char_u *p;
1493	int c;
1494	int *inp;
1495
1496	if (has_mbyte) {
1497	// Use "sl_sal" as an array with 256 pointers to a list of wide
1498	// characters. The index is the low byte of the character.
1499	// The list contains from-to pairs with a terminating NUL.
1500	// sl_sal_first[] is used for latin1 "from" characters.
1501	gap = &lp->sl_sal;
1502	ga_init(gap, sizeof(int *), `1`);
1503	ga_grow(gap, `256`);
1504	memset(gap->ga_data, `0`, sizeof(int ) `256`);
1505	gap->ga_len = `256`;
1506
1507	// First count the number of items for each list. Temporarily use
1508	// sl_sal_first[] for this.
1509	for (p = from, s = to; p != NUL && s != NUL; ) {
1510	c = mb_cptr2char_adv((const char_u **)&p);
1511	MB_CPTR_ADV(s);
1512	if (c >= `256`) {
1513	lp->sl_sal_first[c & `0xff`]++;
1514	}
1515	}
1516	if (p != NUL \|\| s != NUL) // lengths differ
1517	return SP_FORMERROR;
1518
1519	// Allocate the lists.
1520	for (i = `0`; i < `256`; ++i)
1521	if (lp->sl_sal_first[i] > `0`) {
1522	p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * `2` + `1`));
1523	((int *)gap->ga_data)[i] = (int* *)p;
1524	(int* *)p = `0`;
1525	}
1526
1527	// Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
1528	// list.
1529	memset(lp->sl_sal_first, `0`, sizeof(salfirst_T) * `256`);
1530	for (p = from, s = to; p != NUL && s != NUL; ) {
1531	c = mb_cptr2char_adv((const char_u **)&p);
1532	i = mb_cptr2char_adv((const char_u **)&s);
1533	if (c >= `256`) {
1534	// Append the from-to chars at the end of the list with
1535	// the low byte.
1536	inp = ((int **)gap->ga_data)[c & `0xff`];
1537	while (*inp != `0`)
1538	++inp;
1539	inp++ = c; // from char*
1540	inp++ = i; // to char*
1541	inp++ = NUL; // NUL at the end*
1542	} else
1543	// mapping byte to char is done in sl_sal_first[]
1544	lp->sl_sal_first[c] = i;
1545	}
1546	} else {
1547	// mapping bytes to bytes is done in sl_sal_first[]
1548	if (STRLEN(from) != STRLEN(to))
1549	return SP_FORMERROR;
1550
1551	for (i = `0`; to[i] != NUL; ++i)
1552	lp->sl_sal_first[from[i]] = to[i];
1553	lp->sl_sal.ga_len = `1`; // indicates we have soundfolding
1554	}
1555
1556	return `0`;
1557	}
1558
1559	// Fill the first-index table for "lp".
1560	static void set_sal_first(slang_T *lp)
1561	{
1562	salfirst_T *sfirst;
1563	salitem_T *smp;
1564	int c;
1565	garray_T *gap = &lp->sl_sal;
1566
1567	sfirst = lp->sl_sal_first;
1568	for (int i = `0`; i < `256`; ++i) {
1569	sfirst[i] = -`1`;
1570	}
1571	smp = (salitem_T *)gap->ga_data;
1572	for (int i = `0`; i < gap->ga_len; ++i) {
1573	if (has_mbyte)
1574	// Use the lowest byte of the first character. For latin1 it's
1575	// the character, for other encodings it should differ for most
1576	// characters.
1577	c = *smp[i].sm_lead_w & `0xff`;
1578	else
1579	c = *smp[i].sm_lead;
1580	if (sfirst[c] == -`1`) {
1581	sfirst[c] = i;
1582	if (has_mbyte) {
1583	int n;
1584
1585	// Make sure all entries with this byte are following each
1586	// other. Move the ones that are in the wrong position. Do
1587	// keep the same ordering!
1588	while (i + `1` < gap->ga_len
1589	&& (*smp[i + `1`].sm_lead_w & `0xff`) == c)
1590	// Skip over entry with same index byte.
1591	++i;
1592
1593	for (n = `1`; i + n < gap->ga_len; ++n)
1594	if ((*smp[i + n].sm_lead_w & `0xff`) == c) {
1595	salitem_T tsal;
1596
1597	// Move entry with same index byte after the entries
1598	// we already found.
1599	++i;
1600	--n;
1601	tsal = smp[i + n];
1602	memmove(smp + i + `1`, smp + i,
1603	sizeof(salitem_T) * n);
1604	smp[i] = tsal;
1605	}
1606	}
1607	}
1608	}
1609	}
1610
1611	// Turn a multi-byte string into a wide character string.
1612	// Return it in allocated memory.
1613	static int mb_str2wide(char_u s)
1614	{
1615	int i = `0`;
1616
1617	int res = xmalloc((mb_charlen(s) + `1`) sizeof(int));
1618	for (char_u p = s; p != NUL; ) {
1619	res[i++] = mb_ptr2char_adv((const char_u **)&p);
1620	}
1621	res[i] = NUL;
1622
1623	return res;
1624	}
1625
1626	// Reads a tree from the .spl or .sug file.
1627	// Allocates the memory and stores pointers in "bytsp" and "idxsp".
1628	// This is skipped when the tree has zero length.
1629	// Returns zero when OK, SP_ value for an error.
1630	static int
1631	spell_read_tree (
1632	FILE *fd,
1633	char_u **bytsp,
1634	idx_T **idxsp,
1635	bool prefixtree, // true for the prefix tree
1636	int prefixcnt // when "prefixtree" is true: prefix count
1637	)
1638	{
1639	int idx;
1640	char_u *bp;
1641	idx_T *ip;
1642
1643	// The tree size was computed when writing the file, so that we can
1644	// allocate it as one long block. <nodecount>
1645	long len = get4c(fd);
1646	if (len < `0`) {
1647	return SP_TRUNCERROR;
1648	}
1649	if ((size_t)len >= SIZE_MAX / sizeof(int)) { // -V547
1650	// Invalid length, multiply with sizeof(int) would overflow.
1651	return SP_FORMERROR;
1652	}
1653	if (len > `0`) {
1654	// Allocate the byte array.
1655	bp = xmalloc(len);
1656	*bytsp = bp;
1657
1658	// Allocate the index array.
1659	ip = xcalloc(len, sizeof(*ip));
1660	*idxsp = ip;
1661
1662	// Recursively read the tree and store it in the array.
1663	idx = read_tree_node(fd, bp, ip, len, `0`, prefixtree, prefixcnt);
1664	if (idx < `0`)
1665	return idx;
1666	}
1667	return `0`;
1668	}
1669
1670	// Read one row of siblings from the spell file and store it in the byte array
1671	// "byts" and index array "idxs". Recursively read the children.
1672	//
1673	// NOTE: The code here must match put_node()!
1674	//
1675	// Returns the index (>= 0) following the siblings.
1676	// Returns SP_TRUNCERROR if the file is shorter than expected.
1677	// Returns SP_FORMERROR if there is a format error.
1678	static idx_T
1679	read_tree_node (
1680	FILE *fd,
1681	char_u *byts,
1682	idx_T *idxs,
1683	int maxidx, // size of arrays
1684	idx_T startidx, // current index in "byts" and "idxs"
1685	bool prefixtree, // true for reading PREFIXTREE
1686	int maxprefcondnr // maximum for <prefcondnr>
1687	)
1688	{
1689	int len;
1690	int i;
1691	int n;
1692	idx_T idx = startidx;
1693	int c;
1694	int c2;
1695	#define SHARED_MASK 0x8000000
1696
1697	len = getc(fd); // <siblingcount>
1698	if (len <= `0`)
1699	return SP_TRUNCERROR;
1700
1701	if (startidx + len >= maxidx)
1702	return SP_FORMERROR;
1703	byts[idx++] = len;
1704
1705	// Read the byte values, flag/region bytes and shared indexes.
1706	for (i = `1`; i <= len; ++i) {
1707	c = getc(fd); // <byte>
1708	if (c < `0`)
1709	return SP_TRUNCERROR;
1710	if (c <= BY_SPECIAL) {
1711	if (c == BY_NOFLAGS && !prefixtree) {
1712	// No flags, all regions.
1713	idxs[idx] = `0`;
1714	c = `0`;
1715	} else if (c != BY_INDEX) {
1716	if (prefixtree) {
1717	// Read the optional pflags byte, the prefix ID and the
1718	// condition nr. In idxs[] store the prefix ID in the low
1719	// byte, the condition index shifted up 8 bits, the flags
1720	// shifted up 24 bits.
1721	if (c == BY_FLAGS)
1722	c = getc(fd) << `24`; // <pflags>
1723	else
1724	c = `0`;
1725
1726	c \|= getc(fd); // <affixID>
1727
1728	n = get2c(fd); // <prefcondnr>
1729	if (n >= maxprefcondnr)
1730	return SP_FORMERROR;
1731	c \|= (n << `8`);
1732	} else { // c must be BY_FLAGS or BY_FLAGS2
1733	// Read flags and optional region and prefix ID. In
1734	// idxs[] the flags go in the low two bytes, region above
1735	// that and prefix ID above the region.
1736	c2 = c;
1737	c = getc(fd); // <flags>
1738	if (c2 == BY_FLAGS2)
1739	c = (getc(fd) << `8`) + c; // <flags2>
1740	if (c & WF_REGION)
1741	c = (getc(fd) << `16`) + c; // <region>
1742	if (c & WF_AFX)
1743	c = (getc(fd) << `24`) + c; // <affixID>
1744	}
1745
1746	idxs[idx] = c;
1747	c = `0`;
1748	} else { // c == BY_INDEX
1749	// <nodeidx>
1750	n = get3c(fd);
1751	if (n < `0` \|\| n >= maxidx)
1752	return SP_FORMERROR;
1753	idxs[idx] = n + SHARED_MASK;
1754	c = getc(fd); // <xbyte>
1755	}
1756	}
1757	byts[idx++] = c;
1758	}
1759
1760	// Recursively read the children for non-shared siblings.
1761	// Skip the end-of-word ones (zero byte value) and the shared ones (and
1762	// remove SHARED_MASK)
1763	for (i = `1`; i <= len; ++i)
1764	if (byts[startidx + i] != `0`) {
1765	if (idxs[startidx + i] & SHARED_MASK)
1766	idxs[startidx + i] &= ~SHARED_MASK;
1767	else {
1768	idxs[startidx + i] = idx;
1769	idx = read_tree_node(fd, byts, idxs, maxidx, idx,
1770	prefixtree, maxprefcondnr);
1771	if (idx < `0`)
1772	break;
1773	}
1774	}
1775
1776	return idx;
1777	}
1778
1779	// Reload the spell file "fname" if it's loaded.
1780	static void
1781	spell_reload_one (
1782	char_u *fname,
1783	bool added_word // invoked through "zg"
1784	)
1785	{
1786	slang_T *slang;
1787	bool didit = false;
1788
1789	for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
1790	if (path_full_compare(fname, slang->sl_fname, false) == kEqualFiles) {
1791	slang_clear(slang);
1792	if (spell_load_file(fname, NULL, slang, false) == NULL)
1793	// reloading failed, clear the language
1794	slang_clear(slang);
1795	redraw_all_later(SOME_VALID);
1796	didit = true;
1797	}
1798	}
1799
1800	// When "zg" was used and the file wasn't loaded yet, should redo
1801	// 'spelllang' to load it now.
1802	if (added_word && !didit)
1803	did_set_spelllang(curwin);
1804	}
1805
1806	// Functions for ":mkspell".
1807
// In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
// but it must be negative to indicate the prefix tree to tree_add_word().
// Use a negative number with the lower 8 bits zero.
// Parenthesized so that the minus sign cannot combine with whatever
// surrounds the macro where it is used.
#define PFX_FLAGS       (-256)

// flags for "condit" argument of store_aff_word()
#define CONDIT_COMB     1       // affix must combine
#define CONDIT_CFIX     2       // affix must have CIRCUMFIX flag
#define CONDIT_SUF      4       // add a suffix for matching flags
#define CONDIT_AFF      8       // word already has an affix

// Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
static long compress_start = 30000;     // memory / SBLOCKSIZE
static long compress_inc = 100;         // memory / SBLOCKSIZE
static long compress_added = 500000;    // word count
1823
1824	// Check the 'mkspellmem' option. Return FAIL if it's wrong.
1825	// Sets "sps_flags".
1826	int spell_check_msm(void)
1827	{
1828	char_u *p = p_msm;
1829	long start = `0`;
1830	long incr = `0`;
1831	long added = `0`;
1832
1833	if (!ascii_isdigit(*p))
1834	return FAIL;
1835	// block count = (value 1024) / SBLOCKSIZE (but avoid overflow)*
1836	start = (getdigits_long(&p, true, `0`) * `10`) / (SBLOCKSIZE / `102`);
1837	if (*p != `','`) {
1838	return FAIL;
1839	}
1840	p++;
1841	if (!ascii_isdigit(*p)) {
1842	return FAIL;
1843	}
1844	incr = (getdigits_long(&p, true, `0`) * `102`) / (SBLOCKSIZE / `10`);
1845	if (*p != `','`) {
1846	return FAIL;
1847	}
1848	p++;
1849	if (!ascii_isdigit(*p)) {
1850	return FAIL;
1851	}
1852	added = getdigits_long(&p, true, `0`) * `1024`;
1853	if (*p != NUL) {
1854	return FAIL;
1855	}
1856
1857	if (start == `0` \|\| incr == `0` \|\| added == `0` \|\| incr > start) {
1858	return FAIL;
1859	}
1860
1861	compress_start = start;
1862	compress_inc = incr;
1863	compress_added = added;
1864	return OK;
1865	}
1866
1867	#ifdef SPELL_PRINTTREE
// For debugging the tree code: print the current tree in a (more or less)
// readable format, so that we can see what happens when adding a word and/or
// compressing the tree.
// Based on code from Olaf Seibert.
#define PRINTLINESIZE   1000
#define PRINTWIDTH      6

// Format "a1" and "a2" with "fmt" into line buffer "l" at the column for
// "depth".  The arguments are parenthesized so that expressions can be
// passed safely.
#define PRINTSOME(l, depth, fmt, a1, a2) \
  vim_snprintf((l) + (depth) * PRINTWIDTH, \
               PRINTLINESIZE - PRINTWIDTH * (depth), (fmt), (a1), (a2))

// The three text lines used to build the output for one node.
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
1881
1882	static void spell_clear_flags(wordnode_T *node)
1883	{
1884	wordnode_T *np;
1885
1886	for (np = node; np != NULL; np = np->wn_sibling) {
1887	np->wn_u1.index = FALSE;
1888	spell_clear_flags(np->wn_child);
1889	}
1890	}
1891
1892	static void spell_print_node(wordnode_T node, int* depth)
1893	{
1894	if (node->wn_u1.index) {
1895	// Done this node before, print the reference.
1896	PRINTSOME(line1, depth, "(%d)", node->wn_nr, `0`);
1897	PRINTSOME(line2, depth, " ", `0`, `0`);
1898	PRINTSOME(line3, depth, " ", `0`, `0`);
1899	msg((char_u *)line1);
1900	msg((char_u *)line2);
1901	msg((char_u *)line3);
1902	} else {
1903	node->wn_u1.index = TRUE;
1904
1905	if (node->wn_byte != NUL) {
1906	if (node->wn_child != NULL)
1907	PRINTSOME(line1, depth, " %c -> ", node->wn_byte, `0`);
1908	else
1909	// Cannot happen?
1910	PRINTSOME(line1, depth, " %c ???", node->wn_byte, `0`);
1911	} else
1912	PRINTSOME(line1, depth, " $ ", `0`, `0`);
1913
1914	PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
1915
1916	if (node->wn_sibling != NULL)
1917	PRINTSOME(line3, depth, " \| ", `0`, `0`);
1918	else
1919	PRINTSOME(line3, depth, " ", `0`, `0`);
1920
1921	if (node->wn_byte == NUL) {
1922	msg((char_u *)line1);
1923	msg((char_u *)line2);
1924	msg((char_u *)line3);
1925	}
1926
1927	// do the children
1928	if (node->wn_byte != NUL && node->wn_child != NULL)
1929	spell_print_node(node->wn_child, depth + `1`);
1930
1931	// do the siblings
1932	if (node->wn_sibling != NULL) {
1933	// get rid of all parent details except \|
1934	STRCPY(line1, line3);
1935	STRCPY(line2, line3);
1936	spell_print_node(node->wn_sibling, depth);
1937	}
1938	}
1939	}
1940
1941	static void spell_print_tree(wordnode_T *root)
1942	{
1943	if (root != NULL) {
1944	// Clear the "wn_u1.index" fields, used to remember what has been
1945	// done.
1946	spell_clear_flags(root);
1947
1948	// Recursively print the tree.
1949	spell_print_node(root, `0`);
1950	}
1951	}
1952
1953	#endif // SPELL_PRINTTREE
1954
1955	// Reads the affix file "fname".
1956	// Returns an afffile_T, NULL for complete failure.
1957	static afffile_T spell_read_aff(spellinfo_T spin, char_u *fname)
1958	{
1959	FILE *fd;
1960	char_u rline[MAXLINELEN];
1961	char_u *line;
1962	char_u *pc = NULL;
1963	#define MAXITEMCNT 30
1964	char_u *(items[MAXITEMCNT]);
1965	int itemcnt;
1966	char_u *p;
1967	int lnum = `0`;
1968	affheader_T *cur_aff = NULL;
1969	bool did_postpone_prefix = false;
1970	int aff_todo = `0`;
1971	hashtab_T *tp;
1972	char_u *low = NULL;
1973	char_u *fol = NULL;
1974	char_u *upp = NULL;
1975	int do_rep;
1976	int do_repsal;
1977	int do_sal;
1978	int do_mapline;
1979	bool found_map = false;
1980	hashitem_T *hi;
1981	int l;
1982	int compminlen = `0`; // COMPOUNDMIN value
1983	int compsylmax = `0`; // COMPOUNDSYLMAX value
1984	int compoptions = `0`; // COMP_ flags
1985	int compmax = `0`; // COMPOUNDWORDMAX value
1986	char_u compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE*
1987	// concatenated
1988	char_u midword = NULL; // MIDWORD value*
1989	char_u syllable = NULL; // SYLLABLE value*
1990	char_u sofofrom = NULL; // SOFOFROM value*
1991	char_u sofoto = NULL; // SOFOTO value*
1992
1993	// Open the file.
1994	fd = os_fopen((char *)fname, "r");
1995	if (fd == NULL) {
1996	EMSG2(_(e_notopen), fname);
1997	return NULL;
1998	}
1999
2000	vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname);
2001	spell_message(spin, IObuff);
2002
2003	// Only do REP lines when not done in another .aff file already.
2004	do_rep = GA_EMPTY(&spin->si_rep);
2005
2006	// Only do REPSAL lines when not done in another .aff file already.
2007	do_repsal = GA_EMPTY(&spin->si_repsal);
2008
2009	// Only do SAL lines when not done in another .aff file already.
2010	do_sal = GA_EMPTY(&spin->si_sal);
2011
2012	// Only do MAP lines when not done in another .aff file already.
2013	do_mapline = GA_EMPTY(&spin->si_map);
2014
2015	// Allocate and init the afffile_T structure.
2016	afffile_T aff = getroom(spin, sizeof(aff), true);
2017	hash_init(&aff->af_pref);
2018	hash_init(&aff->af_suff);
2019	hash_init(&aff->af_comp);
2020
2021	// Read all the lines in the file one by one.
2022	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) {
2023	line_breakcheck();
2024	++lnum;
2025
2026	// Skip comment lines.
2027	if (*rline == `'#'`)
2028	continue;
2029
2030	// Convert from "SET" to 'encoding' when needed.
2031	xfree(pc);
2032	if (spin->si_conv.vc_type != CONV_NONE) {
2033	pc = string_convert(&spin->si_conv, rline, NULL);
2034	if (pc == NULL) {
2035	smsg(_("Conversion failure for word in %s line %d: %s"),
2036	fname, lnum, rline);
2037	continue;
2038	}
2039	line = pc;
2040	} else {
2041	pc = NULL;
2042	line = rline;
2043	}
2044
2045	// Split the line up in white separated items. Put a NUL after each
2046	// item.
2047	itemcnt = `0`;
2048	for (p = line;; ) {
2049	while (p != NUL && p <= `' '`) // skip white space and CR/NL
2050	++p;
2051	if (*p == NUL)
2052	break;
2053	if (itemcnt == MAXITEMCNT) // too many items
2054	break;
2055	items[itemcnt++] = p;
2056	// A few items have arbitrary text argument, don't split them.
2057	if (itemcnt == `2` && spell_info_item(items[`0`]))
2058	while (p >= `' '` \|\| p == TAB) // skip until CR/NL
2059	++p;
2060	else
2061	while (p > `' '`) // skip until white space or CR/NL*
2062	++p;
2063	if (*p == NUL)
2064	break;
2065	*p++ = NUL;
2066	}
2067
2068	// Handle non-empty lines.
2069	if (itemcnt > `0`) {
2070	if (is_aff_rule(items, itemcnt, "SET", `2`) && aff->af_enc == NULL) {
2071	// Setup for conversion from "ENC" to 'encoding'.
2072	aff->af_enc = enc_canonize(items[`1`]);
2073	if (!spin->si_ascii
2074	&& convert_setup(&spin->si_conv, aff->af_enc,
2075	p_enc) == FAIL)
2076	smsg(_("Conversion in %s not supported: from %s to %s"),
2077	fname, aff->af_enc, p_enc);
2078	spin->si_conv.vc_fail = true;
2079	} else if (is_aff_rule(items, itemcnt, "FLAG", `2`)
2080	&& aff->af_flagtype == AFT_CHAR) {
2081	if (STRCMP(items[`1`], "long") == `0`)
2082	aff->af_flagtype = AFT_LONG;
2083	else if (STRCMP(items[`1`], "num") == `0`)
2084	aff->af_flagtype = AFT_NUM;
2085	else if (STRCMP(items[`1`], "caplong") == `0`)
2086	aff->af_flagtype = AFT_CAPLONG;
2087	else
2088	smsg(_("Invalid value for FLAG in %s line %d: %s"),
2089	fname, lnum, items[`1`]);
2090	if (aff->af_rare != `0`
2091	\|\| aff->af_keepcase != `0`
2092	\|\| aff->af_bad != `0`
2093	\|\| aff->af_needaffix != `0`
2094	\|\| aff->af_circumfix != `0`
2095	\|\| aff->af_needcomp != `0`
2096	\|\| aff->af_comproot != `0`
2097	\|\| aff->af_nosuggest != `0`
2098	\|\| compflags != NULL
2099	\|\| aff->af_suff.ht_used > `0`
2100	\|\| aff->af_pref.ht_used > `0`)
2101	smsg(_("FLAG after using flags in %s line %d: %s"),
2102	fname, lnum, items[`1`]);
2103	} else if (spell_info_item(items[`0`]) && itemcnt > `1`) {
2104	p = getroom(spin,
2105	(spin->si_info == NULL ? `0` : STRLEN(spin->si_info))
2106	+ STRLEN(items[`0`])
2107	+ STRLEN(items[`1`]) + `3`, false);
2108	if (spin->si_info != NULL) {
2109	STRCPY(p, spin->si_info);
2110	STRCAT(p, "\n");
2111	}
2112	STRCAT(p, items[`0`]);
2113	STRCAT(p, " ");
2114	STRCAT(p, items[`1`]);
2115	spin->si_info = p;
2116	} else if (is_aff_rule(items, itemcnt, "MIDWORD", `2`)
2117	&& midword == NULL) {
2118	midword = getroom_save(spin, items[`1`]);
2119	} else if (is_aff_rule(items, itemcnt, "TRY", `2`)) {
2120	// ignored, we look in the tree for what chars may appear
2121	}
2122	// TODO: remove "RAR" later
2123	else if ((is_aff_rule(items, itemcnt, "RAR", `2`)
2124	\|\| is_aff_rule(items, itemcnt, "RARE", `2`))
2125	&& aff->af_rare == `0`) {
2126	aff->af_rare = affitem2flag(aff->af_flagtype, items[`1`],
2127	fname, lnum);
2128	}
2129	// TODO: remove "KEP" later
2130	else if ((is_aff_rule(items, itemcnt, "KEP", `2`)
2131	\|\| is_aff_rule(items, itemcnt, "KEEPCASE", `2`))
2132	&& aff->af_keepcase == `0`) {
2133	aff->af_keepcase = affitem2flag(aff->af_flagtype, items[`1`],
2134	fname, lnum);
2135	} else if ((is_aff_rule(items, itemcnt, "BAD", `2`)
2136	\|\| is_aff_rule(items, itemcnt, "FORBIDDENWORD", `2`))
2137	&& aff->af_bad == `0`) {
2138	aff->af_bad = affitem2flag(aff->af_flagtype, items[`1`],
2139	fname, lnum);
2140	} else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", `2`)
2141	&& aff->af_needaffix == `0`) {
2142	aff->af_needaffix = affitem2flag(aff->af_flagtype, items[`1`],
2143	fname, lnum);
2144	} else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", `2`)
2145	&& aff->af_circumfix == `0`) {
2146	aff->af_circumfix = affitem2flag(aff->af_flagtype, items[`1`],
2147	fname, lnum);
2148	} else if (is_aff_rule(items, itemcnt, "NOSUGGEST", `2`)
2149	&& aff->af_nosuggest == `0`) {
2150	aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[`1`],
2151	fname, lnum);
2152	} else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", `2`)
2153	\|\| is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", `2`))
2154	&& aff->af_needcomp == `0`) {
2155	aff->af_needcomp = affitem2flag(aff->af_flagtype, items[`1`],
2156	fname, lnum);
2157	} else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", `2`)
2158	&& aff->af_comproot == `0`) {
2159	aff->af_comproot = affitem2flag(aff->af_flagtype, items[`1`],
2160	fname, lnum);
2161	} else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", `2`)
2162	&& aff->af_compforbid == `0`) {
2163	aff->af_compforbid = affitem2flag(aff->af_flagtype, items[`1`],
2164	fname, lnum);
2165	if (aff->af_pref.ht_used > `0`)
2166	smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
2167	fname, lnum);
2168	} else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", `2`)
2169	&& aff->af_comppermit == `0`) {
2170	aff->af_comppermit = affitem2flag(aff->af_flagtype, items[`1`],
2171	fname, lnum);
2172	if (aff->af_pref.ht_used > `0`)
2173	smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
2174	fname, lnum);
2175	} else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", `2`)
2176	&& compflags == NULL) {
2177	// Turn flag "c" into COMPOUNDRULE compatible string "c+",
2178	// "Na" into "Na+", "1234" into "1234+".
2179	p = getroom(spin, STRLEN(items[`1`]) + `2`, false);
2180	STRCPY(p, items[`1`]);
2181	STRCAT(p, "+");
2182	compflags = p;
2183	} else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", `2`)) {
2184	// We don't use the count, but do check that it's a number and
2185	// not COMPOUNDRULE mistyped.
2186	if (atoi((char *)items[`1`]) == `0`)
2187	smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"),
2188	fname, lnum, items[`1`]);
2189	} else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", `2`)) {
2190	// Don't use the first rule if it is a number.
2191	if (compflags != NULL \|\| *skipdigits(items[`1`]) != NUL) {
2192	// Concatenate this string to previously defined ones,
2193	// using a slash to separate them.
2194	l = (int)STRLEN(items[`1`]) + `1`;
2195	if (compflags != NULL)
2196	l += (int)STRLEN(compflags) + `1`;
2197	p = getroom(spin, l, false);
2198	if (compflags != NULL) {
2199	STRCPY(p, compflags);
2200	STRCAT(p, "/");
2201	}
2202	STRCAT(p, items[`1`]);
2203	compflags = p;
2204	}
2205	} else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", `2`)
2206	&& compmax == `0`) {
2207	compmax = atoi((char *)items[`1`]);
2208	if (compmax == `0`)
2209	smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
2210	fname, lnum, items[`1`]);
2211	} else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", `2`)
2212	&& compminlen == `0`) {
2213	compminlen = atoi((char *)items[`1`]);
2214	if (compminlen == `0`)
2215	smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"),
2216	fname, lnum, items[`1`]);
2217	} else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", `2`)
2218	&& compsylmax == `0`) {
2219	compsylmax = atoi((char *)items[`1`]);
2220	if (compsylmax == `0`)
2221	smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
2222	fname, lnum, items[`1`]);
2223	} else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", `1`)) {
2224	compoptions \|= COMP_CHECKDUP;
2225	} else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", `1`)) {
2226	compoptions \|= COMP_CHECKREP;
2227	} else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", `1`)) {
2228	compoptions \|= COMP_CHECKCASE;
2229	} else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", `1`)) {
2230	compoptions \|= COMP_CHECKTRIPLE;
2231	} else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", `2`)) {
2232	if (atoi((char *)items[`1`]) == `0`)
2233	smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
2234	fname, lnum, items[`1`]);
2235	} else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", `3`)) {
2236	garray_T *gap = &spin->si_comppat;
2237	int i;
2238
2239	// Only add the couple if it isn't already there.
2240	for (i = `0`; i < gap->ga_len - `1`; i += `2`)
2241	if (STRCMP(((char_u **)(gap->ga_data))[i], items[`1`]) == `0`
2242	&& STRCMP(((char_u **)(gap->ga_data))[i + `1`],
2243	items[`2`]) == `0`)
2244	break;
2245	if (i >= gap->ga_len) {
2246	ga_grow(gap, `2`);
2247	((char_u **)(gap->ga_data))[gap->ga_len++]
2248	= getroom_save(spin, items[`1`]);
2249	((char_u **)(gap->ga_data))[gap->ga_len++]
2250	= getroom_save(spin, items[`2`]);
2251	}
2252	} else if (is_aff_rule(items, itemcnt, "SYLLABLE", `2`)
2253	&& syllable == NULL) {
2254	syllable = getroom_save(spin, items[`1`]);
2255	} else if (is_aff_rule(items, itemcnt, "NOBREAK", `1`)) {
2256	spin->si_nobreak = true;
2257	} else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", `1`)) {
2258	spin->si_nosplitsugs = true;
2259	} else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", `1`)) {
2260	spin->si_nocompoundsugs = true;
2261	} else if (is_aff_rule(items, itemcnt, "NOSUGFILE", `1`)) {
2262	spin->si_nosugfile = true;
2263	} else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", `1`)) {
2264	aff->af_pfxpostpone = true;
2265	} else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", `1`)) {
2266	aff->af_ignoreextra = true;
2267	} else if ((STRCMP(items[`0`], "PFX") == `0`
2268	\|\| STRCMP(items[`0`], "SFX") == `0`)
2269	&& aff_todo == `0`
2270	&& itemcnt >= `4`) {
2271	int lasti = `4`;
2272	char_u key[AH_KEY_LEN];
2273
2274	if (*items[`0`] == `'P'`)
2275	tp = &aff->af_pref;
2276	else
2277	tp = &aff->af_suff;
2278
2279	// Myspell allows the same affix name to be used multiple
2280	// times. The affix files that do this have an undocumented
2281	// "S" flag on all but the last block, thus we check for that
2282	// and store it in ah_follows.
2283	STRLCPY(key, items[`1`], AH_KEY_LEN);
2284	hi = hash_find(tp, key);
2285	if (!HASHITEM_EMPTY(hi)) {
2286	cur_aff = HI2AH(hi);
2287	if (cur_aff->ah_combine != (*items[`2`] == `'Y'`))
2288	smsg(_("Different combining flag in continued affix block in %s line %d: %s"),
2289	fname, lnum, items[`1`]);
2290	if (!cur_aff->ah_follows)
2291	smsg(_("Duplicate affix in %s line %d: %s"),
2292	fname, lnum, items[`1`]);
2293	} else {
2294	// New affix letter.
2295	cur_aff = getroom(spin, sizeof(*cur_aff), true);
2296	cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[`1`],
2297	fname, lnum);
2298	if (cur_aff->ah_flag == `0` \|\| STRLEN(items[`1`]) >= AH_KEY_LEN) {
2299	break;
2300	}
2301	if (cur_aff->ah_flag == aff->af_bad
2302	\|\| cur_aff->ah_flag == aff->af_rare
2303	\|\| cur_aff->ah_flag == aff->af_keepcase
2304	\|\| cur_aff->ah_flag == aff->af_needaffix
2305	\|\| cur_aff->ah_flag == aff->af_circumfix
2306	\|\| cur_aff->ah_flag == aff->af_nosuggest
2307	\|\| cur_aff->ah_flag == aff->af_needcomp
2308	\|\| cur_aff->ah_flag == aff->af_comproot) {
2309	smsg(_("Affix also used for "
2310	"BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST"
2311	"in %s line %d: %s"),
2312	fname, lnum, items[`1`]);
2313	}
2314	STRCPY(cur_aff->ah_key, items[`1`]);
2315	hash_add(tp, cur_aff->ah_key);
2316
2317	cur_aff->ah_combine = (*items[`2`] == `'Y'`);
2318	}
2319
2320	// Check for the "S" flag, which apparently means that another
2321	// block with the same affix name is following.
2322	if (itemcnt > lasti && STRCMP(items[lasti], "S") == `0`) {
2323	++lasti;
2324	cur_aff->ah_follows = true;
2325	} else
2326	cur_aff->ah_follows = false;
2327
2328	// Myspell allows extra text after the item, but that might
2329	// mean mistakes go unnoticed. Require a comment-starter,
2330	// unless IGNOREEXTRA is used. Hunspell uses a "-" item.
2331	if (itemcnt > lasti
2332	&& !aff->af_ignoreextra
2333	&& *items[lasti] != `'#'`)
2334	smsg(_(e_afftrailing), fname, lnum, items[lasti]);
2335
2336	if (STRCMP(items[`2`], "Y") != `0` && STRCMP(items[`2`], "N") != `0`)
2337	smsg(_("Expected Y or N in %s line %d: %s"),
2338	fname, lnum, items[`2`]);
2339
2340	if (*items[`0`] == `'P'` && aff->af_pfxpostpone) {
2341	if (cur_aff->ah_newID == `0`) {
2342	// Use a new number in the .spl file later, to be able
2343	// to handle multiple .aff files.
2344	check_renumber(spin);
2345	cur_aff->ah_newID = ++spin->si_newprefID;
2346
2347	// We only really use ah_newID if the prefix is
2348	// postponed. We know that only after handling all
2349	// the items.
2350	did_postpone_prefix = false;
2351	} else
2352	// Did use the ID in a previous block.
2353	did_postpone_prefix = true;
2354	}
2355
2356	aff_todo = atoi((char *)items[`3`]);
2357	} else if ((STRCMP(items[`0`], "PFX") == `0`
2358	\|\| STRCMP(items[`0`], "SFX") == `0`)
2359	&& aff_todo > `0`
2360	&& STRCMP(cur_aff->ah_key, items[`1`]) == `0`
2361	&& itemcnt >= `5`) {
2362	affentry_T *aff_entry;
2363	bool upper = false;
2364	int lasti = `5`;
2365
2366	// Myspell allows extra text after the item, but that might
2367	// mean mistakes go unnoticed. Require a comment-starter.
2368	// Hunspell uses a "-" item.
2369	if (itemcnt > lasti && *items[lasti] != `'#'`
2370	&& (STRCMP(items[lasti], "-") != `0`
2371	\|\| itemcnt != lasti + `1`))
2372	smsg(_(e_afftrailing), fname, lnum, items[lasti]);
2373
2374	// New item for an affix letter.
2375	aff_todo--;
2376	aff_entry = getroom(spin, sizeof(*aff_entry), true);
2377
2378	if (STRCMP(items[`2`], "0") != `0`)
2379	aff_entry->ae_chop = getroom_save(spin, items[`2`]);
2380	if (STRCMP(items[`3`], "0") != `0`) {
2381	aff_entry->ae_add = getroom_save(spin, items[`3`]);
2382
2383	// Recognize flags on the affix: abcd/XYZ
2384	aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, `'/'`);
2385	if (aff_entry->ae_flags != NULL) {
2386	*aff_entry->ae_flags++ = NUL;
2387	aff_process_flags(aff, aff_entry);
2388	}
2389	}
2390
2391	// Don't use an affix entry with non-ASCII characters when
2392	// "spin->si_ascii" is true.
2393	if (!spin->si_ascii \|\| !(has_non_ascii(aff_entry->ae_chop)
2394	\|\| has_non_ascii(aff_entry->ae_add))) {
2395	aff_entry->ae_next = cur_aff->ah_first;
2396	cur_aff->ah_first = aff_entry;
2397
2398	if (STRCMP(items[`4`], ".") != `0`) {
2399	char_u buf[MAXLINELEN];
2400
2401	aff_entry->ae_cond = getroom_save(spin, items[`4`]);
2402	if (*items[`0`] == `'P'`)
2403	sprintf((char *)buf, "^%s", items[`4`]);
2404	else
2405	sprintf((char *)buf, "%s$", items[`4`]);
2406	aff_entry->ae_prog = vim_regcomp(buf,
2407	RE_MAGIC + RE_STRING + RE_STRICT);
2408	if (aff_entry->ae_prog == NULL)
2409	smsg(_("Broken condition in %s line %d: %s"),
2410	fname, lnum, items[`4`]);
2411	}
2412
2413	// For postponed prefixes we need an entry in si_prefcond
2414	// for the condition. Use an existing one if possible.
2415	// Can't be done for an affix with flags, ignoring
2416	// COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG.
2417	if (*items[`0`] == `'P'` && aff->af_pfxpostpone
2418	&& aff_entry->ae_flags == NULL) {
2419	// When the chop string is one lower-case letter and
2420	// the add string ends in the upper-case letter we set
2421	// the "upper" flag, clear "ae_chop" and remove the
2422	// letters from "ae_add". The condition must either
2423	// be empty or start with the same letter.
2424	if (aff_entry->ae_chop != NULL
2425	&& aff_entry->ae_add != NULL
2426	&& aff_entry->ae_chop[(*mb_ptr2len)(
2427	aff_entry->ae_chop)] == NUL
2428	) {
2429	int c, c_up;
2430
2431	c = PTR2CHAR(aff_entry->ae_chop);
2432	c_up = SPELL_TOUPPER(c);
2433	if (c_up != c
2434	&& (aff_entry->ae_cond == NULL
2435	\|\| PTR2CHAR(aff_entry->ae_cond) == c)) {
2436	p = aff_entry->ae_add
2437	+ STRLEN(aff_entry->ae_add);
2438	MB_PTR_BACK(aff_entry->ae_add, p);
2439	if (PTR2CHAR(p) == c_up) {
2440	upper = true;
2441	aff_entry->ae_chop = NULL;
2442	*p = NUL;
2443
2444	// The condition is matched with the
2445	// actual word, thus must check for the
2446	// upper-case letter.
2447	if (aff_entry->ae_cond != NULL) {
2448	char_u buf[MAXLINELEN];
2449	if (has_mbyte) {
2450	onecap_copy(items[`4`], buf, true);
2451	aff_entry->ae_cond = getroom_save(
2452	spin, buf);
2453	} else
2454	*aff_entry->ae_cond = c_up;
2455	if (aff_entry->ae_cond != NULL) {
2456	sprintf((char *)buf, "^%s",
2457	aff_entry->ae_cond);
2458	vim_regfree(aff_entry->ae_prog);
2459	aff_entry->ae_prog = vim_regcomp(
2460	buf, RE_MAGIC + RE_STRING);
2461	}
2462	}
2463	}
2464	}
2465	}
2466
2467	if (aff_entry->ae_chop == NULL) {
2468	int idx;
2469	char_u **pp;
2470	int n;
2471
2472	// Find a previously used condition.
2473	for (idx = spin->si_prefcond.ga_len - `1`; idx >= `0`;
2474	--idx) {
2475	p = ((char_u **)spin->si_prefcond.ga_data)[idx];
2476	if (str_equal(p, aff_entry->ae_cond))
2477	break;
2478	}
2479	if (idx < `0`) {
2480	// Not found, add a new condition.
2481	idx = spin->si_prefcond.ga_len;
2482	pp = GA_APPEND_VIA_PTR(char_u *, &spin->si_prefcond);
2483	*pp = (aff_entry->ae_cond == NULL) ?
2484	NULL : getroom_save(spin, aff_entry->ae_cond);
2485	}
2486
2487	// Add the prefix to the prefix tree.
2488	if (aff_entry->ae_add == NULL)
2489	p = (char_u *)"";
2490	else
2491	p = aff_entry->ae_add;
2492
2493	// PFX_FLAGS is a negative number, so that
2494	// tree_add_word() knows this is the prefix tree.
2495	n = PFX_FLAGS;
2496	if (!cur_aff->ah_combine)
2497	n \|= WFP_NC;
2498	if (upper)
2499	n \|= WFP_UP;
2500	if (aff_entry->ae_comppermit)
2501	n \|= WFP_COMPPERMIT;
2502	if (aff_entry->ae_compforbid)
2503	n \|= WFP_COMPFORBID;
2504	tree_add_word(spin, p, spin->si_prefroot, n,
2505	idx, cur_aff->ah_newID);
2506	did_postpone_prefix = true;
2507	}
2508
2509	// Didn't actually use ah_newID, backup si_newprefID.
2510	if (aff_todo == `0` && !did_postpone_prefix) {
2511	--spin->si_newprefID;
2512	cur_aff->ah_newID = `0`;
2513	}
2514	}
2515	}
2516	} else if (is_aff_rule(items, itemcnt, "FOL", `2`) && fol == NULL) {
2517	fol = vim_strsave(items[`1`]);
2518	} else if (is_aff_rule(items, itemcnt, "LOW", `2`) && low == NULL) {
2519	low = vim_strsave(items[`1`]);
2520	} else if (is_aff_rule(items, itemcnt, "UPP", `2`) && upp == NULL) {
2521	upp = vim_strsave(items[`1`]);
2522	} else if (is_aff_rule(items, itemcnt, "REP", `2`)
2523	\|\| is_aff_rule(items, itemcnt, "REPSAL", `2`)) {
2524	/ Ignore REP/REPSAL count /;
2525	if (!isdigit(*items[`1`]))
2526	smsg(_("Expected REP(SAL) count in %s line %d"),
2527	fname, lnum);
2528	} else if ((STRCMP(items[`0`], "REP") == `0`
2529	\|\| STRCMP(items[`0`], "REPSAL") == `0`)
2530	&& itemcnt >= `3`) {
2531	// REP/REPSAL item
2532	// Myspell ignores extra arguments, we require it starts with
2533	// # to detect mistakes.
2534	if (itemcnt > `3` && items[`3`][`0`] != `'#'`)
2535	smsg(_(e_afftrailing), fname, lnum, items[`3`]);
2536	if (items[`0`][`3`] == `'S'` ? do_repsal : do_rep) {
2537	// Replace underscore with space (can't include a space
2538	// directly).
2539	for (p = items[`1`]; *p != NUL; MB_PTR_ADV(p)) {
2540	if (*p == `'_'`) {
2541	*p = `' '`;
2542	}
2543	}
2544	for (p = items[`2`]; *p != NUL; MB_PTR_ADV(p)) {
2545	if (*p == `'_'`) {
2546	*p = `' '`;
2547	}
2548	}
2549	add_fromto(spin, items[`0`][`3`] == `'S'`
2550	? &spin->si_repsal
2551	: &spin->si_rep, items[`1`], items[`2`]);
2552	}
2553	} else if (is_aff_rule(items, itemcnt, "MAP", `2`)) {
2554	// MAP item or count
2555	if (!found_map) {
2556	// First line contains the count.
2557	found_map = true;
2558	if (!isdigit(*items[`1`]))
2559	smsg(_("Expected MAP count in %s line %d"),
2560	fname, lnum);
2561	} else if (do_mapline) {
2562	int c;
2563
2564	// Check that every character appears only once.
2565	for (p = items[`1`]; *p != NUL; ) {
2566	c = mb_ptr2char_adv((const char_u **)&p);
2567	if ((!GA_EMPTY(&spin->si_map)
2568	&& vim_strchr(spin->si_map.ga_data, c)
2569	!= NULL)
2570	\|\| vim_strchr(p, c) != NULL) {
2571	smsg(_("Duplicate character in MAP in %s line %d"),
2572	fname, lnum);
2573	}
2574	}
2575
2576	// We simply concatenate all the MAP strings, separated by
2577	// slashes.
2578	ga_concat(&spin->si_map, items[`1`]);
2579	ga_append(&spin->si_map, `'/'`);
2580	}
2581	}
2582	// Accept "SAL from to" and "SAL from to #comment".
2583	else if (is_aff_rule(items, itemcnt, "SAL", `3`)) {
2584	if (do_sal) {
2585	// SAL item (sounds-a-like)
2586	// Either one of the known keys or a from-to pair.
2587	if (STRCMP(items[`1`], "followup") == `0`)
2588	spin->si_followup = sal_to_bool(items[`2`]);
2589	else if (STRCMP(items[`1`], "collapse_result") == `0`)
2590	spin->si_collapse = sal_to_bool(items[`2`]);
2591	else if (STRCMP(items[`1`], "remove_accents") == `0`)
2592	spin->si_rem_accents = sal_to_bool(items[`2`]);
2593	else
2594	// when "to" is "_" it means empty
2595	add_fromto(spin, &spin->si_sal, items[`1`],
2596	STRCMP(items[`2`], "_") == `0` ? (char_u *)""
2597	: items[`2`]);
2598	}
2599	} else if (is_aff_rule(items, itemcnt, "SOFOFROM", `2`)
2600	&& sofofrom == NULL) {
2601	sofofrom = getroom_save(spin, items[`1`]);
2602	} else if (is_aff_rule(items, itemcnt, "SOFOTO", `2`)
2603	&& sofoto == NULL) {
2604	sofoto = getroom_save(spin, items[`1`]);
2605	} else if (STRCMP(items[`0`], "COMMON") == `0`) {
2606	int i;
2607
2608	for (i = `1`; i < itemcnt; ++i) {
2609	if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
2610	items[i]))) {
2611	p = vim_strsave(items[i]);
2612	hash_add(&spin->si_commonwords, p);
2613	}
2614	}
2615	} else
2616	smsg(_("Unrecognized or duplicate item in %s line %d: %s"),
2617	fname, lnum, items[`0`]);
2618	}
2619	}
2620
2621	if (fol != NULL \|\| low != NULL \|\| upp != NULL) {
2622	if (spin->si_clear_chartab) {
2623	// Clear the char type tables, don't want to use any of the
2624	// currently used spell properties.
2625	init_spell_chartab();
2626	spin->si_clear_chartab = false;
2627	}
2628
2629	// Don't write a word table for an ASCII file, so that we don't check
2630	// for conflicts with a word table that matches 'encoding'.
2631	// Don't write one for utf-8 either, we use utf_() and*
2632	// mb_get_class(), the list of chars in the file will be incomplete.
2633	if (!spin->si_ascii
2634	&& !enc_utf8
2635	) {
2636	if (fol == NULL \|\| low == NULL \|\| upp == NULL)
2637	smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
2638	else
2639	(void)set_spell_chartab(fol, low, upp);
2640	}
2641
2642	xfree(fol);
2643	xfree(low);
2644	xfree(upp);
2645	}
2646
2647	// Use compound specifications of the .aff file for the spell info.
2648	if (compmax != `0`) {
2649	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
2650	spin->si_compmax = compmax;
2651	}
2652
2653	if (compminlen != `0`) {
2654	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
2655	spin->si_compminlen = compminlen;
2656	}
2657
2658	if (compsylmax != `0`) {
2659	if (syllable == NULL)
2660	smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
2661	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
2662	spin->si_compsylmax = compsylmax;
2663	}
2664
2665	if (compoptions != `0`) {
2666	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
2667	spin->si_compoptions \|= compoptions;
2668	}
2669
2670	if (compflags != NULL)
2671	process_compflags(spin, aff, compflags);
2672
2673	// Check that we didn't use too many renumbered flags.
2674	if (spin->si_newcompID < spin->si_newprefID) {
2675	if (spin->si_newcompID == `127` \|\| spin->si_newcompID == `255`)
2676	MSG(_("Too many postponed prefixes"));
2677	else if (spin->si_newprefID == `0` \|\| spin->si_newprefID == `127`)
2678	MSG(_("Too many compound flags"));
2679	else
2680	MSG(_("Too many postponed prefixes and/or compound flags"));
2681	}
2682
2683	if (syllable != NULL) {
2684	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
2685	spin->si_syllable = syllable;
2686	}
2687
2688	if (sofofrom != NULL \|\| sofoto != NULL) {
2689	if (sofofrom == NULL \|\| sofoto == NULL)
2690	smsg(_("Missing SOFO%s line in %s"),
2691	sofofrom == NULL ? "FROM" : "TO", fname);
2692	else if (!GA_EMPTY(&spin->si_sal))
2693	smsg(_("Both SAL and SOFO lines in %s"), fname);
2694	else {
2695	aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
2696	aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
2697	spin->si_sofofr = sofofrom;
2698	spin->si_sofoto = sofoto;
2699	}
2700	}
2701
2702	if (midword != NULL) {
2703	aff_check_string(spin->si_midword, midword, "MIDWORD");
2704	spin->si_midword = midword;
2705	}
2706
2707	xfree(pc);
2708	fclose(fd);
2709	return aff;
2710	}
2711
2712	// Returns true when items[0] equals "rulename", there are "mincount" items or
2713	// a comment is following after item "mincount".
2714	static bool is_aff_rule(char_u *items, int* itemcnt, char rulename, int* mincount)
2715	{
2716	return STRCMP(items[`0`], rulename) == `0`
2717	&& (itemcnt == mincount
2718	\|\| (itemcnt > mincount && items[mincount][`0`] == `'#'`));
2719	}
2720
2721	// For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
2722	// ae_flags to ae_comppermit and ae_compforbid.
2723	static void aff_process_flags(afffile_T affile, affentry_T entry)
2724	{
2725	char_u *p;
2726	char_u *prevp;
2727	unsigned flag;
2728
2729	if (entry->ae_flags != NULL
2730	&& (affile->af_compforbid != `0` \|\| affile->af_comppermit != `0`)) {
2731	for (p = entry->ae_flags; *p != NUL; ) {
2732	prevp = p;
2733	flag = get_affitem(affile->af_flagtype, &p);
2734	if (flag == affile->af_comppermit \|\| flag == affile->af_compforbid) {
2735	STRMOVE(prevp, p);
2736	p = prevp;
2737	if (flag == affile->af_comppermit)
2738	entry->ae_comppermit = true;
2739	else
2740	entry->ae_compforbid = true;
2741	}
2742	if (affile->af_flagtype == AFT_NUM && *p == `','`)
2743	++p;
2744	}
2745	if (*entry->ae_flags == NUL)
2746	entry->ae_flags = NULL; // nothing left
2747	}
2748	}
2749
2750	// Returns true if "s" is the name of an info item in the affix file.
2751	static bool spell_info_item(char_u *s)
2752	{
2753	return STRCMP(s, "NAME") == `0`
2754	\|\| STRCMP(s, "HOME") == `0`
2755	\|\| STRCMP(s, "VERSION") == `0`
2756	\|\| STRCMP(s, "AUTHOR") == `0`
2757	\|\| STRCMP(s, "EMAIL") == `0`
2758	\|\| STRCMP(s, "COPYRIGHT") == `0`;
2759	}
2760
2761	// Turn an affix flag name into a number, according to the FLAG type.
2762	// returns zero for failure.
2763	static unsigned affitem2flag(int flagtype, char_u item, char_u fname, int lnum)
2764	{
2765	unsigned res;
2766	char_u *p = item;
2767
2768	res = get_affitem(flagtype, &p);
2769	if (res == `0`) {
2770	if (flagtype == AFT_NUM)
2771	smsg(_("Flag is not a number in %s line %d: %s"),
2772	fname, lnum, item);
2773	else
2774	smsg(_("Illegal flag in %s line %d: %s"),
2775	fname, lnum, item);
2776	}
2777	if (*p != NUL) {
2778	smsg(_(e_affname), fname, lnum, item);
2779	return `0`;
2780	}
2781
2782	return res;
2783	}
2784
2785	// Get one affix name from "pp" and advance the pointer.*
2786	// Returns zero for an error, still advances the pointer then.
2787	static unsigned get_affitem(int flagtype, char_u **pp)
2788	{
2789	int res;
2790
2791	if (flagtype == AFT_NUM) {
2792	if (!ascii_isdigit(**pp)) {
2793	++pp; // always advance, avoid getting stuck*
2794	return `0`;
2795	}
2796	res = getdigits_int(pp, true, `0`);
2797	} else {
2798	res = mb_ptr2char_adv((const char_u **)pp);
2799	if (flagtype == AFT_LONG \|\| (flagtype == AFT_CAPLONG
2800	&& res >= `'A'` && res <= `'Z'`)) {
2801	if (**pp == NUL)
2802	return `0`;
2803	res = mb_ptr2char_adv((const char_u **)pp) + (res << `16`);
2804	}
2805	}
2806	return res;
2807	}
2808
2809	// Process the "compflags" string used in an affix file and append it to
2810	// spin->si_compflags.
2811	// The processing involves changing the affix names to ID numbers, so that
2812	// they fit in one byte.
2813	static void process_compflags(spellinfo_T spin, afffile_T aff, char_u *compflags)
2814	{
2815	char_u *p;
2816	char_u *prevp;
2817	unsigned flag;
2818	compitem_T *ci;
2819	int id;
2820	int len;
2821	char_u *tp;
2822	char_u key[AH_KEY_LEN];
2823	hashitem_T *hi;
2824
2825	// Make room for the old and the new compflags, concatenated with a / in
2826	// between. Processing it makes it shorter, but we don't know by how
2827	// much, thus allocate the maximum.
2828	len = (int)STRLEN(compflags) + `1`;
2829	if (spin->si_compflags != NULL)
2830	len += (int)STRLEN(spin->si_compflags) + `1`;
2831	p = getroom(spin, len, false);
2832	if (spin->si_compflags != NULL) {
2833	STRCPY(p, spin->si_compflags);
2834	STRCAT(p, "/");
2835	}
2836	spin->si_compflags = p;
2837	tp = p + STRLEN(p);
2838
2839	for (p = compflags; *p != NUL; ) {
2840	if (vim_strchr((char_u )"/?+[]", *p) != NULL)
2841	// Copy non-flag characters directly.
2842	tp++ = p++;
2843	else {
2844	// First get the flag number, also checks validity.
2845	prevp = p;
2846	flag = get_affitem(aff->af_flagtype, &p);
2847	if (flag != `0`) {
2848	// Find the flag in the hashtable. If it was used before, use
2849	// the existing ID. Otherwise add a new entry.
2850	STRLCPY(key, prevp, p - prevp + `1`);
2851	hi = hash_find(&aff->af_comp, key);
2852	if (!HASHITEM_EMPTY(hi)) {
2853	id = HI2CI(hi)->ci_newID;
2854	} else {
2855	ci = getroom(spin, sizeof(compitem_T), true);
2856	STRCPY(ci->ci_key, key);
2857	ci->ci_flag = flag;
2858	// Avoid using a flag ID that has a special meaning in a
2859	// regexp (also inside []).
2860	do {
2861	check_renumber(spin);
2862	id = spin->si_newcompID--;
2863	} while (vim_strchr((char_u )"/?+[]\\-^", id) != NULL);
2864	ci->ci_newID = id;
2865	hash_add(&aff->af_comp, ci->ci_key);
2866	}
2867	*tp++ = id;
2868	}
2869	if (aff->af_flagtype == AFT_NUM && *p == `','`)
2870	++p;
2871	}
2872	}
2873
2874	*tp = NUL;
2875	}
2876
2877	// Check that the new IDs for postponed affixes and compounding don't overrun
2878	// each other. We have almost 255 available, but start at 0-127 to avoid
2879	// using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
2880	// When that is used up an error message is given.
2881	static void check_renumber(spellinfo_T *spin)
2882	{
2883	if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < `128`) {
2884	spin->si_newprefID = `127`;
2885	spin->si_newcompID = `255`;
2886	}
2887	}
2888
2889	// Returns true if flag "flag" appears in affix list "afflist".
2890	static bool flag_in_afflist(int flagtype, char_u afflist, unsigned* flag)
2891	{
2892	char_u *p;
2893	unsigned n;
2894
2895	switch (flagtype) {
2896	case AFT_CHAR:
2897	return vim_strchr(afflist, flag) != NULL;
2898
2899	case AFT_CAPLONG:
2900	case AFT_LONG:
2901	for (p = afflist; *p != NUL; ) {
2902	n = mb_ptr2char_adv((const char_u **)&p);
2903	if ((flagtype == AFT_LONG \|\| (n >= `'A'` && n <= `'Z'`))
2904	&& *p != NUL) {
2905	n = mb_ptr2char_adv((const char_u **)&p) + (n << `16`);
2906	}
2907	if (n == flag) {
2908	return true;
2909	}
2910	}
2911	break;
2912
2913	case AFT_NUM:
2914	for (p = afflist; *p != NUL; ) {
2915	int digits = getdigits_int(&p, true, `0`);
2916	assert(digits >= `0`);
2917	n = (unsigned int)digits;
2918	if (n == flag)
2919	return true;
2920	if (p != NUL) // skip over comma*
2921	++p;
2922	}
2923	break;
2924	}
2925	return false;
2926	}
2927
// Give a warning when "spinval" and "affval" numbers are set and not the same.
// "spinval" counts as not set when it is zero.  "name" is used in the message.
static void aff_check_number(int spinval, int affval, char *name)
{
  if (spinval != 0 && spinval != affval) {
    smsg(_("%s value differs from what is used in another .aff file"),
         name);
  }
}
2935
2936	// Give a warning when "spinval" and "affval" strings are set and not the same.
2937	static void aff_check_string(char_u spinval, char_u affval, char *name)
2938	{
2939	if (spinval != NULL && STRCMP(spinval, affval) != `0`)
2940	smsg(_("%s value differs from what is used in another .aff file"),
2941	name);
2942	}
2943
2944	// Returns true if strings "s1" and "s2" are equal. Also consider both being
2945	// NULL as equal.
2946	static bool str_equal(char_u s1, char_u s2)
2947	{
2948	if (s1 == NULL \|\| s2 == NULL)
2949	return s1 == s2;
2950	return STRCMP(s1, s2) == `0`;
2951	}
2952
2953	// Add a from-to item to "gap". Used for REP and SAL items.
2954	// They are stored case-folded.
2955	static void add_fromto(spellinfo_T spin, garray_T gap, char_u from, char_u to)
2956	{
2957	char_u word[MAXWLEN];
2958
2959	fromto_T *ftp = GA_APPEND_VIA_PTR(fromto_T, gap);
2960	(void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
2961	ftp->ft_from = getroom_save(spin, word);
2962	(void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
2963	ftp->ft_to = getroom_save(spin, word);
2964	}
2965
2966	// Converts a boolean argument in a SAL line to true or false;
2967	static bool sal_to_bool(char_u *s)
2968	{
2969	return STRCMP(s, "1") == `0` \|\| STRCMP(s, "true") == `0`;
2970	}
2971
2972	// Free the structure filled by spell_read_aff().
2973	static void spell_free_aff(afffile_T *aff)
2974	{
2975	hashtab_T *ht;
2976	hashitem_T *hi;
2977	int todo;
2978	affheader_T *ah;
2979	affentry_T *ae;
2980
2981	xfree(aff->af_enc);
2982
2983	// All this trouble to free the "ae_prog" items...
2984	for (ht = &aff->af_pref;; ht = &aff->af_suff) {
2985	todo = (int)ht->ht_used;
2986	for (hi = ht->ht_array; todo > `0`; ++hi) {
2987	if (!HASHITEM_EMPTY(hi)) {
2988	--todo;
2989	ah = HI2AH(hi);
2990	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
2991	vim_regfree(ae->ae_prog);
2992	}
2993	}
2994	if (ht == &aff->af_suff)
2995	break;
2996	}
2997
2998	hash_clear(&aff->af_pref);
2999	hash_clear(&aff->af_suff);
3000	hash_clear(&aff->af_comp);
3001	}
3002
3003	// Read dictionary file "fname".
3004	// Returns OK or FAIL;
3005	static int spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile)
3006	{
3007	hashtab_T ht;
3008	char_u line[MAXLINELEN];
3009	char_u *p;
3010	char_u *afflist;
3011	char_u store_afflist[MAXWLEN];
3012	int pfxlen;
3013	bool need_affix;
3014	char_u *dw;
3015	char_u *pc;
3016	char_u *w;
3017	int l;
3018	hash_T hash;
3019	hashitem_T *hi;
3020	FILE *fd;
3021	int lnum = `1`;
3022	int non_ascii = `0`;
3023	int retval = OK;
3024	char_u message[MAXLINELEN + MAXWLEN];
3025	int flags;
3026	int duplicate = `0`;
3027
3028	// Open the file.
3029	fd = os_fopen((char *)fname, "r");
3030	if (fd == NULL) {
3031	EMSG2(_(e_notopen), fname);
3032	return FAIL;
3033	}
3034
3035	// The hashtable is only used to detect duplicated words.
3036	hash_init(&ht);
3037
3038	vim_snprintf((char *)IObuff, IOSIZE,
3039	_("Reading dictionary file %s..."), fname);
3040	spell_message(spin, IObuff);
3041
3042	// start with a message for the first line
3043	spin->si_msg_count = `999999`;
3044
3045	// Read and ignore the first line: word count.
3046	(void)vim_fgets(line, MAXLINELEN, fd);
3047	if (!ascii_isdigit(*skipwhite(line)))
3048	EMSG2(_("E760: No word count in %s"), fname);
3049
3050	// Read all the lines in the file one by one.
3051	// The words are converted to 'encoding' here, before being added to
3052	// the hashtable.
3053	while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) {
3054	line_breakcheck();
3055	++lnum;
3056	if (line[`0`] == `'#'` \|\| line[`0`] == `'/'`)
3057	continue; // comment line
3058
3059	// Remove CR, LF and white space from the end. White space halfway through
3060	// the word is kept to allow multi-word terms like "et al.".
3061	l = (int)STRLEN(line);
3062	while (l > `0` && line[l - `1`] <= `' '`)
3063	--l;
3064	if (l == `0`)
3065	continue; // empty line
3066	line[l] = NUL;
3067
3068	// Convert from "SET" to 'encoding' when needed.
3069	if (spin->si_conv.vc_type != CONV_NONE) {
3070	pc = string_convert(&spin->si_conv, line, NULL);
3071	if (pc == NULL) {
3072	smsg(_("Conversion failure for word in %s line %d: %s"),
3073	fname, lnum, line);
3074	continue;
3075	}
3076	w = pc;
3077	} else {
3078	pc = NULL;
3079	w = line;
3080	}
3081
3082	// Truncate the word at the "/", set "afflist" to what follows.
3083	// Replace "\/" by "/" and "\\" by "\".
3084	afflist = NULL;
3085	for (p = w; *p != NUL; MB_PTR_ADV(p)) {
3086	if (*p == `'\\'` && (p[`1`] == `'\\'` \|\| p[`1`] == `'/'`)) {
3087	STRMOVE(p, p + `1`);
3088	} else if (*p == `'/'`) {
3089	*p = NUL;
3090	afflist = p + `1`;
3091	break;
3092	}
3093	}
3094
3095	// Skip non-ASCII words when "spin->si_ascii" is true.
3096	if (spin->si_ascii && has_non_ascii(w)) {
3097	++non_ascii;
3098	xfree(pc);
3099	continue;
3100	}
3101
3102	// This takes time, print a message every 10000 words.
3103	if (spin->si_verbose && spin->si_msg_count > `10000`) {
3104	spin->si_msg_count = `0`;
3105	vim_snprintf((char )message, sizeof*(message),
3106	_("line %6d, word %6ld - %s"),
3107	lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3108	msg_start();
3109	msg_puts_long_attr(message, `0`);
3110	msg_clr_eos();
3111	msg_didout = FALSE;
3112	msg_col = `0`;
3113	ui_flush();
3114	}
3115
3116	// Store the word in the hashtable to be able to find duplicates.
3117	dw = getroom_save(spin, w);
3118	if (dw == NULL) {
3119	retval = FAIL;
3120	xfree(pc);
3121	break;
3122	}
3123
3124	hash = hash_hash(dw);
3125	hi = hash_lookup(&ht, (const char *)dw, STRLEN(dw), hash);
3126	if (!HASHITEM_EMPTY(hi)) {
3127	if (p_verbose > `0`)
3128	smsg(_("Duplicate word in %s line %d: %s"),
3129	fname, lnum, dw);
3130	else if (duplicate == `0`)
3131	smsg(_("First duplicate word in %s line %d: %s"),
3132	fname, lnum, dw);
3133	++duplicate;
3134	} else
3135	hash_add_item(&ht, hi, dw, hash);
3136
3137	flags = `0`;
3138	store_afflist[`0`] = NUL;
3139	pfxlen = `0`;
3140	need_affix = false;
3141	if (afflist != NULL) {
3142	// Extract flags from the affix list.
3143	flags \|= get_affix_flags(affile, afflist);
3144
3145	if (affile->af_needaffix != `0` && flag_in_afflist(
3146	affile->af_flagtype, afflist, affile->af_needaffix))
3147	need_affix = true;
3148
3149	if (affile->af_pfxpostpone)
3150	// Need to store the list of prefix IDs with the word.
3151	pfxlen = get_pfxlist(affile, afflist, store_afflist);
3152
3153	if (spin->si_compflags != NULL)
3154	// Need to store the list of compound flags with the word.
3155	// Concatenate them to the list of prefix IDs.
3156	get_compflags(affile, afflist, store_afflist + pfxlen);
3157	}
3158
3159	// Add the word to the word tree(s).
3160	if (store_word(spin, dw, flags, spin->si_region,
3161	store_afflist, need_affix) == FAIL)
3162	retval = FAIL;
3163
3164	if (afflist != NULL) {
3165	// Find all matching suffixes and add the resulting words.
3166	// Additionally do matching prefixes that combine.
3167	if (store_aff_word(spin, dw, afflist, affile,
3168	&affile->af_suff, &affile->af_pref,
3169	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3170	retval = FAIL;
3171
3172	// Find all matching prefixes and add the resulting words.
3173	if (store_aff_word(spin, dw, afflist, affile,
3174	&affile->af_pref, NULL,
3175	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3176	retval = FAIL;
3177	}
3178
3179	xfree(pc);
3180	}
3181
3182	if (duplicate > `0`)
3183	smsg(_("%d duplicate word(s) in %s"), duplicate, fname);
3184	if (spin->si_ascii && non_ascii > `0`)
3185	smsg(_("Ignored %d word(s) with non-ASCII characters in %s"),
3186	non_ascii, fname);
3187	hash_clear(&ht);
3188
3189	fclose(fd);
3190	return retval;
3191	}
3192
3193	// Check for affix flags in "afflist" that are turned into word flags.
3194	// Return WF_ flags.
3195	static int get_affix_flags(afffile_T affile, char_u afflist)
3196	{
3197	int flags = `0`;
3198
3199	if (affile->af_keepcase != `0` && flag_in_afflist(
3200	affile->af_flagtype, afflist, affile->af_keepcase))
3201	flags \|= WF_KEEPCAP \| WF_FIXCAP;
3202	if (affile->af_rare != `0` && flag_in_afflist(
3203	affile->af_flagtype, afflist, affile->af_rare))
3204	flags \|= WF_RARE;
3205	if (affile->af_bad != `0` && flag_in_afflist(
3206	affile->af_flagtype, afflist, affile->af_bad))
3207	flags \|= WF_BANNED;
3208	if (affile->af_needcomp != `0` && flag_in_afflist(
3209	affile->af_flagtype, afflist, affile->af_needcomp))
3210	flags \|= WF_NEEDCOMP;
3211	if (affile->af_comproot != `0` && flag_in_afflist(
3212	affile->af_flagtype, afflist, affile->af_comproot))
3213	flags \|= WF_COMPROOT;
3214	if (affile->af_nosuggest != `0` && flag_in_afflist(
3215	affile->af_flagtype, afflist, affile->af_nosuggest))
3216	flags \|= WF_NOSUGGEST;
3217	return flags;
3218	}
3219
3220	// Get the list of prefix IDs from the affix list "afflist".
3221	// Used for PFXPOSTPONE.
3222	// Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
3223	// and return the number of affixes.
3224	static int get_pfxlist(afffile_T affile, char_u afflist, char_u *store_afflist)
3225	{
3226	char_u *p;
3227	char_u *prevp;
3228	int cnt = `0`;
3229	int id;
3230	char_u key[AH_KEY_LEN];
3231	hashitem_T *hi;
3232
3233	for (p = afflist; *p != NUL; ) {
3234	prevp = p;
3235	if (get_affitem(affile->af_flagtype, &p) != `0`) {
3236	// A flag is a postponed prefix flag if it appears in "af_pref"
3237	// and its ID is not zero.
3238	STRLCPY(key, prevp, p - prevp + `1`);
3239	hi = hash_find(&affile->af_pref, key);
3240	if (!HASHITEM_EMPTY(hi)) {
3241	id = HI2AH(hi)->ah_newID;
3242	if (id != `0`)
3243	store_afflist[cnt++] = id;
3244	}
3245	}
3246	if (affile->af_flagtype == AFT_NUM && *p == `','`)
3247	++p;
3248	}
3249
3250	store_afflist[cnt] = NUL;
3251	return cnt;
3252	}
3253
3254	// Get the list of compound IDs from the affix list "afflist" that are used
3255	// for compound words.
3256	// Puts the flags in "store_afflist[]".
3257	static void get_compflags(afffile_T affile, char_u afflist, char_u *store_afflist)
3258	{
3259	char_u *p;
3260	char_u *prevp;
3261	int cnt = `0`;
3262	char_u key[AH_KEY_LEN];
3263	hashitem_T *hi;
3264
3265	for (p = afflist; *p != NUL; ) {
3266	prevp = p;
3267	if (get_affitem(affile->af_flagtype, &p) != `0`) {
3268	// A flag is a compound flag if it appears in "af_comp".
3269	STRLCPY(key, prevp, p - prevp + `1`);
3270	hi = hash_find(&affile->af_comp, key);
3271	if (!HASHITEM_EMPTY(hi))
3272	store_afflist[cnt++] = HI2CI(hi)->ci_newID;
3273	}
3274	if (affile->af_flagtype == AFT_NUM && *p == `','`)
3275	++p;
3276	}
3277
3278	store_afflist[cnt] = NUL;
3279	}
3280
3281	// Apply affixes to a word and store the resulting words.
3282	// "ht" is the hashtable with affentry_T that need to be applied, either
3283	// prefixes or suffixes.
3284	// "xht", when not NULL, is the prefix hashtable, to be used additionally on
3285	// the resulting words for combining affixes.
3286	//
3287	// Returns FAIL when out of memory.
3288	static int
3289	store_aff_word (
3290	spellinfo_T spin, // spell info*
3291	char_u word, // basic word start*
3292	char_u afflist, // list of names of supported affixes*
3293	afffile_T *affile,
3294	hashtab_T *ht,
3295	hashtab_T *xht,
3296	int condit, // CONDIT_SUF et al.
3297	int flags, // flags for the word
3298	char_u pfxlist, // list of prefix IDs*
3299	int pfxlen // nr of flags in "pfxlist" for prefixes, rest
3300	// is compound flags
3301	)
3302	{
3303	int todo;
3304	hashitem_T *hi;
3305	affheader_T *ah;
3306	affentry_T *ae;
3307	char_u newword[MAXWLEN];
3308	int retval = OK;
3309	int i, j;
3310	char_u *p;
3311	int use_flags;
3312	char_u *use_pfxlist;
3313	int use_pfxlen;
3314	bool need_affix;
3315	char_u store_afflist[MAXWLEN];
3316	char_u pfx_pfxlist[MAXWLEN];
3317	size_t wordlen = STRLEN(word);
3318	int use_condit;
3319
3320	todo = (int)ht->ht_used;
3321	for (hi = ht->ht_array; todo > `0` && retval == OK; ++hi) {
3322	if (!HASHITEM_EMPTY(hi)) {
3323	--todo;
3324	ah = HI2AH(hi);
3325
3326	// Check that the affix combines, if required, and that the word
3327	// supports this affix.
3328	if (((condit & CONDIT_COMB) == `0` \|\| ah->ah_combine)
3329	&& flag_in_afflist(affile->af_flagtype, afflist,
3330	ah->ah_flag)) {
3331	// Loop over all affix entries with this name.
3332	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) {
3333	// Check the condition. It's not logical to match case
3334	// here, but it is required for compatibility with
3335	// Myspell.
3336	// Another requirement from Myspell is that the chop
3337	// string is shorter than the word itself.
3338	// For prefixes, when "PFXPOSTPONE" was used, only do
3339	// prefixes with a chop string and/or flags.
3340	// When a previously added affix had CIRCUMFIX this one
3341	// must have it too, if it had not then this one must not
3342	// have one either.
3343	if ((xht != NULL \|\| !affile->af_pfxpostpone
3344	\|\| ae->ae_chop != NULL
3345	\|\| ae->ae_flags != NULL)
3346	&& (ae->ae_chop == NULL
3347	\|\| STRLEN(ae->ae_chop) < wordlen)
3348	&& (ae->ae_prog == NULL
3349	\|\| vim_regexec_prog(&ae->ae_prog, false, word, (colnr_T)`0`))
3350	&& (((condit & CONDIT_CFIX) == `0`)
3351	== ((condit & CONDIT_AFF) == `0`
3352	\|\| ae->ae_flags == NULL
3353	\|\| !flag_in_afflist(affile->af_flagtype,
3354	ae->ae_flags, affile->af_circumfix)))) {
3355	// Match. Remove the chop and add the affix.
3356	if (xht == NULL) {
3357	// prefix: chop/add at the start of the word
3358	if (ae->ae_add == NULL) {
3359	*newword = NUL;
3360	} else {
3361	STRLCPY(newword, ae->ae_add, MAXWLEN);
3362	}
3363	p = word;
3364	if (ae->ae_chop != NULL) {
3365	// Skip chop string.
3366	if (has_mbyte) {
3367	i = mb_charlen(ae->ae_chop);
3368	for (; i > `0`; i--) {
3369	MB_PTR_ADV(p);
3370	}
3371	} else {
3372	p += STRLEN(ae->ae_chop);
3373	}
3374	}
3375	STRCAT(newword, p);
3376	} else {
3377	// suffix: chop/add at the end of the word
3378	STRLCPY(newword, word, MAXWLEN);
3379	if (ae->ae_chop != NULL) {
3380	// Remove chop string.
3381	p = newword + STRLEN(newword);
3382	i = (int)MB_CHARLEN(ae->ae_chop);
3383	for (; i > `0`; i--) {
3384	MB_PTR_BACK(newword, p);
3385	}
3386	*p = NUL;
3387	}
3388	if (ae->ae_add != NULL)
3389	STRCAT(newword, ae->ae_add);
3390	}
3391
3392	use_flags = flags;
3393	use_pfxlist = pfxlist;
3394	use_pfxlen = pfxlen;
3395	need_affix = false;
3396	use_condit = condit \| CONDIT_COMB \| CONDIT_AFF;
3397	if (ae->ae_flags != NULL) {
3398	// Extract flags from the affix list.
3399	use_flags \|= get_affix_flags(affile, ae->ae_flags);
3400
3401	if (affile->af_needaffix != `0` && flag_in_afflist(
3402	affile->af_flagtype, ae->ae_flags,
3403	affile->af_needaffix))
3404	need_affix = true;
3405
3406	// When there is a CIRCUMFIX flag the other affix
3407	// must also have it and we don't add the word
3408	// with one affix.
3409	if (affile->af_circumfix != `0` && flag_in_afflist(
3410	affile->af_flagtype, ae->ae_flags,
3411	affile->af_circumfix)) {
3412	use_condit \|= CONDIT_CFIX;
3413	if ((condit & CONDIT_CFIX) == `0`)
3414	need_affix = true;
3415	}
3416
3417	if (affile->af_pfxpostpone
3418	\|\| spin->si_compflags != NULL) {
3419	if (affile->af_pfxpostpone)
3420	// Get prefix IDS from the affix list.
3421	use_pfxlen = get_pfxlist(affile,
3422	ae->ae_flags, store_afflist);
3423	else
3424	use_pfxlen = `0`;
3425	use_pfxlist = store_afflist;
3426
3427	// Combine the prefix IDs. Avoid adding the
3428	// same ID twice.
3429	for (i = `0`; i < pfxlen; ++i) {
3430	for (j = `0`; j < use_pfxlen; ++j)
3431	if (pfxlist[i] == use_pfxlist[j])
3432	break;
3433	if (j == use_pfxlen)
3434	use_pfxlist[use_pfxlen++] = pfxlist[i];
3435	}
3436
3437	if (spin->si_compflags != NULL)
3438	// Get compound IDS from the affix list.
3439	get_compflags(affile, ae->ae_flags,
3440	use_pfxlist + use_pfxlen);
3441	else
3442	use_pfxlist[use_pfxlen] = NUL;
3443
3444	// Combine the list of compound flags.
3445	// Concatenate them to the prefix IDs list.
3446	// Avoid adding the same ID twice.
3447	for (i = pfxlen; pfxlist[i] != NUL; ++i) {
3448	for (j = use_pfxlen;
3449	use_pfxlist[j] != NUL; ++j)
3450	if (pfxlist[i] == use_pfxlist[j])
3451	break;
3452	if (use_pfxlist[j] == NUL) {
3453	use_pfxlist[j++] = pfxlist[i];
3454	use_pfxlist[j] = NUL;
3455	}
3456	}
3457	}
3458	}
3459
3460	// Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
3461	// use the compound flags.
3462	if (use_pfxlist != NULL && ae->ae_compforbid) {
3463	STRLCPY(pfx_pfxlist, use_pfxlist, use_pfxlen + `1`);
3464	use_pfxlist = pfx_pfxlist;
3465	}
3466
3467	// When there are postponed prefixes...
3468	if (spin->si_prefroot != NULL
3469	&& spin->si_prefroot->wn_sibling != NULL) {
3470	// ... add a flag to indicate an affix was used.
3471	use_flags \|= WF_HAS_AFF;
3472
3473	// ... don't use a prefix list if combining
3474	// affixes is not allowed. But do use the
3475	// compound flags after them.
3476	if (!ah->ah_combine && use_pfxlist != NULL)
3477	use_pfxlist += use_pfxlen;
3478	}
3479
3480	// When compounding is supported and there is no
3481	// "COMPOUNDPERMITFLAG" then forbid compounding on the
3482	// side where the affix is applied.
3483	if (spin->si_compflags != NULL && !ae->ae_comppermit) {
3484	if (xht != NULL)
3485	use_flags \|= WF_NOCOMPAFT;
3486	else
3487	use_flags \|= WF_NOCOMPBEF;
3488	}
3489
3490	// Store the modified word.
3491	if (store_word(spin, newword, use_flags,
3492	spin->si_region, use_pfxlist,
3493	need_affix) == FAIL)
3494	retval = FAIL;
3495
3496	// When added a prefix or a first suffix and the affix
3497	// has flags may add a(nother) suffix. RECURSIVE!
3498	if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
3499	if (store_aff_word(spin, newword, ae->ae_flags,
3500	affile, &affile->af_suff, xht,
3501	use_condit & (xht == NULL
3502	? ~`0` : ~CONDIT_SUF),
3503	use_flags, use_pfxlist, pfxlen) == FAIL)
3504	retval = FAIL;
3505
3506	// When added a suffix and combining is allowed also
3507	// try adding a prefix additionally. Both for the
3508	// word flags and for the affix flags. RECURSIVE!
3509	if (xht != NULL && ah->ah_combine) {
3510	if (store_aff_word(spin, newword,
3511	afflist, affile,
3512	xht, NULL, use_condit,
3513	use_flags, use_pfxlist,
3514	pfxlen) == FAIL
3515	\|\| (ae->ae_flags != NULL
3516	&& store_aff_word(spin, newword,
3517	ae->ae_flags, affile,
3518	xht, NULL, use_condit,
3519	use_flags, use_pfxlist,
3520	pfxlen) == FAIL))
3521	retval = FAIL;
3522	}
3523	}
3524	}
3525	}
3526	}
3527	}
3528
3529	return retval;
3530	}
3531
3532	// Read a file with a list of words.
3533	static int spell_read_wordfile(spellinfo_T spin, char_u fname)
3534	{
3535	FILE *fd;
3536	long lnum = `0`;
3537	char_u rline[MAXLINELEN];
3538	char_u *line;
3539	char_u *pc = NULL;
3540	char_u *p;
3541	int l;
3542	int retval = OK;
3543	bool did_word = false;
3544	int non_ascii = `0`;
3545	int flags;
3546	int regionmask;
3547
3548	// Open the file.
3549	fd = os_fopen((char *)fname, "r");
3550	if (fd == NULL) {
3551	EMSG2(_(e_notopen), fname);
3552	return FAIL;
3553	}
3554
3555	vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname);
3556	spell_message(spin, IObuff);
3557
3558	// Read all the lines in the file one by one.
3559	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) {
3560	line_breakcheck();
3561	++lnum;
3562
3563	// Skip comment lines.
3564	if (*rline == `'#'`)
3565	continue;
3566
3567	// Remove CR, LF and white space from the end.
3568	l = (int)STRLEN(rline);
3569	while (l > `0` && rline[l - `1`] <= `' '`)
3570	--l;
3571	if (l == `0`)
3572	continue; // empty or blank line
3573	rline[l] = NUL;
3574
3575	// Convert from "/encoding={encoding}" to 'encoding' when needed.
3576	xfree(pc);
3577	if (spin->si_conv.vc_type != CONV_NONE) {
3578	pc = string_convert(&spin->si_conv, rline, NULL);
3579	if (pc == NULL) {
3580	smsg(_("Conversion failure for word in %s line %ld: %s"),
3581	fname, lnum, rline);
3582	continue;
3583	}
3584	line = pc;
3585	} else {
3586	pc = NULL;
3587	line = rline;
3588	}
3589
3590	if (*line == `'/'`) {
3591	++line;
3592	if (STRNCMP(line, "encoding=", `9`) == `0`) {
3593	if (spin->si_conv.vc_type != CONV_NONE) {
3594	smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s"),
3595	fname, lnum, line - `1`);
3596	} else if (did_word) {
3597	smsg(_("/encoding= line after word ignored in %s line %ld: %s"),
3598	fname, lnum, line - `1`);
3599	} else {
3600	char_u *enc;
3601
3602	// Setup for conversion to 'encoding'.
3603	line += `9`;
3604	enc = enc_canonize(line);
3605	if (!spin->si_ascii
3606	&& convert_setup(&spin->si_conv, enc,
3607	p_enc) == FAIL)
3608	smsg(_("Conversion in %s not supported: from %s to %s"),
3609	fname, line, p_enc);
3610	xfree(enc);
3611	spin->si_conv.vc_fail = true;
3612	}
3613	continue;
3614	}
3615
3616	if (STRNCMP(line, "regions=", `8`) == `0`) {
3617	if (spin->si_region_count > `1`) {
3618	smsg(_("Duplicate /regions= line ignored in %s line %ld: %s"),
3619	fname, lnum, line);
3620	} else {
3621	line += `8`;
3622	if (STRLEN(line) > MAXREGIONS * `2`) {
3623	smsg(_("Too many regions in %s line %ld: %s"),
3624	fname, lnum, line);
3625	} else {
3626	spin->si_region_count = (int)STRLEN(line) / `2`;
3627	STRCPY(spin->si_region_name, line);
3628
3629	// Adjust the mask for a word valid in all regions.
3630	spin->si_region = (`1` << spin->si_region_count) - `1`;
3631	}
3632	}
3633	continue;
3634	}
3635
3636	smsg(_("/ line ignored in %s line %ld: %s"),
3637	fname, lnum, line - `1`);
3638	continue;
3639	}
3640
3641	flags = `0`;
3642	regionmask = spin->si_region;
3643
3644	// Check for flags and region after a slash.
3645	p = vim_strchr(line, `'/'`);
3646	if (p != NULL) {
3647	*p++ = NUL;
3648	while (*p != NUL) {
3649	if (p == `'='`) // keep-case word*
3650	flags \|= WF_KEEPCAP \| WF_FIXCAP;
3651	else if (p == `'!'`) // Bad, bad, wicked word.*
3652	flags \|= WF_BANNED;
3653	else if (p == `'?'`) // Rare word.*
3654	flags \|= WF_RARE;
3655	else if (ascii_isdigit(p)) { // region number(s)*
3656	if ((flags & WF_REGION) == `0`) // first one
3657	regionmask = `0`;
3658	flags \|= WF_REGION;
3659
3660	l = *p - `'0'`;
3661	if (l == `0` \|\| l > spin->si_region_count) {
3662	smsg(_("Invalid region nr in %s line %ld: %s"),
3663	fname, lnum, p);
3664	break;
3665	}
3666	regionmask \|= `1` << (l - `1`);
3667	} else {
3668	smsg(_("Unrecognized flags in %s line %ld: %s"),
3669	fname, lnum, p);
3670	break;
3671	}
3672	++p;
3673	}
3674	}
3675
3676	// Skip non-ASCII words when "spin->si_ascii" is true.
3677	if (spin->si_ascii && has_non_ascii(line)) {
3678	++non_ascii;
3679	continue;
3680	}
3681
3682	// Normal word: store it.
3683	if (store_word(spin, line, flags, regionmask, NULL, false) == FAIL) {
3684	retval = FAIL;
3685	break;
3686	}
3687	did_word = true;
3688	}
3689
3690	xfree(pc);
3691	fclose(fd);
3692
3693	if (spin->si_ascii && non_ascii > `0`) {
3694	vim_snprintf((char *)IObuff, IOSIZE,
3695	_("Ignored %d words with non-ASCII characters"), non_ascii);
3696	spell_message(spin, IObuff);
3697	}
3698
3699	return retval;
3700	}
3701
3702	/// Get part of an sblock_T, "len" bytes long.
3703	/// This avoids calling free() for every little struct we use (and keeping
3704	/// track of them).
3705	/// The memory is cleared to all zeros.
3706	///
3707	/// @param len Length needed (<= SBLOCKSIZE).
3708	/// @param align Align for pointer.
3709	/// @return Pointer into block data.
3710	static void getroom(spellinfo_T spin, size_t len, bool align)
3711	FUNC_ATTR_NONNULL_RET
3712	{
3713	char_u *p;
3714	sblock_T *bl = spin->si_blocks;
3715
3716	assert(len <= SBLOCKSIZE);
3717
3718	if (align && bl != NULL)
3719	// Round size up for alignment. On some systems structures need to be
3720	// aligned to the size of a pointer (e.g., SPARC).
3721	bl->sb_used = (bl->sb_used + sizeof(char *) - `1`)
3722	& ~(sizeof(char *) - `1`);
3723
3724	if (bl == NULL \|\| bl->sb_used + len > SBLOCKSIZE) {
3725	// Allocate a block of memory. It is not freed until much later.
3726	bl = xcalloc(`1`, (sizeof(sblock_T) + SBLOCKSIZE));
3727	bl->sb_next = spin->si_blocks;
3728	spin->si_blocks = bl;
3729	bl->sb_used = `0`;
3730	++spin->si_blocks_cnt;
3731	}
3732
3733	p = bl->sb_data + bl->sb_used;
3734	bl->sb_used += (int)len;
3735
3736	return p;
3737	}
3738
3739	// Make a copy of a string into memory allocated with getroom().
3740	// Returns NULL when out of memory.
3741	static char_u getroom_save(spellinfo_T spin, char_u *s)
3742	{
3743	const size_t s_size = STRLEN(s) + `1`;
3744	return memcpy(getroom(spin, s_size, false), s, s_size);
3745	}
3746
3747
3748	// Free the list of allocated sblock_T.
3749	static void free_blocks(sblock_T *bl)
3750	{
3751	sblock_T *next;
3752
3753	while (bl != NULL) {
3754	next = bl->sb_next;
3755	xfree(bl);
3756	bl = next;
3757	}
3758	}
3759
3760	// Allocate the root of a word tree.
3761	// Returns NULL when out of memory.
3762	static wordnode_T wordtree_alloc(spellinfo_T spin)
3763	FUNC_ATTR_NONNULL_RET
3764	{
3765	return (wordnode_T )getroom(spin, sizeof*(wordnode_T), true);
3766	}
3767
3768	// Store a word in the tree(s).
3769	// Always store it in the case-folded tree. For a keep-case word this is
3770	// useful when the word can also be used with all caps (no WF_FIXCAP flag) and
3771	// used to find suggestions.
3772	// For a keep-case word also store it in the keep-case tree.
3773	// When "pfxlist" is not NULL store the word for each postponed prefix ID and
3774	// compound flag.
3775	static int
3776	store_word (
3777	spellinfo_T *spin,
3778	char_u *word,
3779	int flags, // extra flags, WF_BANNED
3780	int region, // supported region(s)
3781	char_u pfxlist, // list of prefix IDs or NULL*
3782	bool need_affix // only store word with affix ID
3783	)
3784	{
3785	int len = (int)STRLEN(word);
3786	int ct = captype(word, word + len);
3787	char_u foldword[MAXWLEN];
3788	int res = OK;
3789	char_u *p;
3790
3791	(void)spell_casefold(word, len, foldword, MAXWLEN);
3792	for (p = pfxlist; res == OK; ++p) {
3793	if (!need_affix \|\| (p != NULL && *p != NUL))
3794	res = tree_add_word(spin, foldword, spin->si_foldroot, ct \| flags,
3795	region, p == NULL ? `0` : *p);
3796	if (p == NULL \|\| *p == NUL)
3797	break;
3798	}
3799	++spin->si_foldwcount;
3800
3801	if (res == OK && (ct == WF_KEEPCAP \|\| (flags & WF_KEEPCAP))) {
3802	for (p = pfxlist; res == OK; ++p) {
3803	if (!need_affix \|\| (p != NULL && *p != NUL))
3804	res = tree_add_word(spin, word, spin->si_keeproot, flags,
3805	region, p == NULL ? `0` : *p);
3806	if (p == NULL \|\| *p == NUL)
3807	break;
3808	}
3809	++spin->si_keepwcount;
3810	}
3811	return res;
3812	}
3813
3814	// Add word "word" to a word tree at "root".
3815	// When "flags" < 0 we are adding to the prefix tree where "flags" is used for
3816	// "rare" and "region" is the condition nr.
3817	// Returns FAIL when out of memory.
3818	static int tree_add_word(spellinfo_T spin, char_u word, wordnode_T root, int* flags, int region, int affixID)
3819	{
3820	wordnode_T *node = root;
3821	wordnode_T *np;
3822	wordnode_T copyp, *copyprev;
3823	wordnode_T **prev = NULL;
3824	int i;
3825
3826	// Add each byte of the word to the tree, including the NUL at the end.
3827	for (i = `0`;; ++i) {
3828	// When there is more than one reference to this node we need to make
3829	// a copy, so that we can modify it. Copy the whole list of siblings
3830	// (we don't optimize for a partly shared list of siblings).
3831	if (node != NULL && node->wn_refs > `1`) {
3832	--node->wn_refs;
3833	copyprev = prev;
3834	for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) {
3835	// Allocate a new node and copy the info.
3836	np = get_wordnode(spin);
3837	if (np == NULL)
3838	return FAIL;
3839	np->wn_child = copyp->wn_child;
3840	if (np->wn_child != NULL)
3841	++np->wn_child->wn_refs; // child gets extra ref
3842	np->wn_byte = copyp->wn_byte;
3843	if (np->wn_byte == NUL) {
3844	np->wn_flags = copyp->wn_flags;
3845	np->wn_region = copyp->wn_region;
3846	np->wn_affixID = copyp->wn_affixID;
3847	}
3848
3849	// Link the new node in the list, there will be one ref.
3850	np->wn_refs = `1`;
3851	if (copyprev != NULL)
3852	*copyprev = np;
3853	copyprev = &np->wn_sibling;
3854
3855	// Let "node" point to the head of the copied list.
3856	if (copyp == node)
3857	node = np;
3858	}
3859	}
3860
3861	// Look for the sibling that has the same character. They are sorted
3862	// on byte value, thus stop searching when a sibling is found with a
3863	// higher byte value. For zero bytes (end of word) the sorting is
3864	// done on flags and then on affixID.
3865	while (node != NULL
3866	&& (node->wn_byte < word[i]
3867	\|\| (node->wn_byte == NUL
3868	&& (flags < `0`
3869	? node->wn_affixID < (unsigned)affixID
3870	: (node->wn_flags < (unsigned)(flags & WN_MASK)
3871	\|\| (node->wn_flags == (flags & WN_MASK)
3872	&& (spin->si_sugtree
3873	? (node->wn_region & `0xffff`) < region
3874	: node->wn_affixID
3875	< (unsigned)affixID))))))) {
3876	prev = &node->wn_sibling;
3877	node = *prev;
3878	}
3879	if (node == NULL
3880	\|\| node->wn_byte != word[i]
3881	\|\| (word[i] == NUL
3882	&& (flags < `0`
3883	\|\| spin->si_sugtree
3884	\|\| node->wn_flags != (flags & WN_MASK)
3885	\|\| node->wn_affixID != affixID))) {
3886	// Allocate a new node.
3887	np = get_wordnode(spin);
3888	if (np == NULL)
3889	return FAIL;
3890	np->wn_byte = word[i];
3891
3892	// If "node" is NULL this is a new child or the end of the sibling
3893	// list: ref count is one. Otherwise use ref count of sibling and
3894	// make ref count of sibling one (matters when inserting in front
3895	// of the list of siblings).
3896	if (node == NULL)
3897	np->wn_refs = `1`;
3898	else {
3899	np->wn_refs = node->wn_refs;
3900	node->wn_refs = `1`;
3901	}
3902	if (prev != NULL)
3903	*prev = np;
3904	np->wn_sibling = node;
3905	node = np;
3906	}
3907
3908	if (word[i] == NUL) {
3909	node->wn_flags = flags;
3910	node->wn_region \|= region;
3911	node->wn_affixID = affixID;
3912	break;
3913	}
3914	prev = &node->wn_child;
3915	node = *prev;
3916	}
3917	#ifdef SPELL_PRINTTREE
3918	smsg((char_u *)"Added \"%s\"", word);
3919	spell_print_tree(root->wn_sibling);
3920	#endif
3921
3922	// count nr of words added since last message
3923	++spin->si_msg_count;
3924
3925	if (spin->si_compress_cnt > `1`) {
3926	if (--spin->si_compress_cnt == `1`)
3927	// Did enough words to lower the block count limit.
3928	spin->si_blocks_cnt += compress_inc;
3929	}
3930
3931	// When we have allocated lots of memory we need to compress the word tree
3932	// to free up some room. But compression is slow, and we might actually
3933	// need that room, thus only compress in the following situations:
3934	// 1. When not compressed before (si_compress_cnt == 0): when using
3935	// "compress_start" blocks.
3936	// 2. When compressed before and used "compress_inc" blocks before
3937	// adding "compress_added" words (si_compress_cnt > 1).
3938	// 3. When compressed before, added "compress_added" words
3939	// (si_compress_cnt == 1) and the number of free nodes drops below the
3940	// maximum word length.
3941	#ifndef SPELL_COMPRESS_ALLWAYS
3942	if (spin->si_compress_cnt == `1` // NOLINT(readability/braces)
3943	? spin->si_free_count < MAXWLEN
3944	: spin->si_blocks_cnt >= compress_start)
3945	#endif
3946	{
3947	// Decrement the block counter. The effect is that we compress again
3948	// when the freed up room has been used and another "compress_inc"
3949	// blocks have been allocated. Unless "compress_added" words have
3950	// been added, then the limit is put back again.
3951	spin->si_blocks_cnt -= compress_inc;
3952	spin->si_compress_cnt = compress_added;
3953
3954	if (spin->si_verbose) {
3955	msg_start();
3956	msg_puts(_(msg_compressing));
3957	msg_clr_eos();
3958	msg_didout = FALSE;
3959	msg_col = `0`;
3960	ui_flush();
3961	}
3962
3963	// Compress both trees. Either they both have many nodes, which makes
3964	// compression useful, or one of them is small, which means
3965	// compression goes fast. But when filling the soundfold word tree
3966	// there is no keep-case tree.
3967	wordtree_compress(spin, spin->si_foldroot);
3968	if (affixID >= `0`)
3969	wordtree_compress(spin, spin->si_keeproot);
3970	}
3971
3972	return OK;
3973	}
3974
3975	// Get a wordnode_T, either from the list of previously freed nodes or
3976	// allocate a new one.
3977	// Returns NULL when out of memory.
3978	static wordnode_T get_wordnode(spellinfo_T spin)
3979	{
3980	wordnode_T *n;
3981
3982	if (spin->si_first_free == NULL)
3983	n = (wordnode_T )getroom(spin, sizeof*(wordnode_T), true);
3984	else {
3985	n = spin->si_first_free;
3986	spin->si_first_free = n->wn_child;
3987	memset(n, `0`, sizeof(wordnode_T));
3988	--spin->si_free_count;
3989	}
3990	#ifdef SPELL_PRINTTREE
3991	if (n != NULL)
3992	n->wn_nr = ++spin->si_wordnode_nr;
3993	#endif
3994	return n;
3995	}
3996
3997	// Decrement the reference count on a node (which is the head of a list of
3998	// siblings). If the reference count becomes zero free the node and its
3999	// siblings.
4000	// Returns the number of nodes actually freed.
4001	static int deref_wordnode(spellinfo_T spin, wordnode_T node)
4002	{
4003	wordnode_T *np;
4004	int cnt = `0`;
4005
4006	if (--node->wn_refs == `0`) {
4007	for (np = node; np != NULL; np = np->wn_sibling) {
4008	if (np->wn_child != NULL)
4009	cnt += deref_wordnode(spin, np->wn_child);
4010	free_wordnode(spin, np);
4011	++cnt;
4012	}
4013	++cnt; // length field
4014	}
4015	return cnt;
4016	}
4017
4018	// Free a wordnode_T for re-use later.
4019	// Only the "wn_child" field becomes invalid.
4020	static void free_wordnode(spellinfo_T spin, wordnode_T n)
4021	{
4022	n->wn_child = spin->si_first_free;
4023	spin->si_first_free = n;
4024	++spin->si_free_count;
4025	}
4026
4027	// Compress a tree: find tails that are identical and can be shared.
4028	static void wordtree_compress(spellinfo_T spin, wordnode_T root)
4029	{
4030	hashtab_T ht;
4031	int n;
4032	int tot = `0`;
4033	int perc;
4034
4035	// Skip the root itself, it's not actually used. The first sibling is the
4036	// start of the tree.
4037	if (root->wn_sibling != NULL) {
4038	hash_init(&ht);
4039	n = node_compress(spin, root->wn_sibling, &ht, &tot);
4040
4041	#ifndef SPELL_PRINTTREE
4042	if (spin->si_verbose \|\| p_verbose > `2`)
4043	#endif
4044	{
4045	if (tot > `1000000`)
4046	perc = (tot - n) / (tot / `100`);
4047	else if (tot == `0`)
4048	perc = `0`;
4049	else
4050	perc = (tot - n) * `100` / tot;
4051	vim_snprintf((char *)IObuff, IOSIZE,
4052	_("Compressed %d of %d nodes; %d (%d%%) remaining"),
4053	n, tot, tot - n, perc);
4054	spell_message(spin, IObuff);
4055	}
4056	#ifdef SPELL_PRINTTREE
4057	spell_print_tree(root->wn_sibling);
4058	#endif
4059	hash_clear(&ht);
4060	}
4061	}
4062
4063	// Compress a node, its siblings and its children, depth first.
4064	// Returns the number of compressed nodes.
4065	static int
4066	node_compress (
4067	spellinfo_T *spin,
4068	wordnode_T *node,
4069	hashtab_T *ht,
4070	int tot // total count of nodes before compressing,*
4071	// incremented while going through the tree
4072	)
4073	{
4074	wordnode_T *np;
4075	wordnode_T *tp;
4076	wordnode_T *child;
4077	hash_T hash;
4078	hashitem_T *hi;
4079	int len = `0`;
4080	unsigned nr, n;
4081	int compressed = `0`;
4082
4083	// Go through the list of siblings. Compress each child and then try
4084	// finding an identical child to replace it.
4085	// Note that with "child" we mean not just the node that is pointed to,
4086	// but the whole list of siblings of which the child node is the first.
4087	for (np = node; np != NULL && !got_int; np = np->wn_sibling) {
4088	++len;
4089	if ((child = np->wn_child) != NULL) {
4090	// Compress the child first. This fills hashkey.
4091	compressed += node_compress(spin, child, ht, tot);
4092
4093	// Try to find an identical child.
4094	hash = hash_hash(child->wn_u1.hashkey);
4095	hi = hash_lookup(ht, (const char *)child->wn_u1.hashkey,
4096	STRLEN(child->wn_u1.hashkey), hash);
4097	if (!HASHITEM_EMPTY(hi)) {
4098	// There are children we encountered before with a hash value
4099	// identical to the current child. Now check if there is one
4100	// that is really identical.
4101	for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
4102	if (node_equal(child, tp)) {
4103	// Found one! Now use that child in place of the
4104	// current one. This means the current child and all
4105	// its siblings is unlinked from the tree.
4106	++tp->wn_refs;
4107	compressed += deref_wordnode(spin, child);
4108	np->wn_child = tp;
4109	break;
4110	}
4111	if (tp == NULL) {
4112	// No other child with this hash value equals the child of
4113	// the node, add it to the linked list after the first
4114	// item.
4115	tp = HI2WN(hi);
4116	child->wn_u2.next = tp->wn_u2.next;
4117	tp->wn_u2.next = child;
4118	}
4119	} else
4120	// No other child has this hash value, add it to the
4121	// hashtable.
4122	hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
4123	}
4124	}
4125	tot += len + `1`; // add one for the node that stores the length*
4126
4127	// Make a hash key for the node and its siblings, so that we can quickly
4128	// find a lookalike node. This must be done after compressing the sibling
4129	// list, otherwise the hash key would become invalid by the compression.
4130	node->wn_u1.hashkey[`0`] = len;
4131	nr = `0`;
4132	for (np = node; np != NULL; np = np->wn_sibling) {
4133	if (np->wn_byte == NUL)
4134	// end node: use wn_flags, wn_region and wn_affixID
4135	n = np->wn_flags + (np->wn_region << `8`) + (np->wn_affixID << `16`);
4136	else
4137	// byte node: use the byte value and the child pointer
4138	n = (unsigned)(np->wn_byte + ((uintptr_t)np->wn_child << `8`));
4139	nr = nr * `101` + n;
4140	}
4141
4142	// Avoid NUL bytes, it terminates the hash key.
4143	n = nr & `0xff`;
4144	node->wn_u1.hashkey[`1`] = n == `0` ? `1` : n;
4145	n = (nr >> `8`) & `0xff`;
4146	node->wn_u1.hashkey[`2`] = n == `0` ? `1` : n;
4147	n = (nr >> `16`) & `0xff`;
4148	node->wn_u1.hashkey[`3`] = n == `0` ? `1` : n;
4149	n = (nr >> `24`) & `0xff`;
4150	node->wn_u1.hashkey[`4`] = n == `0` ? `1` : n;
4151	node->wn_u1.hashkey[`5`] = NUL;
4152
4153	// Check for CTRL-C pressed now and then.
4154	fast_breakcheck();
4155
4156	return compressed;
4157	}
4158
4159	// Returns true when two nodes have identical siblings and children.
4160	static bool node_equal(wordnode_T n1, wordnode_T n2)
4161	{
4162	wordnode_T *p1;
4163	wordnode_T *p2;
4164
4165	for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
4166	p1 = p1->wn_sibling, p2 = p2->wn_sibling)
4167	if (p1->wn_byte != p2->wn_byte
4168	\|\| (p1->wn_byte == NUL
4169	? (p1->wn_flags != p2->wn_flags
4170	\|\| p1->wn_region != p2->wn_region
4171	\|\| p1->wn_affixID != p2->wn_affixID)
4172	: (p1->wn_child != p2->wn_child)))
4173	break;
4174
4175	return p1 == NULL && p2 == NULL;
4176	}
4177
4178
4179	// Function given to qsort() to sort the REP items on "from" string.
4180	static int rep_compare(const void s1, const* void *s2)
4181	{
4182	fromto_T p1 = (fromto_T )s1;
4183	fromto_T p2 = (fromto_T )s2;
4184
4185	return STRCMP(p1->ft_from, p2->ft_from);
4186	}
4187
4188	// Write the Vim .spl file "fname".
4189	// Return OK/FAIL.
4190	static int write_vim_spell(spellinfo_T spin, char_u fname)
4191	{
4192	int retval = OK;
4193	int regionmask;
4194
4195	FILE fd = os_fopen((char* *)fname, "w");
4196	if (fd == NULL) {
4197	EMSG2(_(e_notopen), fname);
4198	return FAIL;
4199	}
4200
4201	// <HEADER>: <fileID> <versionnr>
4202	// <fileID>
4203	size_t fwv = fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, `1`, fd);
4204	if (fwv != (size_t)`1`)
4205	// Catch first write error, don't try writing more.
4206	goto theend;
4207
4208	putc(VIMSPELLVERSION, fd); // <versionnr>
4209
4210	// <SECTIONS>: <section> ... <sectionend>
4211
4212	// SN_INFO: <infotext>
4213	if (spin->si_info != NULL) {
4214	putc(SN_INFO, fd); // <sectionID>
4215	putc(`0`, fd); // <sectionflags>
4216	size_t i = STRLEN(spin->si_info);
4217	put_bytes(fd, i, `4`); // <sectionlen>
4218	fwv &= fwrite(spin->si_info, i, `1`, fd); // <infotext>
4219	}
4220
4221	// SN_REGION: <regionname> ...
4222	// Write the region names only if there is more than one.
4223	if (spin->si_region_count > `1`) {
4224	putc(SN_REGION, fd); // <sectionID>
4225	putc(SNF_REQUIRED, fd); // <sectionflags>
4226	size_t l = (size_t)spin->si_region_count * `2`;
4227	put_bytes(fd, l, `4`); // <sectionlen>
4228	fwv &= fwrite(spin->si_region_name, l, `1`, fd);
4229	// <regionname> ...
4230	regionmask = (`1` << spin->si_region_count) - `1`;
4231	} else
4232	regionmask = `0`;
4233
4234	// SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
4235	//
4236	// The table with character flags and the table for case folding.
4237	// This makes sure the same characters are recognized as word characters
4238	// when generating an when using a spell file.
4239	// Skip this for ASCII, the table may conflict with the one used for
4240	// 'encoding'.
4241	// Also skip this for an .add.spl file, the main spell file must contain
4242	// the table (avoids that it conflicts). File is shorter too.
4243	if (!spin->si_ascii && !spin->si_add) {
4244	char_u folchars[`128` * `8`];
4245	int flags;
4246
4247	putc(SN_CHARFLAGS, fd); // <sectionID>
4248	putc(SNF_REQUIRED, fd); // <sectionflags>
4249
4250	// Form the <folchars> string first, we need to know its length.
4251	size_t l = `0`;
4252	for (size_t i = `128`; i < `256`; i++) {
4253	l += (size_t)utf_char2bytes(spelltab.st_fold[i], folchars + l);
4254	}
4255	put_bytes(fd, `1` + `128` + `2` + l, `4`); // <sectionlen>
4256
4257	fputc(`128`, fd); // <charflagslen>
4258	for (size_t i = `128`; i < `256`; ++i) {
4259	flags = `0`;
4260	if (spelltab.st_isw[i])
4261	flags \|= CF_WORD;
4262	if (spelltab.st_isu[i])
4263	flags \|= CF_UPPER;
4264	fputc(flags, fd); // <charflags>
4265	}
4266
4267	put_bytes(fd, l, `2`); // <folcharslen>
4268	fwv &= fwrite(folchars, l, `1`, fd); // <folchars>
4269	}
4270
4271	// SN_MIDWORD: <midword>
4272	if (spin->si_midword != NULL) {
4273	putc(SN_MIDWORD, fd); // <sectionID>
4274	putc(SNF_REQUIRED, fd); // <sectionflags>
4275
4276	size_t i = STRLEN(spin->si_midword);
4277	put_bytes(fd, i, `4`); // <sectionlen>
4278	fwv &= fwrite(spin->si_midword, i, `1`, fd);
4279	// <midword>
4280	}
4281
4282	// SN_PREFCOND: <prefcondcnt> <prefcond> ...
4283	if (!GA_EMPTY(&spin->si_prefcond)) {
4284	putc(SN_PREFCOND, fd); // <sectionID>
4285	putc(SNF_REQUIRED, fd); // <sectionflags>
4286
4287	size_t l = (size_t)write_spell_prefcond(NULL, &spin->si_prefcond);
4288	put_bytes(fd, l, `4`); // <sectionlen>
4289
4290	write_spell_prefcond(fd, &spin->si_prefcond);
4291	}
4292
4293	// SN_REP: <repcount> <rep> ...
4294	// SN_SAL: <salflags> <salcount> <sal> ...
4295	// SN_REPSAL: <repcount> <rep> ...
4296
4297	// round 1: SN_REP section
4298	// round 2: SN_SAL section (unless SN_SOFO is used)
4299	// round 3: SN_REPSAL section
4300	for (unsigned int round = `1`; round <= `3`; ++round) {
4301	garray_T *gap;
4302	if (round == `1`)
4303	gap = &spin->si_rep;
4304	else if (round == `2`) {
4305	// Don't write SN_SAL when using a SN_SOFO section
4306	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
4307	continue;
4308	gap = &spin->si_sal;
4309	} else
4310	gap = &spin->si_repsal;
4311
4312	// Don't write the section if there are no items.
4313	if (GA_EMPTY(gap))
4314	continue;
4315
4316	// Sort the REP/REPSAL items.
4317	if (round != `2`)
4318	qsort(gap->ga_data, (size_t)gap->ga_len,
4319	sizeof(fromto_T), rep_compare);
4320
4321	int sect_id = round == `1` ? SN_REP : (round == `2` ? SN_SAL : SN_REPSAL);
4322	putc(sect_id, fd); // <sectionID>
4323
4324	// This is for making suggestions, section is not required.
4325	putc(`0`, fd); // <sectionflags>
4326
4327	// Compute the length of what follows.
4328	size_t l = `2`; // count <repcount> or <salcount>
4329	assert(gap->ga_len >= `0`);
4330	for (size_t i = `0`; i < (size_t)gap->ga_len; ++i) {
4331	fromto_T ftp = &((fromto_T )gap->ga_data)[i];
4332	l += `1` + STRLEN(ftp->ft_from); // count <fromlen> and <from>
4333	l += `1` + STRLEN(ftp->ft_to); // count <tolen> and <to>
4334	}
4335	if (round == `2`)
4336	++l; // count <salflags>
4337	put_bytes(fd, l, `4`); // <sectionlen>
4338
4339	if (round == `2`) {
4340	int i = `0`;
4341	if (spin->si_followup)
4342	i \|= SAL_F0LLOWUP;
4343	if (spin->si_collapse)
4344	i \|= SAL_COLLAPSE;
4345	if (spin->si_rem_accents)
4346	i \|= SAL_REM_ACCENTS;
4347	putc(i, fd); // <salflags>
4348	}
4349
4350	put_bytes(fd, (uintmax_t)gap->ga_len, `2`); // <repcount> or <salcount>
4351	for (size_t i = `0`; i < (size_t)gap->ga_len; ++i) {
4352	// <rep> : <repfromlen> <repfrom> <reptolen> <repto>
4353	// <sal> : <salfromlen> <salfrom> <saltolen> <salto>
4354	fromto_T ftp = &((fromto_T )gap->ga_data)[i];
4355	for (unsigned int rr = `1`; rr <= `2`; ++rr) {
4356	char_u *p = rr == `1` ? ftp->ft_from : ftp->ft_to;
4357	l = STRLEN(p);
4358	assert(l < INT_MAX);
4359	putc((int)l, fd);
4360	if (l > `0`)
4361	fwv &= fwrite(p, l, `1`, fd);
4362	}
4363	}
4364
4365	}
4366
4367	// SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
4368	// This is for making suggestions, section is not required.
4369	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) {
4370	putc(SN_SOFO, fd); // <sectionID>
4371	putc(`0`, fd); // <sectionflags>
4372
4373	size_t l = STRLEN(spin->si_sofofr);
4374	put_bytes(fd, l + STRLEN(spin->si_sofoto) + `4`, `4`); // <sectionlen>
4375
4376	put_bytes(fd, l, `2`); // <sofofromlen>
4377	fwv &= fwrite(spin->si_sofofr, l, `1`, fd); // <sofofrom>
4378
4379	l = STRLEN(spin->si_sofoto);
4380	put_bytes(fd, l, `2`); // <sofotolen>
4381	fwv &= fwrite(spin->si_sofoto, l, `1`, fd); // <sofoto>
4382	}
4383
4384	// SN_WORDS: <word> ...
4385	// This is for making suggestions, section is not required.
4386	if (spin->si_commonwords.ht_used > `0`) {
4387	putc(SN_WORDS, fd); // <sectionID>
4388	putc(`0`, fd); // <sectionflags>
4389
4390	// round 1: count the bytes
4391	// round 2: write the bytes
4392	for (unsigned int round = `1`; round <= `2`; ++round) {
4393	size_t todo;
4394	size_t len = `0`;
4395	hashitem_T *hi;
4396
4397	todo = spin->si_commonwords.ht_used;
4398	for (hi = spin->si_commonwords.ht_array; todo > `0`; ++hi)
4399	if (!HASHITEM_EMPTY(hi)) {
4400	size_t l = STRLEN(hi->hi_key) + `1`;
4401	len += l;
4402	if (round == `2`) // <word>
4403	fwv &= fwrite(hi->hi_key, l, `1`, fd);
4404	--todo;
4405	}
4406	if (round == `1`)
4407	put_bytes(fd, len, `4`); // <sectionlen>
4408	}
4409	}
4410
4411	// SN_MAP: <mapstr>
4412	// This is for making suggestions, section is not required.
4413	if (!GA_EMPTY(&spin->si_map)) {
4414	putc(SN_MAP, fd); // <sectionID>
4415	putc(`0`, fd); // <sectionflags>
4416	size_t l = (size_t)spin->si_map.ga_len;
4417	put_bytes(fd, l, `4`); // <sectionlen>
4418	fwv &= fwrite(spin->si_map.ga_data, l, `1`, fd); // <mapstr>
4419	}
4420
4421	// SN_SUGFILE: <timestamp>
4422	// This is used to notify that a .sug file may be available and at the
4423	// same time allows for checking that a .sug file that is found matches
4424	// with this .spl file. That's because the word numbers must be exactly
4425	// right.
4426	if (!spin->si_nosugfile
4427	&& (!GA_EMPTY(&spin->si_sal)
4428	\|\| (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) {
4429	putc(SN_SUGFILE, fd); // <sectionID>
4430	putc(`0`, fd); // <sectionflags>
4431	put_bytes(fd, `8`, `4`); // <sectionlen>
4432
4433	// Set si_sugtime and write it to the file.
4434	spin->si_sugtime = time(NULL);
4435	put_time(fd, spin->si_sugtime); // <timestamp>
4436	}
4437
4438	// SN_NOSPLITSUGS: nothing
4439	// This is used to notify that no suggestions with word splits are to be
4440	// made.
4441	if (spin->si_nosplitsugs) {
4442	putc(SN_NOSPLITSUGS, fd); // <sectionID>
4443	putc(`0`, fd); // <sectionflags>
4444	put_bytes(fd, `0`, `4`); // <sectionlen>
4445	}
4446
4447	// SN_NOCOMPUNDSUGS: nothing
4448	// This is used to notify that no suggestions with compounds are to be
4449	// made.
4450	if (spin->si_nocompoundsugs) {
4451	putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID>
4452	putc(`0`, fd); // <sectionflags>
4453	put_bytes(fd, `0`, `4`); // <sectionlen>
4454	}
4455
4456	// SN_COMPOUND: compound info.
4457	// We don't mark it required, when not supported all compound words will
4458	// be bad words.
4459	if (spin->si_compflags != NULL) {
4460	putc(SN_COMPOUND, fd); // <sectionID>
4461	putc(`0`, fd); // <sectionflags>
4462
4463	size_t l = STRLEN(spin->si_compflags);
4464	assert(spin->si_comppat.ga_len >= `0`);
4465	for (size_t i = `0`; i < (size_t)spin->si_comppat.ga_len; ++i) {
4466	l += STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + `1`;
4467	}
4468	put_bytes(fd, l + `7`, `4`); // <sectionlen>
4469
4470	putc(spin->si_compmax, fd); // <compmax>
4471	putc(spin->si_compminlen, fd); // <compminlen>
4472	putc(spin->si_compsylmax, fd); // <compsylmax>
4473	putc(`0`, fd); // for Vim 7.0b compatibility
4474	putc(spin->si_compoptions, fd); // <compoptions>
4475	put_bytes(fd, (uintmax_t)spin->si_comppat.ga_len, `2`); // <comppatcount>
4476	for (size_t i = `0`; i < (size_t)spin->si_comppat.ga_len; ++i) {
4477	char_u p = ((char_u *)(spin->si_comppat.ga_data))[i];
4478	assert(STRLEN(p) < INT_MAX);
4479	putc((int)STRLEN(p), fd); // <comppatlen>
4480	fwv &= fwrite(p, STRLEN(p), `1`, fd); // <comppattext>
4481	}
4482	// <compflags>
4483	fwv &= fwrite(spin->si_compflags, STRLEN(spin->si_compflags), `1`, fd);
4484	}
4485
4486	// SN_NOBREAK: NOBREAK flag
4487	if (spin->si_nobreak) {
4488	putc(SN_NOBREAK, fd); // <sectionID>
4489	putc(`0`, fd); // <sectionflags>
4490
4491	// It's empty, the presence of the section flags the feature.
4492	put_bytes(fd, `0`, `4`); // <sectionlen>
4493	}
4494
4495	// SN_SYLLABLE: syllable info.
4496	// We don't mark it required, when not supported syllables will not be
4497	// counted.
4498	if (spin->si_syllable != NULL) {
4499	putc(SN_SYLLABLE, fd); // <sectionID>
4500	putc(`0`, fd); // <sectionflags>
4501
4502	size_t l = STRLEN(spin->si_syllable);
4503	put_bytes(fd, l, `4`); // <sectionlen>
4504	fwv &= fwrite(spin->si_syllable, l, `1`, fd); // <syllable>
4505	}
4506
4507	// end of <SECTIONS>
4508	putc(SN_END, fd); // <sectionend>
4509
4510
4511	// <LWORDTREE> <KWORDTREE> <PREFIXTREE>
4512	spin->si_memtot = `0`;
4513	for (unsigned int round = `1`; round <= `3`; ++round) {
4514	wordnode_T *tree;
4515	if (round == `1`)
4516	tree = spin->si_foldroot->wn_sibling;
4517	else if (round == `2`)
4518	tree = spin->si_keeproot->wn_sibling;
4519	else
4520	tree = spin->si_prefroot->wn_sibling;
4521
4522	// Clear the index and wnode fields in the tree.
4523	clear_node(tree);
4524
4525	// Count the number of nodes. Needed to be able to allocate the
4526	// memory when reading the nodes. Also fills in index for shared
4527	// nodes.
4528	size_t nodecount = (size_t)put_node(NULL, tree, `0`, regionmask, round == `3`);
4529
4530	// number of nodes in 4 bytes
4531	put_bytes(fd, nodecount, `4`); // <nodecount>
4532	assert(nodecount + nodecount * sizeof(int) < INT_MAX);
4533	spin->si_memtot += (int)(nodecount + nodecount * sizeof(int));
4534
4535	// Write the nodes.
4536	(void)put_node(fd, tree, `0`, regionmask, round == `3`);
4537	}
4538
4539	// Write another byte to check for errors (file system full).
4540	if (putc(`0`, fd) == EOF)
4541	retval = FAIL;
4542	theend:
4543	if (fclose(fd) == EOF)
4544	retval = FAIL;
4545
4546	if (fwv != (size_t)`1`)
4547	retval = FAIL;
4548	if (retval == FAIL)
4549	EMSG(_(e_write));
4550
4551	return retval;
4552	}
4553
4554	// Clear the index and wnode fields of "node", it siblings and its
4555	// children. This is needed because they are a union with other items to save
4556	// space.
4557	static void clear_node(wordnode_T *node)
4558	{
4559	wordnode_T *np;
4560
4561	if (node != NULL)
4562	for (np = node; np != NULL; np = np->wn_sibling) {
4563	np->wn_u1.index = `0`;
4564	np->wn_u2.wnode = NULL;
4565
4566	if (np->wn_byte != NUL)
4567	clear_node(np->wn_child);
4568	}
4569	}
4570
4571
4572	// Dump a word tree at node "node".
4573	//
4574	// This first writes the list of possible bytes (siblings). Then for each
4575	// byte recursively write the children.
4576	//
4577	// NOTE: The code here must match the code in read_tree_node(), since
4578	// assumptions are made about the indexes (so that we don't have to write them
4579	// in the file).
4580	//
4581	// Returns the number of nodes used.
4582	static int
4583	put_node (
4584	FILE fd, // NULL when only counting*
4585	wordnode_T *node,
4586	int idx,
4587	int regionmask,
4588	bool prefixtree // true for PREFIXTREE
4589	)
4590	{
4591	// If "node" is zero the tree is empty.
4592	if (node == NULL)
4593	return `0`;
4594
4595	// Store the index where this node is written.
4596	node->wn_u1.index = idx;
4597
4598	// Count the number of siblings.
4599	int siblingcount = `0`;
4600	for (wordnode_T *np = node; np != NULL; np = np->wn_sibling)
4601	++siblingcount;
4602
4603	// Write the sibling count.
4604	if (fd != NULL)
4605	putc(siblingcount, fd); // <siblingcount>
4606
4607	// Write each sibling byte and optionally extra info.
4608	for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) {
4609	if (np->wn_byte == `0`) {
4610	if (fd != NULL) {
4611	// For a NUL byte (end of word) write the flags etc.
4612	if (prefixtree) {
4613	// In PREFIXTREE write the required affixID and the
4614	// associated condition nr (stored in wn_region). The
4615	// byte value is misused to store the "rare" and "not
4616	// combining" flags
4617	if (np->wn_flags == (uint16_t)PFX_FLAGS)
4618	putc(BY_NOFLAGS, fd); // <byte>
4619	else {
4620	putc(BY_FLAGS, fd); // <byte>
4621	putc(np->wn_flags, fd); // <pflags>
4622	}
4623	putc(np->wn_affixID, fd); // <affixID>
4624	put_bytes(fd, (uintmax_t)np->wn_region, `2`); // <prefcondnr>
4625	} else {
4626	// For word trees we write the flag/region items.
4627	int flags = np->wn_flags;
4628	if (regionmask != `0` && np->wn_region != regionmask)
4629	flags \|= WF_REGION;
4630	if (np->wn_affixID != `0`)
4631	flags \|= WF_AFX;
4632	if (flags == `0`) {
4633	// word without flags or region
4634	putc(BY_NOFLAGS, fd); // <byte>
4635	} else {
4636	if (np->wn_flags >= `0x100`) {
4637	putc(BY_FLAGS2, fd); // <byte>
4638	putc(flags, fd); // <flags>
4639	putc((int)((unsigned)flags >> `8`), fd); // <flags2>
4640	} else {
4641	putc(BY_FLAGS, fd); // <byte>
4642	putc(flags, fd); // <flags>
4643	}
4644	if (flags & WF_REGION)
4645	putc(np->wn_region, fd); // <region>
4646	if (flags & WF_AFX)
4647	putc(np->wn_affixID, fd); // <affixID>
4648	}
4649	}
4650	}
4651	} else {
4652	if (np->wn_child->wn_u1.index != `0`
4653	&& np->wn_child->wn_u2.wnode != node) {
4654	// The child is written elsewhere, write the reference.
4655	if (fd != NULL) {
4656	putc(BY_INDEX, fd); // <byte>
4657	put_bytes(fd, (uintmax_t)np->wn_child->wn_u1.index, `3`); // <nodeidx>
4658	}
4659	} else if (np->wn_child->wn_u2.wnode == NULL)
4660	// We will write the child below and give it an index.
4661	np->wn_child->wn_u2.wnode = node;
4662
4663	if (fd != NULL)
4664	if (putc(np->wn_byte, fd) == EOF) { // <byte> or <xbyte>
4665	EMSG(_(e_write));
4666	return `0`;
4667	}
4668	}
4669	}
4670
4671	// Space used in the array when reading: one for each sibling and one for
4672	// the count.
4673	int newindex = idx + siblingcount + `1`;
4674
4675	// Recursively dump the children of each sibling.
4676	for (wordnode_T *np = node; np != NULL; np = np->wn_sibling)
4677	if (np->wn_byte != `0` && np->wn_child->wn_u2.wnode == node)
4678	newindex = put_node(fd, np->wn_child, newindex, regionmask,
4679	prefixtree);
4680
4681	return newindex;
4682	}
4683
4684
4685	// ":mkspell [-ascii] outfile infile ..."
4686	// ":mkspell [-ascii] addfile"
4687	void ex_mkspell(exarg_T *eap)
4688	{
4689	int fcount;
4690	char_u **fnames;
4691	char_u *arg = eap->arg;
4692	bool ascii = false;
4693
4694	if (STRNCMP(arg, "-ascii", `6`) == `0`) {
4695	ascii = true;
4696	arg = skipwhite(arg + `6`);
4697	}
4698
4699	// Expand all the remaining arguments (e.g., $VIMRUNTIME).
4700	if (get_arglist_exp(arg, &fcount, &fnames, false) == OK) {
4701	mkspell(fcount, fnames, ascii, eap->forceit, false);
4702	FreeWild(fcount, fnames);
4703	}
4704	}
4705
4706	// Create the .sug file.
4707	// Uses the soundfold info in "spin".
4708	// Writes the file with the name "wfname", with ".spl" changed to ".sug".
4709	static void spell_make_sugfile(spellinfo_T spin, char_u wfname)
4710	{
4711	char_u *fname = NULL;
4712	int len;
4713	slang_T *slang;
4714	bool free_slang = false;
4715
4716	// Read back the .spl file that was written. This fills the required
4717	// info for soundfolding. This also uses less memory than the
4718	// pointer-linked version of the trie. And it avoids having two versions
4719	// of the code for the soundfolding stuff.
4720	// It might have been done already by spell_reload_one().
4721	for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
4722	if (path_full_compare(wfname, slang->sl_fname, false) == kEqualFiles) {
4723	break;
4724	}
4725	}
4726	if (slang == NULL) {
4727	spell_message(spin, (char_u *)_("Reading back spell file..."));
4728	slang = spell_load_file(wfname, NULL, NULL, false);
4729	if (slang == NULL)
4730	return;
4731	free_slang = true;
4732	}
4733
4734	// Clear the info in "spin" that is used.
4735	spin->si_blocks = NULL;
4736	spin->si_blocks_cnt = `0`;
4737	spin->si_compress_cnt = `0`; // will stay at 0 all the time
4738	spin->si_free_count = `0`;
4739	spin->si_first_free = NULL;
4740	spin->si_foldwcount = `0`;
4741
4742	// Go through the trie of good words, soundfold each word and add it to
4743	// the soundfold trie.
4744	spell_message(spin, (char_u *)_("Performing soundfolding..."));
4745	if (sug_filltree(spin, slang) == FAIL)
4746	goto theend;
4747
4748	// Create the table which links each soundfold word with a list of the
4749	// good words it may come from. Creates buffer "spin->si_spellbuf".
4750	// This also removes the wordnr from the NUL byte entries to make
4751	// compression possible.
4752	if (sug_maketable(spin) == FAIL)
4753	goto theend;
4754
4755	smsg(_("Number of words after soundfolding: %" PRId64),
4756	(int64_t)spin->si_spellbuf->b_ml.ml_line_count);
4757
4758	// Compress the soundfold trie.
4759	spell_message(spin, (char_u *)_(msg_compressing));
4760	wordtree_compress(spin, spin->si_foldroot);
4761
4762	// Write the .sug file.
4763	// Make the file name by changing ".spl" to ".sug".
4764	fname = xmalloc(MAXPATHL);
4765	STRLCPY(fname, wfname, MAXPATHL);
4766	len = (int)STRLEN(fname);
4767	fname[len - `2`] = `'u'`;
4768	fname[len - `1`] = `'g'`;
4769	sug_write(spin, fname);
4770
4771	theend:
4772	xfree(fname);
4773	if (free_slang)
4774	slang_free(slang);
4775	free_blocks(spin->si_blocks);
4776	close_spellbuf(spin->si_spellbuf);
4777	}
4778
4779	// Build the soundfold trie for language "slang".
4780	static int sug_filltree(spellinfo_T spin, slang_T slang)
4781	{
4782	char_u *byts;
4783	idx_T *idxs;
4784	int depth;
4785	idx_T arridx[MAXWLEN];
4786	int curi[MAXWLEN];
4787	char_u tword[MAXWLEN];
4788	char_u tsalword[MAXWLEN];
4789	int c;
4790	idx_T n;
4791	unsigned words_done = `0`;
4792	int wordcount[MAXWLEN];
4793
4794	// We use si_foldroot for the soundfolded trie.
4795	spin->si_foldroot = wordtree_alloc(spin);
4796
4797	// Let tree_add_word() know we're adding to the soundfolded tree
4798	spin->si_sugtree = true;
4799
4800	// Go through the whole case-folded tree, soundfold each word and put it
4801	// in the trie.
4802	byts = slang->sl_fbyts;
4803	idxs = slang->sl_fidxs;
4804
4805	arridx[`0`] = `0`;
4806	curi[`0`] = `1`;
4807	wordcount[`0`] = `0`;
4808
4809	depth = `0`;
4810	while (depth >= `0` && !got_int) {
4811	if (curi[depth] > byts[arridx[depth]]) {
4812	// Done all bytes at this node, go up one level.
4813	idxs[arridx[depth]] = wordcount[depth];
4814	if (depth > `0`)
4815	wordcount[depth - `1`] += wordcount[depth];
4816
4817	--depth;
4818	line_breakcheck();
4819	} else {
4820
4821	// Do one more byte at this node.
4822	n = arridx[depth] + curi[depth];
4823	++curi[depth];
4824
4825	c = byts[n];
4826	if (c == `0`) {
4827	// Sound-fold the word.
4828	tword[depth] = NUL;
4829	spell_soundfold(slang, tword, true, tsalword);
4830
4831	// We use the "flags" field for the MSB of the wordnr,
4832	// "region" for the LSB of the wordnr.
4833	if (tree_add_word(spin, tsalword, spin->si_foldroot,
4834	words_done >> `16`, words_done & `0xffff`,
4835	`0`) == FAIL)
4836	return FAIL;
4837
4838	++words_done;
4839	++wordcount[depth];
4840
4841	// Reset the block count each time to avoid compression
4842	// kicking in.
4843	spin->si_blocks_cnt = `0`;
4844
4845	// Skip over any other NUL bytes (same word with different
4846	// flags).
4847	while (byts[n + `1`] == `0`) {
4848	++n;
4849	++curi[depth];
4850	}
4851	} else {
4852	// Normal char, go one level deeper.
4853	tword[depth++] = c;
4854	arridx[depth] = idxs[n];
4855	curi[depth] = `1`;
4856	wordcount[depth] = `0`;
4857	}
4858	}
4859	}
4860
4861	smsg(_("Total number of words: %d"), words_done);
4862
4863	return OK;
4864	}
4865
4866	// Make the table that links each word in the soundfold trie to the words it
4867	// can be produced from.
4868	// This is not unlike lines in a file, thus use a memfile to be able to access
4869	// the table efficiently.
4870	// Returns FAIL when out of memory.
4871	static int sug_maketable(spellinfo_T *spin)
4872	{
4873	garray_T ga;
4874	int res = OK;
4875
4876	// Allocate a buffer, open a memline for it and create the swap file
4877	// (uses a temp file, not a .swp file).
4878	spin->si_spellbuf = open_spellbuf();
4879
4880	// Use a buffer to store the line info, avoids allocating many small
4881	// pieces of memory.
4882	ga_init(&ga, `1`, `100`);
4883
4884	// recursively go through the tree
4885	if (sug_filltable(spin, spin->si_foldroot->wn_sibling, `0`, &ga) == -`1`)
4886	res = FAIL;
4887
4888	ga_clear(&ga);
4889	return res;
4890	}
4891
4892	// Fill the table for one node and its children.
4893	// Returns the wordnr at the start of the node.
4894	// Returns -1 when out of memory.
4895	static int
4896	sug_filltable (
4897	spellinfo_T *spin,
4898	wordnode_T *node,
4899	int startwordnr,
4900	garray_T gap // place to store line of numbers*
4901	)
4902	{
4903	wordnode_T p, np;
4904	int wordnr = startwordnr;
4905	int nr;
4906	int prev_nr;
4907
4908	for (p = node; p != NULL; p = p->wn_sibling) {
4909	if (p->wn_byte == NUL) {
4910	gap->ga_len = `0`;
4911	prev_nr = `0`;
4912	for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) {
4913	ga_grow(gap, `10`);
4914
4915	nr = (np->wn_flags << `16`) + (np->wn_region & `0xffff`);
4916	// Compute the offset from the previous nr and store the
4917	// offset in a way that it takes a minimum number of bytes.
4918	// It's a bit like utf-8, but without the need to mark
4919	// following bytes.
4920	nr -= prev_nr;
4921	prev_nr += nr;
4922	gap->ga_len += offset2bytes(nr,
4923	(char_u *)gap->ga_data + gap->ga_len);
4924	}
4925
4926	// add the NUL byte
4927	((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
4928
4929	if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
4930	gap->ga_data, gap->ga_len, true) == FAIL) {
4931	return -`1`;
4932	}
4933	wordnr++;
4934
4935	// Remove extra NUL entries, we no longer need them. We don't
4936	// bother freeing the nodes, the won't be reused anyway.
4937	while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
4938	p->wn_sibling = p->wn_sibling->wn_sibling;
4939
4940	// Clear the flags on the remaining NUL node, so that compression
4941	// works a lot better.
4942	p->wn_flags = `0`;
4943	p->wn_region = `0`;
4944	} else {
4945	wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
4946	if (wordnr == -`1`)
4947	return -`1`;
4948	}
4949	}
4950	return wordnr;
4951	}
4952
4953	// Convert an offset into a minimal number of bytes.
4954	// Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
4955	// bytes.
4956	static int offset2bytes(int nr, char_u *buf)
4957	{
4958	int rem;
4959	int b1, b2, b3, b4;
4960
4961	// Split the number in parts of base 255. We need to avoid NUL bytes.
4962	b1 = nr % `255` + `1`;
4963	rem = nr / `255`;
4964	b2 = rem % `255` + `1`;
4965	rem = rem / `255`;
4966	b3 = rem % `255` + `1`;
4967	b4 = rem / `255` + `1`;
4968
4969	if (b4 > `1` \|\| b3 > `0x1f`) { // 4 bytes
4970	buf[`0`] = `0xe0` + b4;
4971	buf[`1`] = b3;
4972	buf[`2`] = b2;
4973	buf[`3`] = b1;
4974	return `4`;
4975	}
4976	if (b3 > `1` \|\| b2 > `0x3f` ) { // 3 bytes
4977	buf[`0`] = `0xc0` + b3;
4978	buf[`1`] = b2;
4979	buf[`2`] = b1;
4980	return `3`;
4981	}
4982	if (b2 > `1` \|\| b1 > `0x7f` ) { // 2 bytes
4983	buf[`0`] = `0x80` + b2;
4984	buf[`1`] = b1;
4985	return `2`;
4986	}
4987	// 1 byte
4988	buf[`0`] = b1;
4989	return `1`;
4990	}
4991
4992	// Write the .sug file in "fname".
4993	static void sug_write(spellinfo_T spin, char_u fname)
4994	{
4995	// Create the file. Note that an existing file is silently overwritten!
4996	FILE fd = os_fopen((char* *)fname, "w");
4997	if (fd == NULL) {
4998	EMSG2(_(e_notopen), fname);
4999	return;
5000	}
5001
5002	vim_snprintf((char *)IObuff, IOSIZE,
5003	_("Writing suggestion file %s..."), fname);
5004	spell_message(spin, IObuff);
5005
5006	// <SUGHEADER>: <fileID> <versionnr> <timestamp>
5007	if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)`1`, fd) != `1`) { // <fileID>
5008	EMSG(_(e_write));
5009	goto theend;
5010	}
5011	putc(VIMSUGVERSION, fd); // <versionnr>
5012
5013	// Write si_sugtime to the file.
5014	put_time(fd, spin->si_sugtime); // <timestamp>
5015
5016	// <SUGWORDTREE>
5017	spin->si_memtot = `0`;
5018	wordnode_T *tree = spin->si_foldroot->wn_sibling;
5019
5020	// Clear the index and wnode fields in the tree.
5021	clear_node(tree);
5022
5023	// Count the number of nodes. Needed to be able to allocate the
5024	// memory when reading the nodes. Also fills in index for shared
5025	// nodes.
5026	size_t nodecount = (size_t)put_node(NULL, tree, `0`, `0`, false);
5027
5028	// number of nodes in 4 bytes
5029	put_bytes(fd, nodecount, `4`); // <nodecount>
5030	assert(nodecount + nodecount * sizeof(int) < INT_MAX);
5031	spin->si_memtot += (int)(nodecount + nodecount * sizeof(int));
5032
5033	// Write the nodes.
5034	(void)put_node(fd, tree, `0`, `0`, false);
5035
5036	// <SUGTABLE>: <sugwcount> <sugline> ...
5037	linenr_T wcount = spin->si_spellbuf->b_ml.ml_line_count;
5038	assert(wcount >= `0`);
5039	put_bytes(fd, (uintmax_t)wcount, `4`); // <sugwcount>
5040
5041	for (linenr_T lnum = `1`; lnum <= wcount; ++lnum) {
5042	// <sugline>: <sugnr> ... NUL
5043	char_u *line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
5044	size_t len = STRLEN(line) + `1`;
5045	if (fwrite(line, len, `1`, fd) == `0`) {
5046	EMSG(_(e_write));
5047	goto theend;
5048	}
5049	assert((size_t)spin->si_memtot + len <= INT_MAX);
5050	spin->si_memtot += (int)len;
5051	}
5052
5053	// Write another byte to check for errors.
5054	if (putc(`0`, fd) == EOF)
5055	EMSG(_(e_write));
5056
5057	vim_snprintf((char *)IObuff, IOSIZE,
5058	_("Estimated runtime memory use: %d bytes"), spin->si_memtot);
5059	spell_message(spin, IObuff);
5060
5061	theend:
5062	// close the file
5063	fclose(fd);
5064	}
5065
5066
5067	// Create a Vim spell file from one or more word lists.
5068	// "fnames[0]" is the output file name.
5069	// "fnames[fcount - 1]" is the last input file name.
5070	// Exception: when "fnames[0]" ends in ".add" it's used as the input file name
5071	// and ".spl" is appended to make the output file name.
5072	static void
5073	mkspell (
5074	int fcount,
5075	char_u **fnames,
5076	bool ascii, // -ascii argument given
5077	bool over_write, // overwrite existing output file
5078	bool added_word // invoked through "zg"
5079	)
5080	{
5081	char_u *fname = NULL;
5082	char_u *wfname;
5083	char_u **innames;
5084	int incount;
5085	afffile_T *(afile[MAXREGIONS]);
5086	int i;
5087	int len;
5088	bool error = false;
5089	spellinfo_T spin;
5090
5091	memset(&spin, `0`, sizeof(spin));
5092	spin.si_verbose = !added_word;
5093	spin.si_ascii = ascii;
5094	spin.si_followup = true;
5095	spin.si_rem_accents = true;
5096	ga_init(&spin.si_rep, (int)sizeof(fromto_T), `20`);
5097	ga_init(&spin.si_repsal, (int)sizeof(fromto_T), `20`);
5098	ga_init(&spin.si_sal, (int)sizeof(fromto_T), `20`);
5099	ga_init(&spin.si_map, (int)sizeof(char_u), `100`);
5100	ga_init(&spin.si_comppat, (int)sizeof(char_u *), `20`);
5101	ga_init(&spin.si_prefcond, (int)sizeof(char_u *), `50`);
5102	hash_init(&spin.si_commonwords);
5103	spin.si_newcompID = `127`; // start compound ID at first maximum
5104
5105	// default: fnames[0] is output file, following are input files
5106	innames = &fnames[`1`];
5107	incount = fcount - `1`;
5108
5109	wfname = xmalloc(MAXPATHL);
5110
5111	if (fcount >= `1`) {
5112	len = (int)STRLEN(fnames[`0`]);
5113	if (fcount == `1` && len > `4` && STRCMP(fnames[`0`] + len - `4`, ".add") == `0`) {
5114	// For ":mkspell path/en.latin1.add" output file is
5115	// "path/en.latin1.add.spl".
5116	innames = &fnames[`0`];
5117	incount = `1`;
5118	vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[`0`]);
5119	} else if (fcount == `1`) {
5120	// For ":mkspell path/vim" output file is "path/vim.latin1.spl".
5121	innames = &fnames[`0`];
5122	incount = `1`;
5123	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5124	fnames[`0`], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5125	} else if (len > `4` && STRCMP(fnames[`0`] + len - `4`, ".spl") == `0`) {
5126	// Name ends in ".spl", use as the file name.
5127	STRLCPY(wfname, fnames[`0`], MAXPATHL);
5128	} else
5129	// Name should be language, make the file name from it.
5130	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5131	fnames[`0`], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5132
5133	// Check for .ascii.spl.
5134	if (strstr((char *)path_tail(wfname), SPL_FNAME_ASCII) != NULL)
5135	spin.si_ascii = true;
5136
5137	// Check for .add.spl.
5138	if (strstr((char *)path_tail(wfname), SPL_FNAME_ADD) != NULL)
5139	spin.si_add = true;
5140	}
5141
5142	if (incount <= `0`) {
5143	EMSG(_(e_invarg)); // need at least output and input names
5144	} else if (vim_strchr(path_tail(wfname), `'_'`) != NULL) {
5145	EMSG(_("E751: Output file name must not have region name"));
5146	} else if (incount > MAXREGIONS) {
5147	emsgf(_("E754: Only up to %d regions supported"), MAXREGIONS);
5148	} else {
5149	// Check for overwriting before doing things that may take a lot of
5150	// time.
5151	if (!over_write && os_path_exists(wfname)) {
5152	EMSG(_(e_exists));
5153	goto theend;
5154	}
5155	if (os_isdir(wfname)) {
5156	EMSG2(_(e_isadir2), wfname);
5157	goto theend;
5158	}
5159
5160	fname = xmalloc(MAXPATHL);
5161
5162	// Init the aff and dic pointers.
5163	// Get the region names if there are more than 2 arguments.
5164	for (i = `0`; i < incount; ++i) {
5165	afile[i] = NULL;
5166
5167	if (incount > `1`) {
5168	len = (int)STRLEN(innames[i]);
5169	if (STRLEN(path_tail(innames[i])) < `5`
5170	\|\| innames[i][len - `3`] != `'_'`) {
5171	EMSG2(_("E755: Invalid region in %s"), innames[i]);
5172	goto theend;
5173	}
5174	spin.si_region_name[i * `2`] = TOLOWER_ASC(innames[i][len - `2`]);
5175	spin.si_region_name[i * `2` + `1`] =
5176	TOLOWER_ASC(innames[i][len - `1`]);
5177	}
5178	}
5179	spin.si_region_count = incount;
5180
5181	spin.si_foldroot = wordtree_alloc(&spin);
5182	spin.si_keeproot = wordtree_alloc(&spin);
5183	spin.si_prefroot = wordtree_alloc(&spin);
5184
5185	// When not producing a .add.spl file clear the character table when
5186	// we encounter one in the .aff file. This means we dump the current
5187	// one in the .spl file if the .aff file doesn't define one. That's
5188	// better than guessing the contents, the table will match a
5189	// previously loaded spell file.
5190	if (!spin.si_add)
5191	spin.si_clear_chartab = true;
5192
5193	// Read all the .aff and .dic files.
5194	// Text is converted to 'encoding'.
5195	// Words are stored in the case-folded and keep-case trees.
5196	for (i = `0`; i < incount && !error; ++i) {
5197	spin.si_conv.vc_type = CONV_NONE;
5198	spin.si_region = `1` << i;
5199
5200	vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
5201	if (os_path_exists(fname)) {
5202	// Read the .aff file. Will init "spin->si_conv" based on the
5203	// "SET" line.
5204	afile[i] = spell_read_aff(&spin, fname);
5205	if (afile[i] == NULL)
5206	error = true;
5207	else {
5208	// Read the .dic file and store the words in the trees.
5209	vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
5210	innames[i]);
5211	if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
5212	error = true;
5213	}
5214	} else {
5215	// No .aff file, try reading the file as a word list. Store
5216	// the words in the trees.
5217	if (spell_read_wordfile(&spin, innames[i]) == FAIL)
5218	error = true;
5219	}
5220
5221	// Free any conversion stuff.
5222	convert_setup(&spin.si_conv, NULL, NULL);
5223	}
5224
5225	if (spin.si_compflags != NULL && spin.si_nobreak)
5226	MSG(_("Warning: both compounding and NOBREAK specified"));
5227
5228	if (!error && !got_int) {
5229	// Combine tails in the tree.
5230	spell_message(&spin, (char_u *)_(msg_compressing));
5231	wordtree_compress(&spin, spin.si_foldroot);
5232	wordtree_compress(&spin, spin.si_keeproot);
5233	wordtree_compress(&spin, spin.si_prefroot);
5234	}
5235
5236	if (!error && !got_int) {
5237	// Write the info in the spell file.
5238	vim_snprintf((char *)IObuff, IOSIZE,
5239	_("Writing spell file %s..."), wfname);
5240	spell_message(&spin, IObuff);
5241
5242	error = write_vim_spell(&spin, wfname) == FAIL;
5243
5244	spell_message(&spin, (char_u *)_("Done!"));
5245	vim_snprintf((char *)IObuff, IOSIZE,
5246	_("Estimated runtime memory use: %d bytes"), spin.si_memtot);
5247	spell_message(&spin, IObuff);
5248
5249	// If the file is loaded need to reload it.
5250	if (!error)
5251	spell_reload_one(wfname, added_word);
5252	}
5253
5254	// Free the allocated memory.
5255	ga_clear(&spin.si_rep);
5256	ga_clear(&spin.si_repsal);
5257	ga_clear(&spin.si_sal);
5258	ga_clear(&spin.si_map);
5259	ga_clear(&spin.si_comppat);
5260	ga_clear(&spin.si_prefcond);
5261	hash_clear_all(&spin.si_commonwords, `0`);
5262
5263	// Free the .aff file structures.
5264	for (i = `0`; i < incount; ++i)
5265	if (afile[i] != NULL)
5266	spell_free_aff(afile[i]);
5267
5268	// Free all the bits and pieces at once.
5269	free_blocks(spin.si_blocks);
5270
5271	// If there is soundfolding info and no NOSUGFILE item create the
5272	// .sug file with the soundfolded word trie.
5273	if (spin.si_sugtime != `0` && !error && !got_int)
5274	spell_make_sugfile(&spin, wfname);
5275
5276	}
5277
5278	theend:
5279	xfree(fname);
5280	xfree(wfname);
5281	}
5282
5283	// Display a message for spell file processing when 'verbose' is set or using
5284	// ":mkspell". "str" can be IObuff.
5285	static void spell_message(spellinfo_T spin, char_u str)
5286	{
5287	if (spin->si_verbose \|\| p_verbose > `2`) {
5288	if (!spin->si_verbose)
5289	verbose_enter();
5290	MSG(str);
5291	ui_flush();
5292	if (!spin->si_verbose)
5293	verbose_leave();
5294	}
5295	}
5296
5297	// ":[count]spellgood {word}"
5298	// ":[count]spellwrong {word}"
5299	// ":[count]spellundo {word}"
5300	void ex_spell(exarg_T *eap)
5301	{
5302	spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
5303	eap->forceit ? `0` : (int)eap->line2,
5304	eap->cmdidx == CMD_spellundo);
5305	}
5306
5307	// Add "word[len]" to 'spellfile' as a good or bad word.
5308	void
5309	spell_add_word (
5310	char_u *word,
5311	int len,
5312	int bad,
5313	int idx, // "zG" and "zW": zero, otherwise index in
5314	// 'spellfile'
5315	bool undo // true for "zug", "zuG", "zuw" and "zuW"
5316	)
5317	{
5318	FILE *fd = NULL;
5319	buf_T *buf = NULL;
5320	bool new_spf = false;
5321	char_u *fname;
5322	char_u *fnamebuf = NULL;
5323	char_u line[MAXWLEN * `2`];
5324	long fpos, fpos_next = `0`;
5325	int i;
5326	char_u *spf;
5327
5328	if (idx == `0`) { // use internal wordlist
5329	if (int_wordlist == NULL) {
5330	int_wordlist = vim_tempname();
5331	if (int_wordlist == NULL)
5332	return;
5333	}
5334	fname = int_wordlist;
5335	} else {
5336	// If 'spellfile' isn't set figure out a good default value.
5337	if (*curwin->w_s->b_p_spf == NUL) {
5338	init_spellfile();
5339	new_spf = true;
5340	}
5341
5342	if (*curwin->w_s->b_p_spf == NUL) {
5343	EMSG2(_(e_notset), "spellfile");
5344	return;
5345	}
5346	fnamebuf = xmalloc(MAXPATHL);
5347
5348	for (spf = curwin->w_s->b_p_spf, i = `1`; *spf != NUL; ++i) {
5349	copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
5350	if (i == idx)
5351	break;
5352	if (*spf == NUL) {
5353	EMSGN(_("E765: 'spellfile' does not have %" PRId64 " entries"), idx);
5354	xfree(fnamebuf);
5355	return;
5356	}
5357	}
5358
5359	// Check that the user isn't editing the .add file somewhere.
5360	buf = buflist_findname_exp(fnamebuf);
5361	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
5362	buf = NULL;
5363	if (buf != NULL && bufIsChanged(buf)) {
5364	EMSG(_(e_bufloaded));
5365	xfree(fnamebuf);
5366	return;
5367	}
5368
5369	fname = fnamebuf;
5370	}
5371
5372	if (bad \|\| undo) {
5373	// When the word appears as good word we need to remove that one,
5374	// since its flags sort before the one with WF_BANNED.
5375	fd = os_fopen((char *)fname, "r");
5376	if (fd != NULL) {
5377	while (!vim_fgets(line, MAXWLEN * `2`, fd)) {
5378	fpos = fpos_next;
5379	fpos_next = ftell(fd);
5380	if (STRNCMP(word, line, len) == `0`
5381	&& (line[len] == `'/'` \|\| line[len] < `' '`)) {
5382	// Found duplicate word. Remove it by writing a '#' at
5383	// the start of the line. Mixing reading and writing
5384	// doesn't work for all systems, close the file first.
5385	fclose(fd);
5386	fd = os_fopen((char *)fname, "r+");
5387	if (fd == NULL) {
5388	break;
5389	}
5390	if (fseek(fd, fpos, SEEK_SET) == `0`) {
5391	fputc(`'#'`, fd);
5392	if (undo) {
5393	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
5394	smsg(_("Word '%.*s' removed from %s"),
5395	len, word, NameBuff);
5396	}
5397	}
5398	if (fseek(fd, fpos_next, SEEK_SET) <= `0`) {
5399	break;
5400	}
5401	}
5402	}
5403	if (fd != NULL)
5404	fclose(fd);
5405	}
5406	}
5407
5408	if (!undo) {
5409	fd = os_fopen((char *)fname, "a");
5410	if (fd == NULL && new_spf) {
5411	char_u *p;
5412
5413	// We just initialized the 'spellfile' option and can't open the
5414	// file. We may need to create the "spell" directory first. We
5415	// already checked the runtime directory is writable in
5416	// init_spellfile().
5417	if (!dir_of_file_exists(fname) && (p = path_tail_with_sep(fname)) != fname) {
5418	int c = *p;
5419
5420	// The directory doesn't exist. Try creating it and opening
5421	// the file again.
5422	*p = NUL;
5423	os_mkdir((char *)fname, `0755`);
5424	*p = c;
5425	fd = os_fopen((char *)fname, "a");
5426	}
5427	}
5428
5429	if (fd == NULL)
5430	EMSG2(_(e_notopen), fname);
5431	else {
5432	if (bad)
5433	fprintf(fd, "%.*s/!\n", len, word);
5434	else
5435	fprintf(fd, "%.*s\n", len, word);
5436	fclose(fd);
5437
5438	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
5439	smsg(_("Word '%.*s' added to %s"), len, word, NameBuff);
5440	}
5441	}
5442
5443	if (fd != NULL) {
5444	// Update the .add.spl file.
5445	mkspell(`1`, &fname, false, true, true);
5446
5447	// If the .add file is edited somewhere, reload it.
5448	if (buf != NULL)
5449	buf_reload(buf, buf->b_orig_mode);
5450
5451	redraw_all_later(SOME_VALID);
5452	}
5453	xfree(fnamebuf);
5454	}
5455
5456	// Initialize 'spellfile' for the current buffer.
5457	static void init_spellfile(void)
5458	{
5459	char_u *buf;
5460	int l;
5461	char_u *fname;
5462	char_u *rtp;
5463	char_u *lend;
5464	bool aspath = false;
5465	char_u *lstart = curbuf->b_s.b_p_spl;
5466
5467	if (*curwin->w_s->b_p_spl != NUL && !GA_EMPTY(&curwin->w_s->b_langp)) {
5468	buf = xmalloc(MAXPATHL);
5469
5470	// Find the end of the language name. Exclude the region. If there
5471	// is a path separator remember the start of the tail.
5472	for (lend = curwin->w_s->b_p_spl; *lend != NUL
5473	&& vim_strchr((char_u )",._", lend) == NULL; ++lend)
5474	if (vim_ispathsep(*lend)) {
5475	aspath = true;
5476	lstart = lend + `1`;
5477	}
5478
5479	// Loop over all entries in 'runtimepath'. Use the first one where we
5480	// are allowed to write.
5481	rtp = p_rtp;
5482	while (*rtp != NUL) {
5483	if (aspath)
5484	// Use directory of an entry with path, e.g., for
5485	// "/dir/lg.utf-8.spl" use "/dir".
5486	STRLCPY(buf, curbuf->b_s.b_p_spl,
5487	lstart - curbuf->b_s.b_p_spl);
5488	else
5489	// Copy the path from 'runtimepath' to buf[].
5490	copy_option_part(&rtp, buf, MAXPATHL, ",");
5491	if (os_file_is_writable((char *)buf) == `2`) {
5492	// Use the first language name from 'spelllang' and the
5493	// encoding used in the first loaded .spl file.
5494	if (aspath)
5495	STRLCPY(buf, curbuf->b_s.b_p_spl,
5496	lend - curbuf->b_s.b_p_spl + `1`);
5497	else {
5498	// Create the "spell" directory if it doesn't exist yet.
5499	l = (int)STRLEN(buf);
5500	vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
5501	if (os_file_is_writable((char *)buf) != `2`) {
5502	os_mkdir((char *)buf, `0755`);
5503	}
5504
5505	l = (int)STRLEN(buf);
5506	vim_snprintf((char *)buf + l, MAXPATHL - l,
5507	"/%.s", (int*)(lend - lstart), lstart);
5508	}
5509	l = (int)STRLEN(buf);
5510	fname = LANGP_ENTRY(curwin->w_s->b_langp, `0`)
5511	->lp_slang->sl_fname;
5512	vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
5513	((fname != NULL
5514	&& strstr((char *)path_tail(fname), ".ascii.") != NULL)
5515	? "ascii"
5516	: (const char *)spell_enc()));
5517	set_option_value("spellfile", `0L`, (const char *)buf, OPT_LOCAL);
5518	break;
5519	}
5520	aspath = false;
5521	}
5522
5523	xfree(buf);
5524	}
5525	}
5526
5527	// Set the spell character tables from strings in the affix file.
5528	static int set_spell_chartab(char_u fol, char_u low, char_u *upp)
5529	{
5530	// We build the new tables here first, so that we can compare with the
5531	// previous one.
5532	spelltab_T new_st;
5533	char_u pf = fol, pl = low, *pu = upp;
5534	int f, l, u;
5535
5536	clear_spell_chartab(&new_st);
5537
5538	while (*pf != NUL) {
5539	if (pl == NUL \|\| pu == NUL) {
5540	EMSG(_(e_affform));
5541	return FAIL;
5542	}
5543	f = mb_ptr2char_adv((const char_u **)&pf);
5544	l = mb_ptr2char_adv((const char_u **)&pl);
5545	u = mb_ptr2char_adv((const char_u **)&pu);
5546	// Every character that appears is a word character.
5547	if (f < `256`)
5548	new_st.st_isw[f] = true;
5549	if (l < `256`)
5550	new_st.st_isw[l] = true;
5551	if (u < `256`)
5552	new_st.st_isw[u] = true;
5553
5554	// if "LOW" and "FOL" are not the same the "LOW" char needs
5555	// case-folding
5556	if (l < `256` && l != f) {
5557	if (f >= `256`) {
5558	EMSG(_(e_affrange));
5559	return FAIL;
5560	}
5561	new_st.st_fold[l] = f;
5562	}
5563
5564	// if "UPP" and "FOL" are not the same the "UPP" char needs
5565	// case-folding, it's upper case and the "UPP" is the upper case of
5566	// "FOL" .
5567	if (u < `256` && u != f) {
5568	if (f >= `256`) {
5569	EMSG(_(e_affrange));
5570	return FAIL;
5571	}
5572	new_st.st_fold[u] = f;
5573	new_st.st_isu[u] = true;
5574	new_st.st_upper[f] = u;
5575	}
5576	}
5577
5578	if (pl != NUL \|\| pu != NUL) {
5579	EMSG(_(e_affform));
5580	return FAIL;
5581	}
5582
5583	return set_spell_finish(&new_st);
5584	}
5585
5586	// Set the spell character tables from strings in the .spl file.
5587	static void
5588	set_spell_charflags (
5589	char_u *flags,
5590	int cnt, // length of "flags"
5591	char_u *fol
5592	)
5593	{
5594	// We build the new tables here first, so that we can compare with the
5595	// previous one.
5596	spelltab_T new_st;
5597	int i;
5598	char_u *p = fol;
5599	int c;
5600
5601	clear_spell_chartab(&new_st);
5602
5603	for (i = `0`; i < `128`; ++i) {
5604	if (i < cnt) {
5605	new_st.st_isw[i + `128`] = (flags[i] & CF_WORD) != `0`;
5606	new_st.st_isu[i + `128`] = (flags[i] & CF_UPPER) != `0`;
5607	}
5608
5609	if (*p != NUL) {
5610	c = mb_ptr2char_adv((const char_u **)&p);
5611	new_st.st_fold[i + `128`] = c;
5612	if (i + `128` != c && new_st.st_isu[i + `128`] && c < `256`)
5613	new_st.st_upper[c] = i + `128`;
5614	}
5615	}
5616
5617	(void)set_spell_finish(&new_st);
5618	}
5619
5620	static int set_spell_finish(spelltab_T *new_st)
5621	{
5622	int i;
5623
5624	if (did_set_spelltab) {
5625	// check that it's the same table
5626	for (i = `0`; i < `256`; ++i) {
5627	if (spelltab.st_isw[i] != new_st->st_isw[i]
5628	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
5629	\|\| spelltab.st_fold[i] != new_st->st_fold[i]
5630	\|\| spelltab.st_upper[i] != new_st->st_upper[i]) {
5631	EMSG(_("E763: Word characters differ between spell files"));
5632	return FAIL;
5633	}
5634	}
5635	} else {
5636	// copy the new spelltab into the one being used
5637	spelltab = *new_st;
5638	did_set_spelltab = true;
5639	}
5640
5641	return OK;
5642	}
5643
5644	// Write the table with prefix conditions to the .spl file.
5645	// When "fd" is NULL only count the length of what is written.
5646	static int write_spell_prefcond(FILE fd, garray_T gap)
5647	{
5648	assert(gap->ga_len >= `0`);
5649
5650	if (fd != NULL)
5651	put_bytes(fd, (uintmax_t)gap->ga_len, `2`); // <prefcondcnt>
5652
5653	size_t totlen = `2` + (size_t)gap->ga_len; // <prefcondcnt> and <condlen> bytes
5654	size_t x = `1`; // collect return value of fwrite()
5655	for (int i = `0`; i < gap->ga_len; ++i) {
5656	// <prefcond> : <condlen> <condstr>
5657	char_u p = ((char_u *)gap->ga_data)[i];
5658	if (p != NULL) {
5659	size_t len = STRLEN(p);
5660	if (fd != NULL) {
5661	assert(len <= INT_MAX);
5662	fputc((int)len, fd);
5663	x &= fwrite(p, len, `1`, fd);
5664	}
5665	totlen += len;
5666	} else if (fd != NULL)
5667	fputc(`0`, fd);
5668	}
5669
5670	assert(totlen <= INT_MAX);
5671	return (int)totlen;
5672	}
5673
5674	// Use map string "map" for languages "lp".
5675	static void set_map_str(slang_T lp, char_u map)
5676	{
5677	char_u *p;
5678	int headc = `0`;
5679	int c;
5680	int i;
5681
5682	if (*map == NUL) {
5683	lp->sl_has_map = false;
5684	return;
5685	}
5686	lp->sl_has_map = true;
5687
5688	// Init the array and hash tables empty.
5689	for (i = `0`; i < `256`; ++i)
5690	lp->sl_map_array[i] = `0`;
5691	hash_init(&lp->sl_map_hash);
5692
5693	// The similar characters are stored separated with slashes:
5694	// "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
5695	// before the same slash. For characters above 255 sl_map_hash is used.
5696	for (p = map; *p != NUL; ) {
5697	c = mb_cptr2char_adv((const char_u **)&p);
5698	if (c == `'/'`) {
5699	headc = `0`;
5700	} else {
5701	if (headc == `0`) {
5702	headc = c;
5703	}
5704
5705	// Characters above 255 don't fit in sl_map_array[], put them in
5706	// the hash table. Each entry is the char, a NUL the headchar and
5707	// a NUL.
5708	if (c >= `256`) {
5709	int cl = mb_char2len(c);
5710	int headcl = mb_char2len(headc);
5711	char_u *b;
5712	hash_T hash;
5713	hashitem_T *hi;
5714
5715	b = xmalloc(cl + headcl + `2`);
5716	utf_char2bytes(c, b);
5717	b[cl] = NUL;
5718	utf_char2bytes(headc, b + cl + `1`);
5719	b[cl + `1` + headcl] = NUL;
5720	hash = hash_hash(b);
5721	hi = hash_lookup(&lp->sl_map_hash, (const char *)b, STRLEN(b), hash);
5722	if (HASHITEM_EMPTY(hi)) {
5723	hash_add_item(&lp->sl_map_hash, hi, b, hash);
5724	} else {
5725	// This should have been checked when generating the .spl
5726	// file.
5727	EMSG(_("E783: duplicate char in MAP entry"));
5728	xfree(b);
5729	}
5730	} else
5731	lp->sl_map_array[c] = headc;
5732	}
5733	}
5734	}
5735
5736

Browse the source code of neovim/src/nvim/spellfile.c