1 | // This is an open source non-commercial project. Dear PVS-Studio, please check |
2 | // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com |
3 | |
4 | // spellfile.c: code for reading and writing spell files. |
5 | // |
6 | // See spell.c for information about spell checking. |
7 | |
8 | // Vim spell file format: <HEADER> |
9 | // <SECTIONS> |
10 | // <LWORDTREE> |
11 | // <KWORDTREE> |
12 | // <PREFIXTREE> |
13 | // |
14 | // <HEADER>: <fileID> <versionnr> |
15 | // |
16 | // <fileID> 8 bytes "VIMspell" |
17 | // <versionnr> 1 byte VIMSPELLVERSION |
18 | // |
19 | // |
20 | // Sections make it possible to add information to the .spl file without |
21 | // making it incompatible with previous versions. There are two kinds of |
22 | // sections: |
23 | // 1. Not essential for correct spell checking. E.g. for making suggestions. |
24 | // These are skipped when not supported. |
25 | // 2. Optional information, but essential for spell checking when present. |
26 | // E.g. conditions for affixes. When this section is present but not |
27 | // supported an error message is given. |
28 | // |
29 | // <SECTIONS>: <section> ... <sectionend> |
30 | // |
31 | // <section>: <sectionID> <sectionflags> <sectionlen> (section contents) |
32 | // |
33 | // <sectionID> 1 byte number from 0 to 254 identifying the section |
34 | // |
35 | // <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct |
36 | // spell checking |
37 | // |
38 | // <sectionlen> 4 bytes length of section contents, MSB first |
39 | // |
40 | // <sectionend> 1 byte SN_END |
41 | // |
42 | // |
43 | // sectionID == SN_INFO: <infotext> |
44 | // <infotext> N bytes free format text with spell file info (version, |
45 | // website, etc) |
46 | // |
47 | // sectionID == SN_REGION: <regionname> ... |
48 | // <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. |
49 | // Lower case. |
50 | // First <regionname> is region 1. |
51 | // |
52 | // sectionID == SN_CHARFLAGS: <charflagslen> <charflags> |
53 | // <folcharslen> <folchars> |
54 | // <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). |
55 | // <charflags> N bytes List of flags (first one is for character 128): |
56 | // 0x01 word character CF_WORD |
57 | // 0x02 upper-case character CF_UPPER |
58 | // <folcharslen> 2 bytes Number of bytes in <folchars>. |
59 | // <folchars> N bytes Folded characters, first one is for character 128. |
60 | // |
61 | // sectionID == SN_MIDWORD: <midword> |
62 | // <midword> N bytes Characters that are word characters only when used |
63 | // in the middle of a word. |
64 | // |
65 | // sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... |
66 | // <prefcondcnt> 2 bytes Number of <prefcond> items following. |
67 | // <prefcond> : <condlen> <condstr> |
68 | // <condlen> 1 byte Length of <condstr>. |
69 | // <condstr> N bytes Condition for the prefix. |
70 | // |
71 | // sectionID == SN_REP: <repcount> <rep> ... |
72 | // <repcount> 2 bytes number of <rep> items, MSB first. |
73 | // <rep> : <repfromlen> <repfrom> <reptolen> <repto> |
74 | // <repfromlen> 1 byte length of <repfrom> |
75 | // <repfrom> N bytes "from" part of replacement |
76 | // <reptolen> 1 byte length of <repto> |
77 | // <repto> N bytes "to" part of replacement |
78 | // |
79 | // sectionID == SN_REPSAL: <repcount> <rep> ... |
80 | // just like SN_REP but for soundfolded words |
81 | // |
82 | // sectionID == SN_SAL: <salflags> <salcount> <sal> ... |
83 | // <salflags> 1 byte flags for soundsalike conversion: |
84 | // SAL_F0LLOWUP |
85 | // SAL_COLLAPSE |
86 | // SAL_REM_ACCENTS |
87 | // <salcount> 2 bytes number of <sal> items following |
88 | // <sal> : <salfromlen> <salfrom> <saltolen> <salto> |
89 | // <salfromlen> 1 byte length of <salfrom> |
90 | // <salfrom> N bytes "from" part of soundsalike |
91 | // <saltolen> 1 byte length of <salto> |
92 | // <salto> N bytes "to" part of soundsalike |
93 | // |
94 | // sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> |
95 | // <sofofromlen> 2 bytes length of <sofofrom> |
96 | // <sofofrom> N bytes "from" part of soundfold |
97 | // <sofotolen> 2 bytes length of <sofoto> |
98 | // <sofoto> N bytes "to" part of soundfold |
99 | // |
100 | // sectionID == SN_SUGFILE: <timestamp> |
101 | // <timestamp> 8 bytes time in seconds that must match with .sug file |
102 | // |
103 | // sectionID == SN_NOSPLITSUGS: nothing |
104 | // |
105 | // sectionID == SN_NOCOMPOUNDSUGS: nothing |
106 | // |
107 | // sectionID == SN_WORDS: <word> ... |
108 | // <word> N bytes NUL terminated common word |
109 | // |
110 | // sectionID == SN_MAP: <mapstr> |
111 | // <mapstr> N bytes String with sequences of similar characters, |
112 | // separated by slashes. |
113 | // |
114 | // sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> |
115 | // <comppatcount> <comppattern> ... <compflags> |
116 | // <compmax> 1 byte Maximum nr of words in compound word. |
117 | // <compminlen> 1 byte Minimal word length for compounding. |
118 | // <compsylmax> 1 byte Maximum nr of syllables in compound word. |
119 | // <compoptions> 2 bytes COMP_ flags. |
120 | // <comppatcount> 2 bytes number of <comppattern> following |
121 | // <compflags> N bytes Flags from COMPOUNDRULE items, separated by |
122 | // slashes. |
123 | // |
124 | // <comppattern>: <comppatlen> <comppattext> |
125 | // <comppatlen> 1 byte length of <comppattext> |
126 | // <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN |
127 | // |
128 | // sectionID == SN_NOBREAK: (empty, its presence is what matters) |
129 | // |
130 | // sectionID == SN_SYLLABLE: <syllable> |
131 | // <syllable> N bytes String from SYLLABLE item. |
132 | // |
133 | // <LWORDTREE>: <wordtree> |
134 | // |
135 | // <KWORDTREE>: <wordtree> |
136 | // |
137 | // <PREFIXTREE>: <wordtree> |
138 | // |
139 | // |
140 | // <wordtree>: <nodecount> <nodedata> ... |
141 | // |
142 | // <nodecount> 4 bytes Number of nodes following. MSB first. |
143 | // |
144 | // <nodedata>: <siblingcount> <sibling> ... |
145 | // |
146 | // <siblingcount> 1 byte Number of siblings in this node. The siblings |
147 | // follow in sorted order. |
148 | // |
149 | // <sibling>: <byte> [ <nodeidx> <xbyte> |
150 | // | <flags> [<flags2>] [<region>] [<affixID>] |
151 | // | [<pflags>] <affixID> <prefcondnr> ] |
152 | // |
153 | // <byte> 1 byte Byte value of the sibling. Special cases: |
154 | // BY_NOFLAGS: End of word without flags and for all |
155 | // regions. |
156 | // For PREFIXTREE <affixID> and |
157 | // <prefcondnr> follow. |
158 | // BY_FLAGS: End of word, <flags> follow. |
159 | // For PREFIXTREE <pflags>, <affixID> |
160 | // and <prefcondnr> follow. |
161 | // BY_FLAGS2: End of word, <flags> and <flags2> |
162 | // follow. Not used in PREFIXTREE. |
163 | // BY_INDEX: Child of sibling is shared, <nodeidx> |
164 | // and <xbyte> follow. |
165 | // |
166 | // <nodeidx> 3 bytes Index of child for this sibling, MSB first. |
167 | // |
168 | // <xbyte> 1 byte Byte value of the sibling. |
169 | // |
170 | // <flags> 1 byte Bitmask of: |
171 | // WF_ALLCAP word must have only capitals |
172 | // WF_ONECAP first char of word must be capital |
173 | // WF_KEEPCAP keep-case word |
174 | // WF_FIXCAP keep-case word, all caps not allowed |
175 | // WF_RARE rare word |
176 | // WF_BANNED bad word |
177 | // WF_REGION <region> follows |
178 | // WF_AFX <affixID> follows |
179 | // |
180 | // <flags2> 1 byte Bitmask of: |
181 | // WF_HAS_AFF >> 8 word includes affix |
182 | // WF_NEEDCOMP >> 8 word only valid in compound |
183 | // WF_NOSUGGEST >> 8 word not used for suggestions |
184 | // WF_COMPROOT >> 8 word already a compound |
185 | // WF_NOCOMPBEF >> 8 no compounding before this word |
186 | // WF_NOCOMPAFT >> 8 no compounding after this word |
187 | // |
188 | // <pflags> 1 byte Bitmask of: |
189 | // WFP_RARE rare prefix |
190 | // WFP_NC non-combining prefix |
191 | // WFP_UP letter after prefix made upper case |
192 | // |
193 | // <region> 1 byte Bitmask for regions in which word is valid. When |
194 | // omitted it's valid in all regions. |
195 | // Lowest bit is for region 1. |
196 | // |
197 | // <affixID> 1 byte ID of affix that can be used with this word. In |
198 | // PREFIXTREE used for the required prefix ID. |
199 | // |
200 | // <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list |
201 | // from the SN_PREFCOND section. |
202 | // |
203 | // All text characters are in 'encoding', but stored as single bytes. |
204 | |
205 | // Vim .sug file format: <SUGHEADER> |
206 | // <SUGWORDTREE> |
207 | // <SUGTABLE> |
208 | // |
209 | // <SUGHEADER>: <fileID> <versionnr> <timestamp> |
210 | // |
211 | // <fileID> 6 bytes "VIMsug" |
212 | // <versionnr> 1 byte VIMSUGVERSION |
213 | // <timestamp> 8 bytes timestamp that must match with .spl file |
214 | // |
215 | // |
216 | // <SUGWORDTREE>: <wordtree> (see above, no flags or region used) |
217 | // |
218 | // |
219 | // <SUGTABLE>: <sugwcount> <sugline> ... |
220 | // |
221 | // <sugwcount> 4 bytes number of <sugline> following |
222 | // |
223 | // <sugline>: <sugnr> ... NUL |
224 | // |
225 | // <sugnr>: X bytes word number that results in this soundfolded word, |
226 | // stored as an offset to the previous number in as |
227 | // few bytes as possible (see offset2bytes()). |
228 | |
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <wctype.h>

#include "nvim/vim.h"
#include "nvim/spell_defs.h"
#include "nvim/ascii.h"
#include "nvim/buffer.h"
#include "nvim/charset.h"
#include "nvim/ex_cmds2.h"
#include "nvim/fileio.h"
#include "nvim/memory.h"
#include "nvim/memline.h"
#include "nvim/misc1.h"
#include "nvim/option.h"
#include "nvim/os/os.h"
#include "nvim/path.h"
#include "nvim/regexp.h"
#include "nvim/screen.h"
#include "nvim/spell.h"
#include "nvim/spellfile.h"
#include "nvim/ui.h"
#include "nvim/undo.h"

#ifndef UNIX            // it's in os/unix_defs.h for Unix
# include <time.h>      // for time_t
#endif
256 | |
// Reserved values of <byte> in a tree node.  The postponed-prefix tree and
// the word trees each use only a subset of them.
#define BY_NOFLAGS      0          // word ends here, no flags and no region;
                                   // prefix tree: no <pflags>
#define BY_INDEX        1          // shared child, its index follows
#define BY_FLAGS        2          // word ends here, a <flags> byte follows;
                                   // prefix tree: <pflags> follows
#define BY_FLAGS2       3          // word ends here, <flags> and <flags2>
                                   // bytes follow; not used in prefix tree
#define BY_SPECIAL      BY_FLAGS2  // largest reserved byte value

// Bits of the soundsalike flags byte stored in a .spl file.
#define SAL_F0LLOWUP    0x01
#define SAL_COLLAPSE    0x02
#define SAL_REM_ACCENTS 0x04

// File identification at the very start of a Vim spell file.
#define VIMSPELLMAGIC   "VIMspell"
#define VIMSPELLMAGICL  (sizeof(VIMSPELLMAGIC) - 1)  // length without the NUL
#define VIMSPELLVERSION 50

// Section IDs.  Renumbering is only allowed together with a change of
// VIMSPELLVERSION!
#define SN_REGION         0     // <regionname> section
#define SN_CHARFLAGS      1     // charflags section
#define SN_MIDWORD        2     // <midword> section
#define SN_PREFCOND       3     // <prefcond> section
#define SN_REP            4     // REP items section
#define SN_SAL            5     // SAL items section
#define SN_SOFO           6     // soundfolding section
#define SN_MAP            7     // MAP items section
#define SN_COMPOUND       8     // compound words section
#define SN_SYLLABLE       9     // syllable section
#define SN_NOBREAK        10    // NOBREAK section
#define SN_SUGFILE        11    // timestamp for .sug file
#define SN_REPSAL         12    // REPSAL items section
#define SN_WORDS          13    // common words
#define SN_NOSPLITSUGS    14    // don't split word for suggestions
#define SN_INFO           15    // info section
#define SN_NOCOMPOUNDSUGS 16    // don't compound for suggestions
#define SN_END            255   // marks the end of the sections

// Bit in <sectionflags>.
#define SNF_REQUIRED    0x01    // the section is required

// Bits in <charflags>.
#define CF_WORD         0x01    // word character
#define CF_UPPER        0x02    // upper-case character
301 | |
302 | static char *e_spell_trunc = N_("E758: Truncated spell file" ); |
303 | static char *e_afftrailing = N_("Trailing text in %s line %d: %s" ); |
304 | static char *e_affname = N_("Affix name too long in %s line %d: %s" ); |
305 | static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP" ); |
306 | static char *e_affrange = N_( |
307 | "E762: Character in FOL, LOW or UPP is out of range" ); |
308 | static char *msg_compressing = N_("Compressing word tree..." ); |
309 | |
310 | #define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff |
311 | // and .dic file. |
312 | // Main structure to store the contents of a ".aff" file. |
313 | typedef struct afffile_S { |
314 | char_u *af_enc; // "SET", normalized, alloc'ed string or NULL |
315 | int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG |
316 | unsigned af_rare; // RARE ID for rare word |
317 | unsigned af_keepcase; // KEEPCASE ID for keep-case word |
318 | unsigned af_bad; // BAD ID for banned word |
319 | unsigned af_needaffix; // NEEDAFFIX ID |
320 | unsigned af_circumfix; // CIRCUMFIX ID |
321 | unsigned af_needcomp; // NEEDCOMPOUND ID |
322 | unsigned af_comproot; // COMPOUNDROOT ID |
323 | unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID |
324 | unsigned af_comppermit; // COMPOUNDPERMITFLAG ID |
325 | unsigned af_nosuggest; // NOSUGGEST ID |
326 | int af_pfxpostpone; // postpone prefixes without chop string and |
327 | // without flags |
328 | bool ; // IGNOREEXTRA present |
329 | hashtab_T af_pref; // hashtable for prefixes, affheader_T |
330 | hashtab_T af_suff; // hashtable for suffixes, affheader_T |
331 | hashtab_T af_comp; // hashtable for compound flags, compitem_T |
332 | } afffile_T; |
333 | |
334 | #define AFT_CHAR 0 // flags are one character |
335 | #define AFT_LONG 1 // flags are two characters |
336 | #define AFT_CAPLONG 2 // flags are one or two characters |
337 | #define AFT_NUM 3 // flags are numbers, comma separated |
338 | |
339 | typedef struct affentry_S affentry_T; |
340 | // Affix entry from ".aff" file. Used for prefixes and suffixes. |
341 | struct affentry_S { |
342 | affentry_T *ae_next; // next affix with same name/number |
343 | char_u *ae_chop; // text to chop off basic word (can be NULL) |
344 | char_u *ae_add; // text to add to basic word (can be NULL) |
345 | char_u *ae_flags; // flags on the affix (can be NULL) |
346 | char_u *ae_cond; // condition (NULL for ".") |
347 | regprog_T *ae_prog; // regexp program for ae_cond or NULL |
348 | char ae_compforbid; // COMPOUNDFORBIDFLAG found |
349 | char ae_comppermit; // COMPOUNDPERMITFLAG found |
350 | }; |
351 | |
352 | # define AH_KEY_LEN 17 // 2 x 8 bytes + NUL |
353 | |
354 | // Affix header from ".aff" file. Used for af_pref and af_suff. |
355 | typedef struct { |
356 | char_u [AH_KEY_LEN]; // key for hashtab == name of affix |
357 | unsigned ; // affix name as number, uses "af_flagtype" |
358 | int ; // prefix ID after renumbering; 0 if not used |
359 | int ; // suffix may combine with prefix |
360 | int ; // another affix block should be following |
361 | affentry_T *; // first affix entry |
362 | } ; |
363 | |
364 | #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) |
365 | |
366 | // Flag used in compound items. |
367 | typedef struct compitem_S { |
368 | char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound |
369 | unsigned ci_flag; // affix name as number, uses "af_flagtype" |
370 | int ci_newID; // affix ID after renumbering. |
371 | } compitem_T; |
372 | |
373 | #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) |
374 | |
375 | // Structure that is used to store the items in the word tree. This avoids |
376 | // the need to keep track of each allocated thing, everything is freed all at |
377 | // once after ":mkspell" is done. |
378 | // Note: "sb_next" must be just before "sb_data" to make sure the alignment of |
379 | // "sb_data" is correct for systems where pointers must be aligned on |
380 | // pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). |
381 | #define SBLOCKSIZE 16000 // size of sb_data |
382 | typedef struct sblock_S sblock_T; |
383 | struct sblock_S { |
384 | int sb_used; // nr of bytes already in use |
385 | sblock_T *sb_next; // next block in list |
386 | char_u sb_data[1]; // data, actually longer |
387 | }; |
388 | |
389 | // A node in the tree. |
390 | typedef struct wordnode_S wordnode_T; |
391 | struct wordnode_S { |
392 | union { // shared to save space |
393 | char_u hashkey[6]; // the hash key, only used while compressing |
394 | int index; // index in written nodes (valid after first |
395 | // round) |
396 | } wn_u1; |
397 | union { // shared to save space |
398 | wordnode_T *next; // next node with same hash key |
399 | wordnode_T *wnode; // parent node that will write this node |
400 | } wn_u2; |
401 | wordnode_T *wn_child; // child (next byte in word) |
402 | wordnode_T *wn_sibling; // next sibling (alternate byte in word, |
403 | // always sorted) |
404 | int wn_refs; // Nr. of references to this node. Only |
405 | // relevant for first node in a list of |
406 | // siblings, in following siblings it is |
407 | // always one. |
408 | char_u wn_byte; // Byte for this node. NUL for word end |
409 | |
410 | // Info for when "wn_byte" is NUL. |
411 | // In PREFIXTREE "wn_region" is used for the prefcondnr. |
412 | // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and |
413 | // "wn_region" the LSW of the wordnr. |
414 | char_u wn_affixID; // supported/required prefix ID or 0 |
415 | uint16_t wn_flags; // WF_ flags |
416 | short wn_region; // region mask |
417 | |
418 | #ifdef SPELL_PRINTTREE |
419 | int wn_nr; // sequence nr for printing |
420 | #endif |
421 | }; |
422 | |
#define WN_MASK  0xffff         // mask relevant bits of "wn_flags"

// Get the wordnode_T that a hashtab item's key belongs to.
// The whole expansion is parenthesized so that "HI2WN(hi)->member" applies
// "->" to the converted pointer instead of binding tighter than the cast.
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
426 | |
427 | // Info used while reading the spell files. |
428 | typedef struct spellinfo_S { |
429 | wordnode_T *si_foldroot; // tree with case-folded words |
430 | long si_foldwcount; // nr of words in si_foldroot |
431 | |
432 | wordnode_T *si_keeproot; // tree with keep-case words |
433 | long si_keepwcount; // nr of words in si_keeproot |
434 | |
435 | wordnode_T *si_prefroot; // tree with postponed prefixes |
436 | |
437 | long si_sugtree; // creating the soundfolding trie |
438 | |
439 | sblock_T *si_blocks; // memory blocks used |
440 | long si_blocks_cnt; // memory blocks allocated |
441 | int si_did_emsg; // TRUE when ran out of memory |
442 | |
443 | long si_compress_cnt; // words to add before lowering |
444 | // compression limit |
445 | wordnode_T *si_first_free; // List of nodes that have been freed during |
446 | // compression, linked by "wn_child" field. |
447 | long si_free_count; // number of nodes in si_first_free |
448 | #ifdef SPELL_PRINTTREE |
449 | int si_wordnode_nr; // sequence nr for nodes |
450 | #endif |
451 | buf_T *si_spellbuf; // buffer used to store soundfold word table |
452 | |
453 | int si_ascii; // handling only ASCII words |
454 | int si_add; // addition file |
455 | int si_clear_chartab; // when TRUE clear char tables |
456 | int si_region; // region mask |
457 | vimconv_T si_conv; // for conversion to 'encoding' |
458 | int si_memtot; // runtime memory used |
459 | int si_verbose; // verbose messages |
460 | int si_msg_count; // number of words added since last message |
461 | char_u *si_info; // info text chars or NULL |
462 | int si_region_count; // number of regions supported (1 when there |
463 | // are no regions) |
464 | char_u si_region_name[MAXREGIONS * 2 + 1]; |
465 | // region names; used only if |
466 | // si_region_count > 1) |
467 | |
468 | garray_T si_rep; // list of fromto_T entries from REP lines |
469 | garray_T si_repsal; // list of fromto_T entries from REPSAL lines |
470 | garray_T si_sal; // list of fromto_T entries from SAL lines |
471 | char_u *si_sofofr; // SOFOFROM text |
472 | char_u *si_sofoto; // SOFOTO text |
473 | int si_nosugfile; // NOSUGFILE item found |
474 | int si_nosplitsugs; // NOSPLITSUGS item found |
475 | int si_nocompoundsugs; // NOCOMPOUNDSUGS item found |
476 | int si_followup; // soundsalike: ? |
477 | int si_collapse; // soundsalike: ? |
478 | hashtab_T si_commonwords; // hashtable for common words |
479 | time_t si_sugtime; // timestamp for .sug file |
480 | int si_rem_accents; // soundsalike: remove accents |
481 | garray_T si_map; // MAP info concatenated |
482 | char_u *si_midword; // MIDWORD chars or NULL |
483 | int si_compmax; // max nr of words for compounding |
484 | int si_compminlen; // minimal length for compounding |
485 | int si_compsylmax; // max nr of syllables for compounding |
486 | int si_compoptions; // COMP_ flags |
487 | garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as |
488 | // a string |
489 | char_u *si_compflags; // flags used for compounding |
490 | char_u si_nobreak; // NOBREAK |
491 | char_u *si_syllable; // syllable string |
492 | garray_T si_prefcond; // table with conditions for postponed |
493 | // prefixes, each stored as a string |
494 | int si_newprefID; // current value for ah_newID |
495 | int si_newcompID; // current value for compound ID |
496 | } spellinfo_T; |
497 | |
498 | #ifdef INCLUDE_GENERATED_DECLARATIONS |
499 | # include "spellfile.c.generated.h" |
500 | #endif |
501 | |
/// Read exactly n bytes from a stream, leaving the calling function on failure
///
/// On a short read `exit_code` is executed and the macro then makes the
/// *enclosing function* return, so it may only be used in functions that
/// return the SP_* codes.
///
/// @param[out]  buf  Destination, must have room for at least n bytes.
/// @param[in]  n  Number of bytes to read.
/// @param  fd  FILE* to read from.
/// @param  exit_code  Statement(s) to run just before returning.
///
/// @return Falls through when all bytes were read.  Otherwise returns
///         SP_TRUNCERROR when end-of-file was reached and SP_OTHERERROR
///         when it was not.
#define SPELL_READ_BYTES(buf, n, fd, exit_code) \
    do { \
      const size_t sprb_want = (n); \
      FILE *const sprb_stream = (fd); \
      char *const sprb_dest = (buf); \
      if (fread(sprb_dest, 1, sprb_want, sprb_stream) != sprb_want) { \
        exit_code; \
        return feof(sprb_stream) ? SP_TRUNCERROR : SP_OTHERERROR; \
      } \
    } while (0)
522 | |
/// Same as #SPELL_READ_BYTES, and additionally rejects data containing NUL
///
/// Like #SPELL_READ_BYTES this makes the *enclosing function* return on
/// failure, after `exit_code` has been executed.
///
/// @return Falls through when all bytes were read and none of them is NUL.
///         Otherwise returns SP_TRUNCERROR or SP_OTHERERROR as
///         #SPELL_READ_BYTES does, or SP_FORMERROR when a NUL byte is among
///         the bytes read.
#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \
    do { \
      const size_t sprnb_want = (n); \
      FILE *const sprnb_stream = (fd); \
      char *const sprnb_dest = (buf); \
      SPELL_READ_BYTES(sprnb_dest, sprnb_want, sprnb_stream, exit_code); \
      if (memchr(sprnb_dest, NUL, sprnb_want) != NULL) { \
        exit_code; \
        return SP_FORMERROR; \
      } \
    } while (0)
539 | |
540 | /// Check that spell file starts with a magic string |
541 | /// |
542 | /// Does not check for version of the file. |
543 | /// |
544 | /// @param fd File to check. |
545 | /// |
546 | /// @return 0 in case of success, SP_TRUNCERROR if file contains not enough |
547 | /// bytes, SP_FORMERROR if it does not match magic string and |
548 | /// SP_OTHERERROR if reading file failed. |
549 | static inline int spell_check_magic_string(FILE *const fd) |
550 | FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE |
551 | { |
552 | char buf[VIMSPELLMAGICL]; |
553 | SPELL_READ_BYTES(buf, VIMSPELLMAGICL, fd, ;); |
554 | if (memcmp(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { |
555 | return SP_FORMERROR; |
556 | } |
557 | return 0; |
558 | } |
559 | |
560 | // Load one spell file and store the info into a slang_T. |
561 | // |
562 | // This is invoked in three ways: |
563 | // - From spell_load_cb() to load a spell file for the first time. "lang" is |
564 | // the language name, "old_lp" is NULL. Will allocate an slang_T. |
565 | // - To reload a spell file that was changed. "lang" is NULL and "old_lp" |
566 | // points to the existing slang_T. |
567 | // - Just after writing a .spl file; it's read back to produce the .sug file. |
568 | // "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. |
569 | // |
570 | // Returns the slang_T the spell file was loaded into. NULL for error. |
571 | slang_T * |
572 | spell_load_file ( |
573 | char_u *fname, |
574 | char_u *lang, |
575 | slang_T *old_lp, |
576 | bool silent // no error if file doesn't exist |
577 | ) |
578 | { |
579 | FILE *fd; |
580 | char_u *p; |
581 | int n; |
582 | int len; |
583 | char_u *save_sourcing_name = sourcing_name; |
584 | linenr_T save_sourcing_lnum = sourcing_lnum; |
585 | slang_T *lp = NULL; |
586 | int c = 0; |
587 | int res; |
588 | |
589 | fd = os_fopen((char *)fname, "r" ); |
590 | if (fd == NULL) { |
591 | if (!silent) |
592 | EMSG2(_(e_notopen), fname); |
593 | else if (p_verbose > 2) { |
594 | verbose_enter(); |
595 | smsg((char *)e_notopen, fname); |
596 | verbose_leave(); |
597 | } |
598 | goto endFAIL; |
599 | } |
600 | if (p_verbose > 2) { |
601 | verbose_enter(); |
602 | smsg(_("Reading spell file \"%s\"" ), fname); |
603 | verbose_leave(); |
604 | } |
605 | |
606 | if (old_lp == NULL) { |
607 | lp = slang_alloc(lang); |
608 | |
609 | // Remember the file name, used to reload the file when it's updated. |
610 | lp->sl_fname = vim_strsave(fname); |
611 | |
612 | // Check for .add.spl. |
613 | lp->sl_add = strstr((char *)path_tail(fname), SPL_FNAME_ADD) != NULL; |
614 | } else |
615 | lp = old_lp; |
616 | |
617 | // Set sourcing_name, so that error messages mention the file name. |
618 | sourcing_name = fname; |
619 | sourcing_lnum = 0; |
620 | |
621 | // <HEADER>: <fileID> |
622 | const int scms_ret = spell_check_magic_string(fd); |
623 | switch (scms_ret) { |
624 | case SP_FORMERROR: |
625 | case SP_TRUNCERROR: { |
626 | emsgf(_("E757: This does not look like a spell file" )); |
627 | goto endFAIL; |
628 | } |
629 | case SP_OTHERERROR: { |
630 | emsgf(_("E5042: Failed to read spell file %s: %s" ), |
631 | fname, strerror(ferror(fd))); |
632 | } |
633 | case 0: { |
634 | break; |
635 | } |
636 | } |
637 | c = getc(fd); // <versionnr> |
638 | if (c < VIMSPELLVERSION) { |
639 | EMSG(_("E771: Old spell file, needs to be updated" )); |
640 | goto endFAIL; |
641 | } else if (c > VIMSPELLVERSION) { |
642 | EMSG(_("E772: Spell file is for newer version of Vim" )); |
643 | goto endFAIL; |
644 | } |
645 | |
646 | |
647 | // <SECTIONS>: <section> ... <sectionend> |
648 | // <section>: <sectionID> <sectionflags> <sectionlen> (section contents) |
649 | for (;; ) { |
650 | n = getc(fd); // <sectionID> or <sectionend> |
651 | if (n == SN_END) |
652 | break; |
653 | c = getc(fd); // <sectionflags> |
654 | len = get4c(fd); // <sectionlen> |
655 | if (len < 0) |
656 | goto truncerr; |
657 | |
658 | res = 0; |
659 | switch (n) { |
660 | case SN_INFO: |
661 | lp->sl_info = READ_STRING(fd, len); // <infotext> |
662 | if (lp->sl_info == NULL) |
663 | goto endFAIL; |
664 | break; |
665 | |
666 | case SN_REGION: |
667 | res = read_region_section(fd, lp, len); |
668 | break; |
669 | |
670 | case SN_CHARFLAGS: |
671 | res = read_charflags_section(fd); |
672 | break; |
673 | |
674 | case SN_MIDWORD: |
675 | lp->sl_midword = READ_STRING(fd, len); // <midword> |
676 | if (lp->sl_midword == NULL) |
677 | goto endFAIL; |
678 | break; |
679 | |
680 | case SN_PREFCOND: |
681 | res = read_prefcond_section(fd, lp); |
682 | break; |
683 | |
684 | case SN_REP: |
685 | res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); |
686 | break; |
687 | |
688 | case SN_REPSAL: |
689 | res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); |
690 | break; |
691 | |
692 | case SN_SAL: |
693 | res = read_sal_section(fd, lp); |
694 | break; |
695 | |
696 | case SN_SOFO: |
697 | res = read_sofo_section(fd, lp); |
698 | break; |
699 | |
700 | case SN_MAP: |
701 | p = READ_STRING(fd, len); // <mapstr> |
702 | if (p == NULL) |
703 | goto endFAIL; |
704 | set_map_str(lp, p); |
705 | xfree(p); |
706 | break; |
707 | |
708 | case SN_WORDS: |
709 | res = read_words_section(fd, lp, len); |
710 | break; |
711 | |
712 | case SN_SUGFILE: |
713 | lp->sl_sugtime = get8ctime(fd); // <timestamp> |
714 | break; |
715 | |
716 | case SN_NOSPLITSUGS: |
717 | lp->sl_nosplitsugs = true; |
718 | break; |
719 | |
720 | case SN_NOCOMPOUNDSUGS: |
721 | lp->sl_nocompoundsugs = true; |
722 | break; |
723 | |
724 | case SN_COMPOUND: |
725 | res = read_compound(fd, lp, len); |
726 | break; |
727 | |
728 | case SN_NOBREAK: |
729 | lp->sl_nobreak = true; |
730 | break; |
731 | |
732 | case SN_SYLLABLE: |
733 | lp->sl_syllable = READ_STRING(fd, len); // <syllable> |
734 | if (lp->sl_syllable == NULL) |
735 | goto endFAIL; |
736 | if (init_syl_tab(lp) == FAIL) |
737 | goto endFAIL; |
738 | break; |
739 | |
740 | default: |
741 | // Unsupported section. When it's required give an error |
742 | // message. When it's not required skip the contents. |
743 | if (c & SNF_REQUIRED) { |
744 | EMSG(_("E770: Unsupported section in spell file" )); |
745 | goto endFAIL; |
746 | } |
747 | while (--len >= 0) |
748 | if (getc(fd) < 0) |
749 | goto truncerr; |
750 | break; |
751 | } |
752 | someerror: |
753 | if (res == SP_FORMERROR) { |
754 | EMSG(_(e_format)); |
755 | goto endFAIL; |
756 | } |
757 | if (res == SP_TRUNCERROR) { |
758 | truncerr: |
759 | EMSG(_(e_spell_trunc)); |
760 | goto endFAIL; |
761 | } |
762 | if (res == SP_OTHERERROR) |
763 | goto endFAIL; |
764 | } |
765 | |
766 | // <LWORDTREE> |
767 | res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, false, 0); |
768 | if (res != 0) |
769 | goto someerror; |
770 | |
771 | // <KWORDTREE> |
772 | res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, false, 0); |
773 | if (res != 0) |
774 | goto someerror; |
775 | |
776 | // <PREFIXTREE> |
777 | res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, true, |
778 | lp->sl_prefixcnt); |
779 | if (res != 0) |
780 | goto someerror; |
781 | |
782 | // For a new file link it in the list of spell files. |
783 | if (old_lp == NULL && lang != NULL) { |
784 | lp->sl_next = first_lang; |
785 | first_lang = lp; |
786 | } |
787 | |
788 | goto endOK; |
789 | |
790 | endFAIL: |
791 | if (lang != NULL) |
792 | // truncating the name signals the error to spell_load_lang() |
793 | *lang = NUL; |
794 | if (lp != NULL && old_lp == NULL) |
795 | slang_free(lp); |
796 | lp = NULL; |
797 | |
798 | endOK: |
799 | if (fd != NULL) |
800 | fclose(fd); |
801 | sourcing_name = save_sourcing_name; |
802 | sourcing_lnum = save_sourcing_lnum; |
803 | |
804 | return lp; |
805 | } |
806 | |
807 | // Fill in the wordcount fields for a trie. |
808 | // Returns the total number of words. |
809 | static void tree_count_words(char_u *byts, idx_T *idxs) |
810 | { |
811 | int depth; |
812 | idx_T arridx[MAXWLEN]; |
813 | int curi[MAXWLEN]; |
814 | int c; |
815 | idx_T n; |
816 | int wordcount[MAXWLEN]; |
817 | |
818 | arridx[0] = 0; |
819 | curi[0] = 1; |
820 | wordcount[0] = 0; |
821 | depth = 0; |
822 | while (depth >= 0 && !got_int) { |
823 | if (curi[depth] > byts[arridx[depth]]) { |
824 | // Done all bytes at this node, go up one level. |
825 | idxs[arridx[depth]] = wordcount[depth]; |
826 | if (depth > 0) |
827 | wordcount[depth - 1] += wordcount[depth]; |
828 | |
829 | --depth; |
830 | fast_breakcheck(); |
831 | } else { |
832 | // Do one more byte at this node. |
833 | n = arridx[depth] + curi[depth]; |
834 | ++curi[depth]; |
835 | |
836 | c = byts[n]; |
837 | if (c == 0) { |
838 | // End of word, count it. |
839 | ++wordcount[depth]; |
840 | |
841 | // Skip over any other NUL bytes (same word with different |
842 | // flags). |
843 | while (byts[n + 1] == 0) { |
844 | ++n; |
845 | ++curi[depth]; |
846 | } |
847 | } else { |
848 | // Normal char, go one level deeper to count the words. |
849 | ++depth; |
850 | arridx[depth] = idxs[n]; |
851 | curi[depth] = 1; |
852 | wordcount[depth] = 0; |
853 | } |
854 | } |
855 | } |
856 | } |
857 | |
858 | // Load the .sug files for languages that have one and weren't loaded yet. |
859 | void suggest_load_files(void) |
860 | { |
861 | langp_T *lp; |
862 | slang_T *slang; |
863 | char_u *dotp; |
864 | FILE *fd; |
865 | char_u buf[MAXWLEN]; |
866 | int i; |
867 | time_t timestamp; |
868 | int wcount; |
869 | int wordnr; |
870 | garray_T ga; |
871 | int c; |
872 | |
873 | // Do this for all languages that support sound folding. |
874 | for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) { |
875 | lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); |
876 | slang = lp->lp_slang; |
877 | if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) { |
878 | // Change ".spl" to ".sug" and open the file. When the file isn't |
879 | // found silently skip it. Do set "sl_sugloaded" so that we |
880 | // don't try again and again. |
881 | slang->sl_sugloaded = true; |
882 | |
883 | dotp = STRRCHR(slang->sl_fname, '.'); |
884 | if (dotp == NULL || fnamecmp(dotp, ".spl" ) != 0) { |
885 | continue; |
886 | } |
887 | STRCPY(dotp, ".sug" ); |
888 | fd = os_fopen((char *)slang->sl_fname, "r" ); |
889 | if (fd == NULL) { |
890 | goto nextone; |
891 | } |
892 | |
893 | // <SUGHEADER>: <fileID> <versionnr> <timestamp> |
894 | for (i = 0; i < VIMSUGMAGICL; ++i) |
895 | buf[i] = getc(fd); // <fileID> |
896 | if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) { |
897 | EMSG2(_("E778: This does not look like a .sug file: %s" ), |
898 | slang->sl_fname); |
899 | goto nextone; |
900 | } |
901 | c = getc(fd); // <versionnr> |
902 | if (c < VIMSUGVERSION) { |
903 | EMSG2(_("E779: Old .sug file, needs to be updated: %s" ), |
904 | slang->sl_fname); |
905 | goto nextone; |
906 | } else if (c > VIMSUGVERSION) { |
907 | EMSG2(_("E780: .sug file is for newer version of Vim: %s" ), |
908 | slang->sl_fname); |
909 | goto nextone; |
910 | } |
911 | |
912 | // Check the timestamp, it must be exactly the same as the one in |
913 | // the .spl file. Otherwise the word numbers won't match. |
914 | timestamp = get8ctime(fd); // <timestamp> |
915 | if (timestamp != slang->sl_sugtime) { |
916 | EMSG2(_("E781: .sug file doesn't match .spl file: %s" ), |
917 | slang->sl_fname); |
918 | goto nextone; |
919 | } |
920 | |
921 | // <SUGWORDTREE>: <wordtree> |
922 | // Read the trie with the soundfolded words. |
923 | if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, |
924 | false, 0) != 0) { |
925 | someerror: |
926 | EMSG2(_("E782: error while reading .sug file: %s" ), |
927 | slang->sl_fname); |
928 | slang_clear_sug(slang); |
929 | goto nextone; |
930 | } |
931 | |
932 | // <SUGTABLE>: <sugwcount> <sugline> ... |
933 | // |
934 | // Read the table with word numbers. We use a file buffer for |
935 | // this, because it's so much like a file with lines. Makes it |
936 | // possible to swap the info and save on memory use. |
937 | slang->sl_sugbuf = open_spellbuf(); |
938 | |
939 | // <sugwcount> |
940 | wcount = get4c(fd); |
941 | if (wcount < 0) |
942 | goto someerror; |
943 | |
944 | // Read all the wordnr lists into the buffer, one NUL terminated |
945 | // list per line. |
946 | ga_init(&ga, 1, 100); |
947 | for (wordnr = 0; wordnr < wcount; ++wordnr) { |
948 | ga.ga_len = 0; |
949 | for (;; ) { |
950 | c = getc(fd); // <sugline> |
951 | if (c < 0) { |
952 | goto someerror; |
953 | } |
954 | GA_APPEND(char_u, &ga, c); |
955 | if (c == NUL) |
956 | break; |
957 | } |
958 | if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, |
959 | ga.ga_data, ga.ga_len, true) == FAIL) { |
960 | goto someerror; |
961 | } |
962 | } |
963 | ga_clear(&ga); |
964 | |
965 | // Need to put word counts in the word tries, so that we can find |
966 | // a word by its number. |
967 | tree_count_words(slang->sl_fbyts, slang->sl_fidxs); |
968 | tree_count_words(slang->sl_sbyts, slang->sl_sidxs); |
969 | |
970 | nextone: |
971 | if (fd != NULL) |
972 | fclose(fd); |
973 | STRCPY(dotp, ".spl" ); |
974 | } |
975 | } |
976 | } |
977 | |
978 | |
979 | // Read a length field from "fd" in "cnt_bytes" bytes. |
980 | // Allocate memory, read the string into it and add a NUL at the end. |
981 | // Returns NULL when the count is zero. |
982 | // Sets "*cntp" to SP_*ERROR when there is an error, length of the result |
983 | // otherwise. |
984 | static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) |
985 | { |
986 | int cnt = 0; |
987 | int i; |
988 | char_u *str; |
989 | |
990 | // read the length bytes, MSB first |
991 | for (i = 0; i < cnt_bytes; ++i) |
992 | cnt = (cnt << 8) + getc(fd); |
993 | if (cnt < 0) { |
994 | *cntp = SP_TRUNCERROR; |
995 | return NULL; |
996 | } |
997 | *cntp = cnt; |
998 | if (cnt == 0) |
999 | return NULL; // nothing to read, return NULL |
1000 | |
1001 | str = READ_STRING(fd, cnt); |
1002 | if (str == NULL) |
1003 | *cntp = SP_OTHERERROR; |
1004 | return str; |
1005 | } |
1006 | |
// Read SN_REGION: <regionname> ...
//
// Copies "len" bytes from "fd" into lp->sl_regions and NUL-terminates them.
// "len" is the section length; more than MAXREGIONS * 2 bytes (two bytes per
// region name) is rejected with SP_FORMERROR.
// Return SP_*ERROR flags, zero when OK.
static int read_region_section(FILE *fd, slang_T *lp, int len)
{
  if (len > MAXREGIONS * 2) {
    return SP_FORMERROR;
  }
  // NOTE(review): the macro is defined outside this chunk; presumably it
  // returns an SP_*ERROR from this function when the read fails or a NUL byte
  // is found, running the last argument (here an empty statement) first.
  SPELL_READ_NONNUL_BYTES((char *)lp->sl_regions, (size_t)len, fd, ;);
  lp->sl_regions[len] = NUL;
  return 0;
}
1018 | |
1019 | // Read SN_CHARFLAGS section: <charflagslen> <charflags> |
1020 | // <folcharslen> <folchars> |
1021 | // Return SP_*ERROR flags. |
1022 | static int read_charflags_section(FILE *fd) |
1023 | { |
1024 | char_u *flags; |
1025 | char_u *fol; |
1026 | int flagslen, follen; |
1027 | |
1028 | // <charflagslen> <charflags> |
1029 | flags = read_cnt_string(fd, 1, &flagslen); |
1030 | if (flagslen < 0) |
1031 | return flagslen; |
1032 | |
1033 | // <folcharslen> <folchars> |
1034 | fol = read_cnt_string(fd, 2, &follen); |
1035 | if (follen < 0) { |
1036 | xfree(flags); |
1037 | return follen; |
1038 | } |
1039 | |
1040 | // Set the word-char flags and fill SPELL_ISUPPER() table. |
1041 | if (flags != NULL && fol != NULL) |
1042 | set_spell_charflags(flags, flagslen, fol); |
1043 | |
1044 | xfree(flags); |
1045 | xfree(fol); |
1046 | |
1047 | // When <charflagslen> is zero then <fcharlen> must also be zero. |
1048 | if ((flags == NULL) != (fol == NULL)) |
1049 | return SP_FORMERROR; |
1050 | return 0; |
1051 | } |
1052 | |
// Read SN_PREFCOND section: <prefcondcnt> <prefcond> ...
//
// Allocates lp->sl_prefprog with one slot per condition, stores the count in
// lp->sl_prefixcnt and compiles every non-empty condition into a regexp
// program anchored with '^'.
// Return SP_*ERROR flags, zero when OK.  SP_FORMERROR is returned when the
// count is zero or negative, or when a <condlen> is negative (read failed) or
// not below MAXWLEN.
static int read_prefcond_section(FILE *fd, slang_T *lp)
{
  // <prefcondcnt> <prefcond> ...
  const int cnt = get2c(fd);  // <prefcondcnt>
  if (cnt <= 0) {
    return SP_FORMERROR;
  }

  // One regprog_T pointer per condition.
  // NOTE(review): slots of empty conditions are never assigned below;
  // presumably xcalloc() leaves them NULL -- confirm.
  lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *));
  lp->sl_prefixcnt = cnt;

  for (int i = 0; i < cnt; i++) {
    // <prefcond> : <condlen> <condstr>
    const int n = getc(fd);  // <condlen>
    if (n < 0 || n >= MAXWLEN) {
      return SP_FORMERROR;
    }

    // When <condlen> is zero we have an empty condition.  Otherwise
    // compile the regexp program used to check for the condition.
    if (n > 0) {
      // Room for '^', at most MAXWLEN - 1 condition bytes and the NUL.
      char buf[MAXWLEN + 1];
      buf[0] = '^';  // always match at one position only
      // NOTE(review): macro defined outside this chunk; presumably returns
      // from this function with an SP_*ERROR on a short read or NUL byte.
      SPELL_READ_NONNUL_BYTES(buf + 1, (size_t)n, fd, ;);
      buf[n + 1] = NUL;
      lp->sl_prefprog[i] = vim_regcomp((char_u *)buf, RE_MAGIC | RE_STRING);
    }
  }
  return 0;
}
1085 | |
1086 | // Read REP or REPSAL items section from "fd": <repcount> <rep> ... |
1087 | // Return SP_*ERROR flags. |
1088 | static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first) |
1089 | { |
1090 | int cnt; |
1091 | fromto_T *ftp; |
1092 | |
1093 | cnt = get2c(fd); // <repcount> |
1094 | if (cnt < 0) |
1095 | return SP_TRUNCERROR; |
1096 | |
1097 | ga_grow(gap, cnt); |
1098 | |
1099 | // <rep> : <repfromlen> <repfrom> <reptolen> <repto> |
1100 | for (; gap->ga_len < cnt; ++gap->ga_len) { |
1101 | int c; |
1102 | ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; |
1103 | ftp->ft_from = read_cnt_string(fd, 1, &c); |
1104 | if (c < 0) |
1105 | return c; |
1106 | if (c == 0) |
1107 | return SP_FORMERROR; |
1108 | ftp->ft_to = read_cnt_string(fd, 1, &c); |
1109 | if (c <= 0) { |
1110 | xfree(ftp->ft_from); |
1111 | if (c < 0) |
1112 | return c; |
1113 | return SP_FORMERROR; |
1114 | } |
1115 | } |
1116 | |
1117 | // Fill the first-index table. |
1118 | for (int i = 0; i < 256; ++i) { |
1119 | first[i] = -1; |
1120 | } |
1121 | for (int i = 0; i < gap->ga_len; ++i) { |
1122 | ftp = &((fromto_T *)gap->ga_data)[i]; |
1123 | if (first[*ftp->ft_from] == -1) |
1124 | first[*ftp->ft_from] = i; |
1125 | } |
1126 | return 0; |
1127 | } |
1128 | |
// Read SN_SAL section: <salflags> <salcount> <sal> ...
//
// Fills slang->sl_sal with one salitem_T per <sal> entry.  Each <salfrom> is
// split into three NUL-terminated strings that share a single allocation:
// sm_lead (bytes before the first special char), sm_oneof (the bytes between
// '(' and ')', if any) and sm_rules (whatever follows).  Only sm_lead is
// freed on error because sm_oneof and sm_rules point into the same block.
// When at least one item was read, an extra item with an empty sm_lead is
// appended to mark the end.  Finally set_sal_first() is called.
// Return SP_*ERROR flags, zero when OK.
static int read_sal_section(FILE *fd, slang_T *slang)
{
  int cnt;
  garray_T *gap;
  salitem_T *smp;
  int ccnt;
  char_u *p;
  int c = NUL;

  slang->sl_sofo = false;

  // NOTE(review): "flags" is not checked for EOF here; a truncated file is
  // only noticed by the get2c() check below.
  const int flags = getc(fd);  // <salflags>
  if (flags & SAL_F0LLOWUP) {  // sic: the macro name contains a digit zero
    slang->sl_followup = true;
  }
  if (flags & SAL_COLLAPSE) {
    slang->sl_collapse = true;
  }
  if (flags & SAL_REM_ACCENTS) {
    slang->sl_rem_accents = true;
  }

  cnt = get2c(fd);  // <salcount>
  if (cnt < 0)
    return SP_TRUNCERROR;

  gap = &slang->sl_sal;
  ga_init(gap, sizeof(salitem_T), 10);
  // One extra item for the end marker added after the loop.
  ga_grow(gap, cnt + 1);

  // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
  for (; gap->ga_len < cnt; ++gap->ga_len) {
    smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
    ccnt = getc(fd);  // <salfromlen>
    if (ccnt < 0)
      return SP_TRUNCERROR;
    // "ccnt" bytes of <salfrom> plus the NUL terminators written below.
    p = xmalloc(ccnt + 2);
    smp->sm_lead = p;

    // Read up to the first special char into sm_lead.
    // NOTE(review): the getc() results in this loop and the next one are not
    // checked for EOF.
    int i = 0;
    for (; i < ccnt; ++i) {
      c = getc(fd);  // <salfrom>
      if (vim_strchr((char_u *)"0123456789(-<^$" , c) != NULL)
        break;
      *p++ = c;
    }
    smp->sm_leadlen = (int)(p - smp->sm_lead);
    *p++ = NUL;

    // Put (abc) chars in sm_oneof, if any.
    if (c == '(') {
      smp->sm_oneof = p;
      for (++i; i < ccnt; ++i) {
        c = getc(fd);  // <salfrom>
        if (c == ')')
          break;
        *p++ = c;
      }
      *p++ = NUL;
      // Fetch the byte following ')' so that it can start sm_rules.
      if (++i < ccnt)
        c = getc(fd);
    } else
      smp->sm_oneof = NULL;

    // Any following chars go in sm_rules.
    smp->sm_rules = p;
    if (i < ccnt) {
      // store the char we got while checking for end of sm_lead
      *p++ = c;
    }
    i++;
    if (i < ccnt) {
      // The last macro argument frees the shared allocation.
      // NOTE(review): macro defined outside this chunk; presumably it runs
      // that argument and returns an SP_*ERROR when the read fails.
      SPELL_READ_NONNUL_BYTES(  // <salfrom>
                              (char *)p, (size_t)(ccnt - i), fd, xfree(smp->sm_lead));
      p += (ccnt - i);
    }
    *p++ = NUL;

    // <saltolen> <salto>
    smp->sm_to = read_cnt_string(fd, 1, &ccnt);
    if (ccnt < 0) {
      xfree(smp->sm_lead);
      return ccnt;
    }

    if (has_mbyte) {
      // convert the multi-byte strings to wide char strings
      // sm_leadlen is recomputed as a character count instead of bytes.
      smp->sm_lead_w = mb_str2wide(smp->sm_lead);
      smp->sm_leadlen = mb_charlen(smp->sm_lead);
      if (smp->sm_oneof == NULL)
        smp->sm_oneof_w = NULL;
      else
        smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
      if (smp->sm_to == NULL)
        smp->sm_to_w = NULL;
      else
        smp->sm_to_w = mb_str2wide(smp->sm_to);
    }
  }

  if (!GA_EMPTY(gap)) {
    // Add one extra entry to mark the end with an empty sm_lead.  Avoids
    // that we need to check the index every time.
    // sm_lead and sm_rules both point at the same one-byte empty string.
    smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
    p = xmalloc(1);
    p[0] = NUL;
    smp->sm_lead = p;
    smp->sm_leadlen = 0;
    smp->sm_oneof = NULL;
    smp->sm_rules = p;
    smp->sm_to = NULL;
    if (has_mbyte) {
      smp->sm_lead_w = mb_str2wide(smp->sm_lead);
      smp->sm_leadlen = 0;
      smp->sm_oneof_w = NULL;
      smp->sm_to_w = NULL;
    }
    ++gap->ga_len;
  }

  // Fill the first-index table.
  set_sal_first(slang);

  return 0;
}
1257 | |
1258 | // Read SN_WORDS: <word> ... |
1259 | // Return SP_*ERROR flags. |
1260 | static int read_words_section(FILE *fd, slang_T *lp, int len) |
1261 | { |
1262 | int done = 0; |
1263 | int i; |
1264 | int c; |
1265 | char_u word[MAXWLEN]; |
1266 | |
1267 | while (done < len) { |
1268 | // Read one word at a time. |
1269 | for (i = 0;; ++i) { |
1270 | c = getc(fd); |
1271 | if (c == EOF) |
1272 | return SP_TRUNCERROR; |
1273 | word[i] = c; |
1274 | if (word[i] == NUL) |
1275 | break; |
1276 | if (i == MAXWLEN - 1) |
1277 | return SP_FORMERROR; |
1278 | } |
1279 | |
1280 | // Init the count to 10. |
1281 | count_common_word(lp, word, -1, 10); |
1282 | done += i + 1; |
1283 | } |
1284 | return 0; |
1285 | } |
1286 | |
1287 | // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> |
1288 | // Return SP_*ERROR flags. |
1289 | static int read_sofo_section(FILE *fd, slang_T *slang) |
1290 | { |
1291 | int cnt; |
1292 | char_u *from, *to; |
1293 | int res; |
1294 | |
1295 | slang->sl_sofo = true; |
1296 | |
1297 | // <sofofromlen> <sofofrom> |
1298 | from = read_cnt_string(fd, 2, &cnt); |
1299 | if (cnt < 0) |
1300 | return cnt; |
1301 | |
1302 | // <sofotolen> <sofoto> |
1303 | to = read_cnt_string(fd, 2, &cnt); |
1304 | if (cnt < 0) { |
1305 | xfree(from); |
1306 | return cnt; |
1307 | } |
1308 | |
1309 | // Store the info in slang->sl_sal and/or slang->sl_sal_first. |
1310 | if (from != NULL && to != NULL) |
1311 | res = set_sofo(slang, from, to); |
1312 | else if (from != NULL || to != NULL) |
1313 | res = SP_FORMERROR; // only one of two strings is an error |
1314 | else |
1315 | res = 0; |
1316 | |
1317 | xfree(from); |
1318 | xfree(to); |
1319 | return res; |
1320 | } |
1321 | |
1322 | // Read the compound section from the .spl file: |
1323 | // <compmax> <compminlen> <compsylmax> <compoptions> <compflags> |
1324 | // Returns SP_*ERROR flags. |
1325 | static int read_compound(FILE *fd, slang_T *slang, int len) |
1326 | { |
1327 | int todo = len; |
1328 | int c; |
1329 | int atstart; |
1330 | char_u *pat; |
1331 | char_u *pp; |
1332 | char_u *cp; |
1333 | char_u *ap; |
1334 | char_u *crp; |
1335 | int cnt; |
1336 | garray_T *gap; |
1337 | |
1338 | if (todo < 2) |
1339 | return SP_FORMERROR; // need at least two bytes |
1340 | |
1341 | --todo; |
1342 | c = getc(fd); // <compmax> |
1343 | if (c < 2) |
1344 | c = MAXWLEN; |
1345 | slang->sl_compmax = c; |
1346 | |
1347 | --todo; |
1348 | c = getc(fd); // <compminlen> |
1349 | if (c < 1) |
1350 | c = 0; |
1351 | slang->sl_compminlen = c; |
1352 | |
1353 | --todo; |
1354 | c = getc(fd); // <compsylmax> |
1355 | if (c < 1) |
1356 | c = MAXWLEN; |
1357 | slang->sl_compsylmax = c; |
1358 | |
1359 | c = getc(fd); // <compoptions> |
1360 | if (c != 0) |
1361 | ungetc(c, fd); // be backwards compatible with Vim 7.0b |
1362 | else { |
1363 | --todo; |
1364 | c = getc(fd); // only use the lower byte for now |
1365 | --todo; |
1366 | slang->sl_compoptions = c; |
1367 | |
1368 | gap = &slang->sl_comppat; |
1369 | c = get2c(fd); // <comppatcount> |
1370 | todo -= 2; |
1371 | ga_init(gap, sizeof(char_u *), c); |
1372 | ga_grow(gap, c); |
1373 | while (--c >= 0) { |
1374 | ((char_u **)(gap->ga_data))[gap->ga_len++] = |
1375 | read_cnt_string(fd, 1, &cnt); |
1376 | // <comppatlen> <comppattext> |
1377 | if (cnt < 0) |
1378 | return cnt; |
1379 | todo -= cnt + 1; |
1380 | } |
1381 | } |
1382 | if (todo < 0) |
1383 | return SP_FORMERROR; |
1384 | |
1385 | // Turn the COMPOUNDRULE items into a regexp pattern: |
1386 | // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". |
1387 | // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. |
1388 | // Conversion to utf-8 may double the size. |
1389 | c = todo * 2 + 7; |
1390 | if (enc_utf8) |
1391 | c += todo * 2; |
1392 | pat = xmalloc(c); |
1393 | |
1394 | // We also need a list of all flags that can appear at the start and one |
1395 | // for all flags. |
1396 | cp = xmalloc(todo + 1); |
1397 | slang->sl_compstartflags = cp; |
1398 | *cp = NUL; |
1399 | |
1400 | ap = xmalloc(todo + 1); |
1401 | slang->sl_compallflags = ap; |
1402 | *ap = NUL; |
1403 | |
1404 | // And a list of all patterns in their original form, for checking whether |
1405 | // compounding may work in match_compoundrule(). This is freed when we |
1406 | // encounter a wildcard, the check doesn't work then. |
1407 | crp = xmalloc(todo + 1); |
1408 | slang->sl_comprules = crp; |
1409 | |
1410 | pp = pat; |
1411 | *pp++ = '^'; |
1412 | *pp++ = '\\'; |
1413 | *pp++ = '('; |
1414 | |
1415 | atstart = 1; |
1416 | while (todo-- > 0) { |
1417 | c = getc(fd); // <compflags> |
1418 | if (c == EOF) { |
1419 | xfree(pat); |
1420 | return SP_TRUNCERROR; |
1421 | } |
1422 | |
1423 | // Add all flags to "sl_compallflags". |
1424 | if (vim_strchr((char_u *)"?*+[]/" , c) == NULL |
1425 | && !byte_in_str(slang->sl_compallflags, c)) { |
1426 | *ap++ = c; |
1427 | *ap = NUL; |
1428 | } |
1429 | |
1430 | if (atstart != 0) { |
1431 | // At start of item: copy flags to "sl_compstartflags". For a |
1432 | // [abc] item set "atstart" to 2 and copy up to the ']'. |
1433 | if (c == '[') |
1434 | atstart = 2; |
1435 | else if (c == ']') |
1436 | atstart = 0; |
1437 | else { |
1438 | if (!byte_in_str(slang->sl_compstartflags, c)) { |
1439 | *cp++ = c; |
1440 | *cp = NUL; |
1441 | } |
1442 | if (atstart == 1) |
1443 | atstart = 0; |
1444 | } |
1445 | } |
1446 | |
1447 | // Copy flag to "sl_comprules", unless we run into a wildcard. |
1448 | if (crp != NULL) { |
1449 | if (c == '?' || c == '+' || c == '*') { |
1450 | XFREE_CLEAR(slang->sl_comprules); |
1451 | crp = NULL; |
1452 | } else |
1453 | *crp++ = c; |
1454 | } |
1455 | |
1456 | if (c == '/') { // slash separates two items |
1457 | *pp++ = '\\'; |
1458 | *pp++ = '|'; |
1459 | atstart = 1; |
1460 | } else { // normal char, "[abc]" and '*' are copied as-is |
1461 | if (c == '?' || c == '+' || c == '~') { |
1462 | *pp++ = '\\'; // "a?" becomes "a\?", "a+" becomes "a\+" |
1463 | } |
1464 | pp += utf_char2bytes(c, pp); |
1465 | } |
1466 | } |
1467 | |
1468 | *pp++ = '\\'; |
1469 | *pp++ = ')'; |
1470 | *pp++ = '$'; |
1471 | *pp = NUL; |
1472 | |
1473 | if (crp != NULL) |
1474 | *crp = NUL; |
1475 | |
1476 | slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); |
1477 | xfree(pat); |
1478 | if (slang->sl_compprog == NULL) |
1479 | return SP_FORMERROR; |
1480 | |
1481 | return 0; |
1482 | } |
1483 | |
// Set the SOFOFROM and SOFOTO items in language "lp".
//
// "from" and "to" are parallel strings: the n-th character of "from" maps to
// the n-th character of "to".
// With has_mbyte set, characters below 256 are mapped through
// lp->sl_sal_first[] and all others through per-low-byte lists kept in
// lp->sl_sal.  Without it, sl_sal_first[] maps bytes to bytes.
// Returns SP_*ERROR flags when there is something wrong (SP_FORMERROR when
// the two strings differ in length), zero when OK.
static int set_sofo(slang_T *lp, char_u *from, char_u *to)
{
  int i;

  garray_T *gap;
  char_u *s;
  char_u *p;
  int c;
  int *inp;

  if (has_mbyte) {
    // Use "sl_sal" as an array with 256 pointers to a list of wide
    // characters.  The index is the low byte of the character.
    // The list contains from-to pairs with a terminating NUL.
    // sl_sal_first[] is used for latin1 "from" characters.
    gap = &lp->sl_sal;
    ga_init(gap, sizeof(int *), 1);
    ga_grow(gap, 256);
    memset(gap->ga_data, 0, sizeof(int *) * 256);
    gap->ga_len = 256;

    // First count the number of items for each list.  Temporarily use
    // sl_sal_first[] for this.
    // NOTE(review): sl_sal_first[] is incremented without being cleared
    // first; this assumes it is all zero on entry -- TODO confirm.
    for (p = from, s = to; *p != NUL && *s != NUL; ) {
      c = mb_cptr2char_adv((const char_u **)&p);
      MB_CPTR_ADV(s);
      if (c >= 256) {
        lp->sl_sal_first[c & 0xff]++;
      }
    }
    if (*p != NUL || *s != NUL)  // lengths differ
      return SP_FORMERROR;

    // Allocate the lists.
    // Each list has room for "count" from-to pairs plus the terminating
    // zero; the first entry is zeroed to mark the list as empty.
    for (i = 0; i < 256; ++i)
      if (lp->sl_sal_first[i] > 0) {
        p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
        ((int **)gap->ga_data)[i] = (int *)p;
        *(int *)p = 0;
      }

    // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
    // list.
    // The counts stored in sl_sal_first[] are no longer needed: wipe them.
    memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
    for (p = from, s = to; *p != NUL && *s != NUL; ) {
      c = mb_cptr2char_adv((const char_u **)&p);
      i = mb_cptr2char_adv((const char_u **)&s);
      if (c >= 256) {
        // Append the from-to chars at the end of the list with
        // the low byte.
        inp = ((int **)gap->ga_data)[c & 0xff];
        while (*inp != 0)
          ++inp;
        *inp++ = c;  // from char
        *inp++ = i;  // to char
        *inp++ = NUL;  // NUL at the end
      } else
        // mapping byte to char is done in sl_sal_first[]
        lp->sl_sal_first[c] = i;
    }
  } else {
    // mapping bytes to bytes is done in sl_sal_first[]
    if (STRLEN(from) != STRLEN(to))
      return SP_FORMERROR;

    for (i = 0; to[i] != NUL; ++i)
      lp->sl_sal_first[from[i]] = to[i];
    lp->sl_sal.ga_len = 1;  // indicates we have soundfolding
  }

  return 0;
}
1558 | |
// Fill the first-index table for "lp".
//
// For every possible index byte, lp->sl_sal_first[] receives the index of
// the first item in lp->sl_sal whose lead starts with that byte, or -1 when
// there is none.  With has_mbyte set the index byte is the low byte of the
// first wide character, and the items are additionally reordered in place so
// that all items sharing an index byte are adjacent, keeping their relative
// order.
static void set_sal_first(slang_T *lp)
{
  salfirst_T *sfirst;
  salitem_T *smp;
  int c;
  garray_T *gap = &lp->sl_sal;

  sfirst = lp->sl_sal_first;
  for (int i = 0; i < 256; ++i) {
    sfirst[i] = -1;
  }
  smp = (salitem_T *)gap->ga_data;
  for (int i = 0; i < gap->ga_len; ++i) {
    if (has_mbyte)
      // Use the lowest byte of the first character.  For latin1 it's
      // the character, for other encodings it should differ for most
      // characters.
      c = *smp[i].sm_lead_w & 0xff;
    else
      c = *smp[i].sm_lead;
    if (sfirst[c] == -1) {
      sfirst[c] = i;
      if (has_mbyte) {
        int n;

        // Make sure all entries with this byte are following each
        // other.  Move the ones that are in the wrong position.  Do
        // keep the same ordering!
        while (i + 1 < gap->ga_len
               && (*smp[i + 1].sm_lead_w & 0xff) == c)
          // Skip over entry with same index byte.
          ++i;

        // "i" is now the last entry of the run; "n" is the distance from
        // it to the candidate being examined.
        for (n = 1; i + n < gap->ga_len; ++n)
          if ((*smp[i + n].sm_lead_w & 0xff) == c) {
            salitem_T tsal;

            // Move entry with same index byte after the entries
            // we already found.
            // After ++i/--n, smp[i + n] is still the candidate: save it,
            // shift the "n" entries in between up by one and drop it in
            // the slot right after the run.
            ++i;
            --n;
            tsal = smp[i + n];
            memmove(smp + i + 1, smp + i,
                    sizeof(salitem_T) * n);
            smp[i] = tsal;
          }
      }
    }
  }
}
1610 | |
1611 | // Turn a multi-byte string into a wide character string. |
1612 | // Return it in allocated memory. |
1613 | static int *mb_str2wide(char_u *s) |
1614 | { |
1615 | int i = 0; |
1616 | |
1617 | int *res = xmalloc((mb_charlen(s) + 1) * sizeof(int)); |
1618 | for (char_u *p = s; *p != NUL; ) { |
1619 | res[i++] = mb_ptr2char_adv((const char_u **)&p); |
1620 | } |
1621 | res[i] = NUL; |
1622 | |
1623 | return res; |
1624 | } |
1625 | |
1626 | // Reads a tree from the .spl or .sug file. |
1627 | // Allocates the memory and stores pointers in "bytsp" and "idxsp". |
1628 | // This is skipped when the tree has zero length. |
1629 | // Returns zero when OK, SP_ value for an error. |
1630 | static int |
1631 | spell_read_tree ( |
1632 | FILE *fd, |
1633 | char_u **bytsp, |
1634 | idx_T **idxsp, |
1635 | bool prefixtree, // true for the prefix tree |
1636 | int prefixcnt // when "prefixtree" is true: prefix count |
1637 | ) |
1638 | { |
1639 | int idx; |
1640 | char_u *bp; |
1641 | idx_T *ip; |
1642 | |
1643 | // The tree size was computed when writing the file, so that we can |
1644 | // allocate it as one long block. <nodecount> |
1645 | long len = get4c(fd); |
1646 | if (len < 0) { |
1647 | return SP_TRUNCERROR; |
1648 | } |
1649 | if ((size_t)len >= SIZE_MAX / sizeof(int)) { // -V547 |
1650 | // Invalid length, multiply with sizeof(int) would overflow. |
1651 | return SP_FORMERROR; |
1652 | } |
1653 | if (len > 0) { |
1654 | // Allocate the byte array. |
1655 | bp = xmalloc(len); |
1656 | *bytsp = bp; |
1657 | |
1658 | // Allocate the index array. |
1659 | ip = xcalloc(len, sizeof(*ip)); |
1660 | *idxsp = ip; |
1661 | |
1662 | // Recursively read the tree and store it in the array. |
1663 | idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); |
1664 | if (idx < 0) |
1665 | return idx; |
1666 | } |
1667 | return 0; |
1668 | } |
1669 | |
// Read one row of siblings from the spell file and store it in the byte array
// "byts" and index array "idxs".  Recursively read the children.
//
// The row is stored at "startidx": first the sibling count, then one entry
// per sibling.  End-of-word entries get a zero byte and their flags in
// idxs[]; shared entries get the referenced node index in idxs[] (marked
// with SHARED_MASK until the second pass below); all other entries get the
// index of their child row, which is read right after this row.
//
// NOTE: The code here must match put_node()!
//
// Returns the index (>= 0) following the siblings.
// Returns SP_TRUNCERROR if the file is shorter than expected.
// Returns SP_FORMERROR if there is a format error.
static idx_T
read_tree_node (
    FILE *fd,
    char_u *byts,
    idx_T *idxs,
    int maxidx,  // size of arrays
    idx_T startidx,  // current index in "byts" and "idxs"
    bool prefixtree,  // true for reading PREFIXTREE
    int maxprefcondnr  // maximum for <prefcondnr>
)
{
  int len;
  int i;
  int n;
  idx_T idx = startidx;
  int c;
  int c2;
// Temporary marker bit (bit 27) in idxs[] for entries that refer to a shared
// node; it is removed again before returning.
#define SHARED_MASK 0x8000000

  len = getc(fd);  // <siblingcount>
  if (len <= 0)
    return SP_TRUNCERROR;

  // The count plus "len" siblings must fit in the arrays.
  if (startidx + len >= maxidx)
    return SP_FORMERROR;
  byts[idx++] = len;

  // Read the byte values, flag/region bytes and shared indexes.
  for (i = 1; i <= len; ++i) {
    c = getc(fd);  // <byte>
    if (c < 0)
      return SP_TRUNCERROR;
    if (c <= BY_SPECIAL) {
      if (c == BY_NOFLAGS && !prefixtree) {
        // No flags, all regions.
        idxs[idx] = 0;
        c = 0;
      } else if (c != BY_INDEX) {
        // NOTE(review): none of the getc()/get2c() results read in this
        // branch are checked for EOF.
        if (prefixtree) {
          // Read the optional pflags byte, the prefix ID and the
          // condition nr.  In idxs[] store the prefix ID in the low
          // byte, the condition index shifted up 8 bits, the flags
          // shifted up 24 bits.
          if (c == BY_FLAGS)
            c = getc(fd) << 24;  // <pflags>
          else
            c = 0;

          c |= getc(fd);  // <affixID>

          n = get2c(fd);  // <prefcondnr>
          if (n >= maxprefcondnr)
            return SP_FORMERROR;
          c |= (n << 8);
        } else {  // c must be BY_FLAGS or BY_FLAGS2
          // Read flags and optional region and prefix ID.  In
          // idxs[] the flags go in the low two bytes, region above
          // that and prefix ID above the region.
          c2 = c;
          c = getc(fd);  // <flags>
          if (c2 == BY_FLAGS2)
            c = (getc(fd) << 8) + c;  // <flags2>
          if (c & WF_REGION)
            c = (getc(fd) << 16) + c;  // <region>
          if (c & WF_AFX)
            c = (getc(fd) << 24) + c;  // <affixID>
        }

        idxs[idx] = c;
        // Stored as a zero byte: this entry ends a word.
        c = 0;
      } else {  // c == BY_INDEX
        // <nodeidx>
        n = get3c(fd);
        if (n < 0 || n >= maxidx)
          return SP_FORMERROR;
        idxs[idx] = n + SHARED_MASK;
        c = getc(fd);  // <xbyte>
      }
    }
    byts[idx++] = c;
  }

  // Recursively read the children for non-shared siblings.
  // Skip the end-of-word ones (zero byte value) and the shared ones (and
  // remove SHARED_MASK)
  for (i = 1; i <= len; ++i)
    if (byts[startidx + i] != 0) {
      if (idxs[startidx + i] & SHARED_MASK)
        idxs[startidx + i] &= ~SHARED_MASK;
      else {
        // The child row starts at the next free index.
        idxs[startidx + i] = idx;
        idx = read_tree_node(fd, byts, idxs, maxidx, idx,
                             prefixtree, maxprefcondnr);
        // A negative value is an SP_*ERROR from the child; pass it up.
        if (idx < 0)
          break;
      }
    }

  return idx;
}
1778 | |
1779 | // Reload the spell file "fname" if it's loaded. |
1780 | static void |
1781 | spell_reload_one ( |
1782 | char_u *fname, |
1783 | bool added_word // invoked through "zg" |
1784 | ) |
1785 | { |
1786 | slang_T *slang; |
1787 | bool didit = false; |
1788 | |
1789 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) { |
1790 | if (path_full_compare(fname, slang->sl_fname, false) == kEqualFiles) { |
1791 | slang_clear(slang); |
1792 | if (spell_load_file(fname, NULL, slang, false) == NULL) |
1793 | // reloading failed, clear the language |
1794 | slang_clear(slang); |
1795 | redraw_all_later(SOME_VALID); |
1796 | didit = true; |
1797 | } |
1798 | } |
1799 | |
1800 | // When "zg" was used and the file wasn't loaded yet, should redo |
1801 | // 'spelllang' to load it now. |
1802 | if (added_word && !didit) |
1803 | did_set_spelllang(curwin); |
1804 | } |
1805 | |
1806 | // Functions for ":mkspell". |
1807 | |
// In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
// but it must be negative to indicate the prefix tree to tree_add_word().
// Use a negative number with the lower 8 bits zero.
// (-256 has all bits set except the lowest eight in two's complement.)
// NOTE(review): the expansion is not parenthesized; keep that in mind when
// using it inside a larger expression.
#define PFX_FLAGS -256

// flags for "condit" argument of store_aff_word()
// Each value is a distinct single bit (1, 2, 4, 8).
#define CONDIT_COMB 1  // affix must combine
#define CONDIT_CFIX 2  // affix must have CIRCUMFIX flag
#define CONDIT_SUF 4  // add a suffix for matching flags
#define CONDIT_AFF 8  // word already has an affix
1818 | |
// Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
// These are the defaults; spell_check_msm() overwrites all three when the
// option value is valid.
static long compress_start = 30000;  // memory / SBLOCKSIZE
static long compress_inc = 100;  // memory / SBLOCKSIZE
static long compress_added = 500000;  // word count
1823 | |
1824 | // Check the 'mkspellmem' option. Return FAIL if it's wrong. |
1825 | // Sets "sps_flags". |
1826 | int spell_check_msm(void) |
1827 | { |
1828 | char_u *p = p_msm; |
1829 | long start = 0; |
1830 | long incr = 0; |
1831 | long added = 0; |
1832 | |
1833 | if (!ascii_isdigit(*p)) |
1834 | return FAIL; |
1835 | // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow) |
1836 | start = (getdigits_long(&p, true, 0) * 10) / (SBLOCKSIZE / 102); |
1837 | if (*p != ',') { |
1838 | return FAIL; |
1839 | } |
1840 | p++; |
1841 | if (!ascii_isdigit(*p)) { |
1842 | return FAIL; |
1843 | } |
1844 | incr = (getdigits_long(&p, true, 0) * 102) / (SBLOCKSIZE / 10); |
1845 | if (*p != ',') { |
1846 | return FAIL; |
1847 | } |
1848 | p++; |
1849 | if (!ascii_isdigit(*p)) { |
1850 | return FAIL; |
1851 | } |
1852 | added = getdigits_long(&p, true, 0) * 1024; |
1853 | if (*p != NUL) { |
1854 | return FAIL; |
1855 | } |
1856 | |
1857 | if (start == 0 || incr == 0 || added == 0 || incr > start) { |
1858 | return FAIL; |
1859 | } |
1860 | |
1861 | compress_start = start; |
1862 | compress_inc = incr; |
1863 | compress_added = added; |
1864 | return OK; |
1865 | } |
1866 | |
1867 | #ifdef SPELL_PRINTTREE |
// For debugging the tree code: print the current tree in a (more or less)
// readable format, so that we can see what happens when adding a word and/or
// compressing the tree.
// Based on code from Olaf Seibert.
#define PRINTLINESIZE   1000  // size of each of the three line buffers
#define PRINTWIDTH      6     // number of columns used per tree level

// Format "a1" and "a2" with "fmt" into line buffer "l", starting at the
// column that belongs to tree level "depth".
// All arguments are parenthesized so that an expression such as "depth + 1"
// yields the intended offset and remaining size.  When the column would lie
// at or beyond the end of the buffer nothing is written and the result is 0,
// instead of passing an out-of-range pointer and a negative size.
// Note that "depth" is evaluated more than once.
#define PRINTSOME(l, depth, fmt, a1, a2) \
  ((depth) * PRINTWIDTH < PRINTLINESIZE \
   ? vim_snprintf((l) + (depth) * PRINTWIDTH, \
                  PRINTLINESIZE - PRINTWIDTH * (depth), fmt, (a1), (a2)) \
   : 0)

// The three text lines that together show one row of tree nodes.
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
1881 | |
1882 | static void spell_clear_flags(wordnode_T *node) |
1883 | { |
1884 | wordnode_T *np; |
1885 | |
1886 | for (np = node; np != NULL; np = np->wn_sibling) { |
1887 | np->wn_u1.index = FALSE; |
1888 | spell_clear_flags(np->wn_child); |
1889 | } |
1890 | } |
1891 | |
1892 | static void spell_print_node(wordnode_T *node, int depth) |
1893 | { |
1894 | if (node->wn_u1.index) { |
1895 | // Done this node before, print the reference. |
1896 | PRINTSOME(line1, depth, "(%d)" , node->wn_nr, 0); |
1897 | PRINTSOME(line2, depth, " " , 0, 0); |
1898 | PRINTSOME(line3, depth, " " , 0, 0); |
1899 | msg((char_u *)line1); |
1900 | msg((char_u *)line2); |
1901 | msg((char_u *)line3); |
1902 | } else { |
1903 | node->wn_u1.index = TRUE; |
1904 | |
1905 | if (node->wn_byte != NUL) { |
1906 | if (node->wn_child != NULL) |
1907 | PRINTSOME(line1, depth, " %c -> " , node->wn_byte, 0); |
1908 | else |
1909 | // Cannot happen? |
1910 | PRINTSOME(line1, depth, " %c ???" , node->wn_byte, 0); |
1911 | } else |
1912 | PRINTSOME(line1, depth, " $ " , 0, 0); |
1913 | |
1914 | PRINTSOME(line2, depth, "%d/%d " , node->wn_nr, node->wn_refs); |
1915 | |
1916 | if (node->wn_sibling != NULL) |
1917 | PRINTSOME(line3, depth, " | " , 0, 0); |
1918 | else |
1919 | PRINTSOME(line3, depth, " " , 0, 0); |
1920 | |
1921 | if (node->wn_byte == NUL) { |
1922 | msg((char_u *)line1); |
1923 | msg((char_u *)line2); |
1924 | msg((char_u *)line3); |
1925 | } |
1926 | |
1927 | // do the children |
1928 | if (node->wn_byte != NUL && node->wn_child != NULL) |
1929 | spell_print_node(node->wn_child, depth + 1); |
1930 | |
1931 | // do the siblings |
1932 | if (node->wn_sibling != NULL) { |
1933 | // get rid of all parent details except | |
1934 | STRCPY(line1, line3); |
1935 | STRCPY(line2, line3); |
1936 | spell_print_node(node->wn_sibling, depth); |
1937 | } |
1938 | } |
1939 | } |
1940 | |
1941 | static void spell_print_tree(wordnode_T *root) |
1942 | { |
1943 | if (root != NULL) { |
1944 | // Clear the "wn_u1.index" fields, used to remember what has been |
1945 | // done. |
1946 | spell_clear_flags(root); |
1947 | |
1948 | // Recursively print the tree. |
1949 | spell_print_node(root, 0); |
1950 | } |
1951 | } |
1952 | |
1953 | #endif // SPELL_PRINTTREE |
1954 | |
1955 | // Reads the affix file "fname". |
1956 | // Returns an afffile_T, NULL for complete failure. |
1957 | static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) |
1958 | { |
1959 | FILE *fd; |
1960 | char_u rline[MAXLINELEN]; |
1961 | char_u *line; |
1962 | char_u *pc = NULL; |
1963 | #define MAXITEMCNT 30 |
1964 | char_u *(items[MAXITEMCNT]); |
1965 | int itemcnt; |
1966 | char_u *p; |
1967 | int lnum = 0; |
1968 | affheader_T *cur_aff = NULL; |
1969 | bool did_postpone_prefix = false; |
1970 | int aff_todo = 0; |
1971 | hashtab_T *tp; |
1972 | char_u *low = NULL; |
1973 | char_u *fol = NULL; |
1974 | char_u *upp = NULL; |
1975 | int do_rep; |
1976 | int do_repsal; |
1977 | int do_sal; |
1978 | int do_mapline; |
1979 | bool found_map = false; |
1980 | hashitem_T *hi; |
1981 | int l; |
1982 | int compminlen = 0; // COMPOUNDMIN value |
1983 | int compsylmax = 0; // COMPOUNDSYLMAX value |
1984 | int compoptions = 0; // COMP_ flags |
1985 | int compmax = 0; // COMPOUNDWORDMAX value |
1986 | char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE |
1987 | // concatenated |
1988 | char_u *midword = NULL; // MIDWORD value |
1989 | char_u *syllable = NULL; // SYLLABLE value |
1990 | char_u *sofofrom = NULL; // SOFOFROM value |
1991 | char_u *sofoto = NULL; // SOFOTO value |
1992 | |
1993 | // Open the file. |
1994 | fd = os_fopen((char *)fname, "r" ); |
1995 | if (fd == NULL) { |
1996 | EMSG2(_(e_notopen), fname); |
1997 | return NULL; |
1998 | } |
1999 | |
2000 | vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..." ), fname); |
2001 | spell_message(spin, IObuff); |
2002 | |
2003 | // Only do REP lines when not done in another .aff file already. |
2004 | do_rep = GA_EMPTY(&spin->si_rep); |
2005 | |
2006 | // Only do REPSAL lines when not done in another .aff file already. |
2007 | do_repsal = GA_EMPTY(&spin->si_repsal); |
2008 | |
2009 | // Only do SAL lines when not done in another .aff file already. |
2010 | do_sal = GA_EMPTY(&spin->si_sal); |
2011 | |
2012 | // Only do MAP lines when not done in another .aff file already. |
2013 | do_mapline = GA_EMPTY(&spin->si_map); |
2014 | |
2015 | // Allocate and init the afffile_T structure. |
2016 | afffile_T *aff = getroom(spin, sizeof(*aff), true); |
2017 | hash_init(&aff->af_pref); |
2018 | hash_init(&aff->af_suff); |
2019 | hash_init(&aff->af_comp); |
2020 | |
2021 | // Read all the lines in the file one by one. |
2022 | while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { |
2023 | line_breakcheck(); |
2024 | ++lnum; |
2025 | |
2026 | // Skip comment lines. |
2027 | if (*rline == '#') |
2028 | continue; |
2029 | |
2030 | // Convert from "SET" to 'encoding' when needed. |
2031 | xfree(pc); |
2032 | if (spin->si_conv.vc_type != CONV_NONE) { |
2033 | pc = string_convert(&spin->si_conv, rline, NULL); |
2034 | if (pc == NULL) { |
2035 | smsg(_("Conversion failure for word in %s line %d: %s" ), |
2036 | fname, lnum, rline); |
2037 | continue; |
2038 | } |
2039 | line = pc; |
2040 | } else { |
2041 | pc = NULL; |
2042 | line = rline; |
2043 | } |
2044 | |
2045 | // Split the line up in white separated items. Put a NUL after each |
2046 | // item. |
2047 | itemcnt = 0; |
2048 | for (p = line;; ) { |
2049 | while (*p != NUL && *p <= ' ') // skip white space and CR/NL |
2050 | ++p; |
2051 | if (*p == NUL) |
2052 | break; |
2053 | if (itemcnt == MAXITEMCNT) // too many items |
2054 | break; |
2055 | items[itemcnt++] = p; |
2056 | // A few items have arbitrary text argument, don't split them. |
2057 | if (itemcnt == 2 && spell_info_item(items[0])) |
2058 | while (*p >= ' ' || *p == TAB) // skip until CR/NL |
2059 | ++p; |
2060 | else |
2061 | while (*p > ' ') // skip until white space or CR/NL |
2062 | ++p; |
2063 | if (*p == NUL) |
2064 | break; |
2065 | *p++ = NUL; |
2066 | } |
2067 | |
2068 | // Handle non-empty lines. |
2069 | if (itemcnt > 0) { |
2070 | if (is_aff_rule(items, itemcnt, "SET" , 2) && aff->af_enc == NULL) { |
2071 | // Setup for conversion from "ENC" to 'encoding'. |
2072 | aff->af_enc = enc_canonize(items[1]); |
2073 | if (!spin->si_ascii |
2074 | && convert_setup(&spin->si_conv, aff->af_enc, |
2075 | p_enc) == FAIL) |
2076 | smsg(_("Conversion in %s not supported: from %s to %s" ), |
2077 | fname, aff->af_enc, p_enc); |
2078 | spin->si_conv.vc_fail = true; |
2079 | } else if (is_aff_rule(items, itemcnt, "FLAG" , 2) |
2080 | && aff->af_flagtype == AFT_CHAR) { |
2081 | if (STRCMP(items[1], "long" ) == 0) |
2082 | aff->af_flagtype = AFT_LONG; |
2083 | else if (STRCMP(items[1], "num" ) == 0) |
2084 | aff->af_flagtype = AFT_NUM; |
2085 | else if (STRCMP(items[1], "caplong" ) == 0) |
2086 | aff->af_flagtype = AFT_CAPLONG; |
2087 | else |
2088 | smsg(_("Invalid value for FLAG in %s line %d: %s" ), |
2089 | fname, lnum, items[1]); |
2090 | if (aff->af_rare != 0 |
2091 | || aff->af_keepcase != 0 |
2092 | || aff->af_bad != 0 |
2093 | || aff->af_needaffix != 0 |
2094 | || aff->af_circumfix != 0 |
2095 | || aff->af_needcomp != 0 |
2096 | || aff->af_comproot != 0 |
2097 | || aff->af_nosuggest != 0 |
2098 | || compflags != NULL |
2099 | || aff->af_suff.ht_used > 0 |
2100 | || aff->af_pref.ht_used > 0) |
2101 | smsg(_("FLAG after using flags in %s line %d: %s" ), |
2102 | fname, lnum, items[1]); |
2103 | } else if (spell_info_item(items[0]) && itemcnt > 1) { |
2104 | p = getroom(spin, |
2105 | (spin->si_info == NULL ? 0 : STRLEN(spin->si_info)) |
2106 | + STRLEN(items[0]) |
2107 | + STRLEN(items[1]) + 3, false); |
2108 | if (spin->si_info != NULL) { |
2109 | STRCPY(p, spin->si_info); |
2110 | STRCAT(p, "\n" ); |
2111 | } |
2112 | STRCAT(p, items[0]); |
2113 | STRCAT(p, " " ); |
2114 | STRCAT(p, items[1]); |
2115 | spin->si_info = p; |
2116 | } else if (is_aff_rule(items, itemcnt, "MIDWORD" , 2) |
2117 | && midword == NULL) { |
2118 | midword = getroom_save(spin, items[1]); |
2119 | } else if (is_aff_rule(items, itemcnt, "TRY" , 2)) { |
2120 | // ignored, we look in the tree for what chars may appear |
2121 | } |
2122 | // TODO: remove "RAR" later |
2123 | else if ((is_aff_rule(items, itemcnt, "RAR" , 2) |
2124 | || is_aff_rule(items, itemcnt, "RARE" , 2)) |
2125 | && aff->af_rare == 0) { |
2126 | aff->af_rare = affitem2flag(aff->af_flagtype, items[1], |
2127 | fname, lnum); |
2128 | } |
2129 | // TODO: remove "KEP" later |
2130 | else if ((is_aff_rule(items, itemcnt, "KEP" , 2) |
2131 | || is_aff_rule(items, itemcnt, "KEEPCASE" , 2)) |
2132 | && aff->af_keepcase == 0) { |
2133 | aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], |
2134 | fname, lnum); |
2135 | } else if ((is_aff_rule(items, itemcnt, "BAD" , 2) |
2136 | || is_aff_rule(items, itemcnt, "FORBIDDENWORD" , 2)) |
2137 | && aff->af_bad == 0) { |
2138 | aff->af_bad = affitem2flag(aff->af_flagtype, items[1], |
2139 | fname, lnum); |
2140 | } else if (is_aff_rule(items, itemcnt, "NEEDAFFIX" , 2) |
2141 | && aff->af_needaffix == 0) { |
2142 | aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], |
2143 | fname, lnum); |
2144 | } else if (is_aff_rule(items, itemcnt, "CIRCUMFIX" , 2) |
2145 | && aff->af_circumfix == 0) { |
2146 | aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], |
2147 | fname, lnum); |
2148 | } else if (is_aff_rule(items, itemcnt, "NOSUGGEST" , 2) |
2149 | && aff->af_nosuggest == 0) { |
2150 | aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], |
2151 | fname, lnum); |
2152 | } else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND" , 2) |
2153 | || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND" , 2)) |
2154 | && aff->af_needcomp == 0) { |
2155 | aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], |
2156 | fname, lnum); |
2157 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT" , 2) |
2158 | && aff->af_comproot == 0) { |
2159 | aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], |
2160 | fname, lnum); |
2161 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG" , 2) |
2162 | && aff->af_compforbid == 0) { |
2163 | aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], |
2164 | fname, lnum); |
2165 | if (aff->af_pref.ht_used > 0) |
2166 | smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d" ), |
2167 | fname, lnum); |
2168 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG" , 2) |
2169 | && aff->af_comppermit == 0) { |
2170 | aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], |
2171 | fname, lnum); |
2172 | if (aff->af_pref.ht_used > 0) |
2173 | smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d" ), |
2174 | fname, lnum); |
2175 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG" , 2) |
2176 | && compflags == NULL) { |
2177 | // Turn flag "c" into COMPOUNDRULE compatible string "c+", |
2178 | // "Na" into "Na+", "1234" into "1234+". |
2179 | p = getroom(spin, STRLEN(items[1]) + 2, false); |
2180 | STRCPY(p, items[1]); |
2181 | STRCAT(p, "+" ); |
2182 | compflags = p; |
2183 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES" , 2)) { |
2184 | // We don't use the count, but do check that it's a number and |
2185 | // not COMPOUNDRULE mistyped. |
2186 | if (atoi((char *)items[1]) == 0) |
2187 | smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s" ), |
2188 | fname, lnum, items[1]); |
2189 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE" , 2)) { |
2190 | // Don't use the first rule if it is a number. |
2191 | if (compflags != NULL || *skipdigits(items[1]) != NUL) { |
2192 | // Concatenate this string to previously defined ones, |
2193 | // using a slash to separate them. |
2194 | l = (int)STRLEN(items[1]) + 1; |
2195 | if (compflags != NULL) |
2196 | l += (int)STRLEN(compflags) + 1; |
2197 | p = getroom(spin, l, false); |
2198 | if (compflags != NULL) { |
2199 | STRCPY(p, compflags); |
2200 | STRCAT(p, "/" ); |
2201 | } |
2202 | STRCAT(p, items[1]); |
2203 | compflags = p; |
2204 | } |
2205 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX" , 2) |
2206 | && compmax == 0) { |
2207 | compmax = atoi((char *)items[1]); |
2208 | if (compmax == 0) |
2209 | smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s" ), |
2210 | fname, lnum, items[1]); |
2211 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN" , 2) |
2212 | && compminlen == 0) { |
2213 | compminlen = atoi((char *)items[1]); |
2214 | if (compminlen == 0) |
2215 | smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s" ), |
2216 | fname, lnum, items[1]); |
2217 | } else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX" , 2) |
2218 | && compsylmax == 0) { |
2219 | compsylmax = atoi((char *)items[1]); |
2220 | if (compsylmax == 0) |
2221 | smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s" ), |
2222 | fname, lnum, items[1]); |
2223 | } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP" , 1)) { |
2224 | compoptions |= COMP_CHECKDUP; |
2225 | } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP" , 1)) { |
2226 | compoptions |= COMP_CHECKREP; |
2227 | } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE" , 1)) { |
2228 | compoptions |= COMP_CHECKCASE; |
2229 | } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE" , 1)) { |
2230 | compoptions |= COMP_CHECKTRIPLE; |
2231 | } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN" , 2)) { |
2232 | if (atoi((char *)items[1]) == 0) |
2233 | smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s" ), |
2234 | fname, lnum, items[1]); |
2235 | } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN" , 3)) { |
2236 | garray_T *gap = &spin->si_comppat; |
2237 | int i; |
2238 | |
2239 | // Only add the couple if it isn't already there. |
2240 | for (i = 0; i < gap->ga_len - 1; i += 2) |
2241 | if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 |
2242 | && STRCMP(((char_u **)(gap->ga_data))[i + 1], |
2243 | items[2]) == 0) |
2244 | break; |
2245 | if (i >= gap->ga_len) { |
2246 | ga_grow(gap, 2); |
2247 | ((char_u **)(gap->ga_data))[gap->ga_len++] |
2248 | = getroom_save(spin, items[1]); |
2249 | ((char_u **)(gap->ga_data))[gap->ga_len++] |
2250 | = getroom_save(spin, items[2]); |
2251 | } |
2252 | } else if (is_aff_rule(items, itemcnt, "SYLLABLE" , 2) |
2253 | && syllable == NULL) { |
2254 | syllable = getroom_save(spin, items[1]); |
2255 | } else if (is_aff_rule(items, itemcnt, "NOBREAK" , 1)) { |
2256 | spin->si_nobreak = true; |
2257 | } else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS" , 1)) { |
2258 | spin->si_nosplitsugs = true; |
2259 | } else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS" , 1)) { |
2260 | spin->si_nocompoundsugs = true; |
2261 | } else if (is_aff_rule(items, itemcnt, "NOSUGFILE" , 1)) { |
2262 | spin->si_nosugfile = true; |
2263 | } else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE" , 1)) { |
2264 | aff->af_pfxpostpone = true; |
2265 | } else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA" , 1)) { |
2266 | aff->af_ignoreextra = true; |
2267 | } else if ((STRCMP(items[0], "PFX" ) == 0 |
2268 | || STRCMP(items[0], "SFX" ) == 0) |
2269 | && aff_todo == 0 |
2270 | && itemcnt >= 4) { |
2271 | int lasti = 4; |
2272 | char_u key[AH_KEY_LEN]; |
2273 | |
2274 | if (*items[0] == 'P') |
2275 | tp = &aff->af_pref; |
2276 | else |
2277 | tp = &aff->af_suff; |
2278 | |
2279 | // Myspell allows the same affix name to be used multiple |
2280 | // times. The affix files that do this have an undocumented |
2281 | // "S" flag on all but the last block, thus we check for that |
2282 | // and store it in ah_follows. |
2283 | STRLCPY(key, items[1], AH_KEY_LEN); |
2284 | hi = hash_find(tp, key); |
2285 | if (!HASHITEM_EMPTY(hi)) { |
2286 | cur_aff = HI2AH(hi); |
2287 | if (cur_aff->ah_combine != (*items[2] == 'Y')) |
2288 | smsg(_("Different combining flag in continued affix block in %s line %d: %s" ), |
2289 | fname, lnum, items[1]); |
2290 | if (!cur_aff->ah_follows) |
2291 | smsg(_("Duplicate affix in %s line %d: %s" ), |
2292 | fname, lnum, items[1]); |
2293 | } else { |
2294 | // New affix letter. |
2295 | cur_aff = getroom(spin, sizeof(*cur_aff), true); |
2296 | cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], |
2297 | fname, lnum); |
2298 | if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) { |
2299 | break; |
2300 | } |
2301 | if (cur_aff->ah_flag == aff->af_bad |
2302 | || cur_aff->ah_flag == aff->af_rare |
2303 | || cur_aff->ah_flag == aff->af_keepcase |
2304 | || cur_aff->ah_flag == aff->af_needaffix |
2305 | || cur_aff->ah_flag == aff->af_circumfix |
2306 | || cur_aff->ah_flag == aff->af_nosuggest |
2307 | || cur_aff->ah_flag == aff->af_needcomp |
2308 | || cur_aff->ah_flag == aff->af_comproot) { |
2309 | smsg(_("Affix also used for " |
2310 | "BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST" |
2311 | "in %s line %d: %s" ), |
2312 | fname, lnum, items[1]); |
2313 | } |
2314 | STRCPY(cur_aff->ah_key, items[1]); |
2315 | hash_add(tp, cur_aff->ah_key); |
2316 | |
2317 | cur_aff->ah_combine = (*items[2] == 'Y'); |
2318 | } |
2319 | |
2320 | // Check for the "S" flag, which apparently means that another |
2321 | // block with the same affix name is following. |
2322 | if (itemcnt > lasti && STRCMP(items[lasti], "S" ) == 0) { |
2323 | ++lasti; |
2324 | cur_aff->ah_follows = true; |
2325 | } else |
2326 | cur_aff->ah_follows = false; |
2327 | |
2328 | // Myspell allows extra text after the item, but that might |
2329 | // mean mistakes go unnoticed. Require a comment-starter, |
2330 | // unless IGNOREEXTRA is used. Hunspell uses a "-" item. |
2331 | if (itemcnt > lasti |
2332 | && !aff->af_ignoreextra |
2333 | && *items[lasti] != '#') |
2334 | smsg(_(e_afftrailing), fname, lnum, items[lasti]); |
2335 | |
2336 | if (STRCMP(items[2], "Y" ) != 0 && STRCMP(items[2], "N" ) != 0) |
2337 | smsg(_("Expected Y or N in %s line %d: %s" ), |
2338 | fname, lnum, items[2]); |
2339 | |
2340 | if (*items[0] == 'P' && aff->af_pfxpostpone) { |
2341 | if (cur_aff->ah_newID == 0) { |
2342 | // Use a new number in the .spl file later, to be able |
2343 | // to handle multiple .aff files. |
2344 | check_renumber(spin); |
2345 | cur_aff->ah_newID = ++spin->si_newprefID; |
2346 | |
2347 | // We only really use ah_newID if the prefix is |
2348 | // postponed. We know that only after handling all |
2349 | // the items. |
2350 | did_postpone_prefix = false; |
2351 | } else |
2352 | // Did use the ID in a previous block. |
2353 | did_postpone_prefix = true; |
2354 | } |
2355 | |
2356 | aff_todo = atoi((char *)items[3]); |
2357 | } else if ((STRCMP(items[0], "PFX" ) == 0 |
2358 | || STRCMP(items[0], "SFX" ) == 0) |
2359 | && aff_todo > 0 |
2360 | && STRCMP(cur_aff->ah_key, items[1]) == 0 |
2361 | && itemcnt >= 5) { |
2362 | affentry_T *aff_entry; |
2363 | bool upper = false; |
2364 | int lasti = 5; |
2365 | |
2366 | // Myspell allows extra text after the item, but that might |
2367 | // mean mistakes go unnoticed. Require a comment-starter. |
2368 | // Hunspell uses a "-" item. |
2369 | if (itemcnt > lasti && *items[lasti] != '#' |
2370 | && (STRCMP(items[lasti], "-" ) != 0 |
2371 | || itemcnt != lasti + 1)) |
2372 | smsg(_(e_afftrailing), fname, lnum, items[lasti]); |
2373 | |
2374 | // New item for an affix letter. |
2375 | aff_todo--; |
2376 | aff_entry = getroom(spin, sizeof(*aff_entry), true); |
2377 | |
2378 | if (STRCMP(items[2], "0" ) != 0) |
2379 | aff_entry->ae_chop = getroom_save(spin, items[2]); |
2380 | if (STRCMP(items[3], "0" ) != 0) { |
2381 | aff_entry->ae_add = getroom_save(spin, items[3]); |
2382 | |
2383 | // Recognize flags on the affix: abcd/XYZ |
2384 | aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); |
2385 | if (aff_entry->ae_flags != NULL) { |
2386 | *aff_entry->ae_flags++ = NUL; |
2387 | aff_process_flags(aff, aff_entry); |
2388 | } |
2389 | } |
2390 | |
2391 | // Don't use an affix entry with non-ASCII characters when |
2392 | // "spin->si_ascii" is true. |
2393 | if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) |
2394 | || has_non_ascii(aff_entry->ae_add))) { |
2395 | aff_entry->ae_next = cur_aff->ah_first; |
2396 | cur_aff->ah_first = aff_entry; |
2397 | |
2398 | if (STRCMP(items[4], "." ) != 0) { |
2399 | char_u buf[MAXLINELEN]; |
2400 | |
2401 | aff_entry->ae_cond = getroom_save(spin, items[4]); |
2402 | if (*items[0] == 'P') |
2403 | sprintf((char *)buf, "^%s" , items[4]); |
2404 | else |
2405 | sprintf((char *)buf, "%s$" , items[4]); |
2406 | aff_entry->ae_prog = vim_regcomp(buf, |
2407 | RE_MAGIC + RE_STRING + RE_STRICT); |
2408 | if (aff_entry->ae_prog == NULL) |
2409 | smsg(_("Broken condition in %s line %d: %s" ), |
2410 | fname, lnum, items[4]); |
2411 | } |
2412 | |
2413 | // For postponed prefixes we need an entry in si_prefcond |
2414 | // for the condition. Use an existing one if possible. |
2415 | // Can't be done for an affix with flags, ignoring |
2416 | // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. |
2417 | if (*items[0] == 'P' && aff->af_pfxpostpone |
2418 | && aff_entry->ae_flags == NULL) { |
2419 | // When the chop string is one lower-case letter and |
2420 | // the add string ends in the upper-case letter we set |
2421 | // the "upper" flag, clear "ae_chop" and remove the |
2422 | // letters from "ae_add". The condition must either |
2423 | // be empty or start with the same letter. |
2424 | if (aff_entry->ae_chop != NULL |
2425 | && aff_entry->ae_add != NULL |
2426 | && aff_entry->ae_chop[(*mb_ptr2len)( |
2427 | aff_entry->ae_chop)] == NUL |
2428 | ) { |
2429 | int c, c_up; |
2430 | |
2431 | c = PTR2CHAR(aff_entry->ae_chop); |
2432 | c_up = SPELL_TOUPPER(c); |
2433 | if (c_up != c |
2434 | && (aff_entry->ae_cond == NULL |
2435 | || PTR2CHAR(aff_entry->ae_cond) == c)) { |
2436 | p = aff_entry->ae_add |
2437 | + STRLEN(aff_entry->ae_add); |
2438 | MB_PTR_BACK(aff_entry->ae_add, p); |
2439 | if (PTR2CHAR(p) == c_up) { |
2440 | upper = true; |
2441 | aff_entry->ae_chop = NULL; |
2442 | *p = NUL; |
2443 | |
2444 | // The condition is matched with the |
2445 | // actual word, thus must check for the |
2446 | // upper-case letter. |
2447 | if (aff_entry->ae_cond != NULL) { |
2448 | char_u buf[MAXLINELEN]; |
2449 | if (has_mbyte) { |
2450 | onecap_copy(items[4], buf, true); |
2451 | aff_entry->ae_cond = getroom_save( |
2452 | spin, buf); |
2453 | } else |
2454 | *aff_entry->ae_cond = c_up; |
2455 | if (aff_entry->ae_cond != NULL) { |
2456 | sprintf((char *)buf, "^%s" , |
2457 | aff_entry->ae_cond); |
2458 | vim_regfree(aff_entry->ae_prog); |
2459 | aff_entry->ae_prog = vim_regcomp( |
2460 | buf, RE_MAGIC + RE_STRING); |
2461 | } |
2462 | } |
2463 | } |
2464 | } |
2465 | } |
2466 | |
2467 | if (aff_entry->ae_chop == NULL) { |
2468 | int idx; |
2469 | char_u **pp; |
2470 | int n; |
2471 | |
2472 | // Find a previously used condition. |
2473 | for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; |
2474 | --idx) { |
2475 | p = ((char_u **)spin->si_prefcond.ga_data)[idx]; |
2476 | if (str_equal(p, aff_entry->ae_cond)) |
2477 | break; |
2478 | } |
2479 | if (idx < 0) { |
2480 | // Not found, add a new condition. |
2481 | idx = spin->si_prefcond.ga_len; |
2482 | pp = GA_APPEND_VIA_PTR(char_u *, &spin->si_prefcond); |
2483 | *pp = (aff_entry->ae_cond == NULL) ? |
2484 | NULL : getroom_save(spin, aff_entry->ae_cond); |
2485 | } |
2486 | |
2487 | // Add the prefix to the prefix tree. |
2488 | if (aff_entry->ae_add == NULL) |
2489 | p = (char_u *)"" ; |
2490 | else |
2491 | p = aff_entry->ae_add; |
2492 | |
2493 | // PFX_FLAGS is a negative number, so that |
2494 | // tree_add_word() knows this is the prefix tree. |
2495 | n = PFX_FLAGS; |
2496 | if (!cur_aff->ah_combine) |
2497 | n |= WFP_NC; |
2498 | if (upper) |
2499 | n |= WFP_UP; |
2500 | if (aff_entry->ae_comppermit) |
2501 | n |= WFP_COMPPERMIT; |
2502 | if (aff_entry->ae_compforbid) |
2503 | n |= WFP_COMPFORBID; |
2504 | tree_add_word(spin, p, spin->si_prefroot, n, |
2505 | idx, cur_aff->ah_newID); |
2506 | did_postpone_prefix = true; |
2507 | } |
2508 | |
2509 | // Didn't actually use ah_newID, backup si_newprefID. |
2510 | if (aff_todo == 0 && !did_postpone_prefix) { |
2511 | --spin->si_newprefID; |
2512 | cur_aff->ah_newID = 0; |
2513 | } |
2514 | } |
2515 | } |
2516 | } else if (is_aff_rule(items, itemcnt, "FOL" , 2) && fol == NULL) { |
2517 | fol = vim_strsave(items[1]); |
2518 | } else if (is_aff_rule(items, itemcnt, "LOW" , 2) && low == NULL) { |
2519 | low = vim_strsave(items[1]); |
2520 | } else if (is_aff_rule(items, itemcnt, "UPP" , 2) && upp == NULL) { |
2521 | upp = vim_strsave(items[1]); |
2522 | } else if (is_aff_rule(items, itemcnt, "REP" , 2) |
2523 | || is_aff_rule(items, itemcnt, "REPSAL" , 2)) { |
2524 | /* Ignore REP/REPSAL count */; |
2525 | if (!isdigit(*items[1])) |
2526 | smsg(_("Expected REP(SAL) count in %s line %d" ), |
2527 | fname, lnum); |
2528 | } else if ((STRCMP(items[0], "REP" ) == 0 |
2529 | || STRCMP(items[0], "REPSAL" ) == 0) |
2530 | && itemcnt >= 3) { |
2531 | // REP/REPSAL item |
2532 | // Myspell ignores extra arguments, we require it starts with |
2533 | // # to detect mistakes. |
2534 | if (itemcnt > 3 && items[3][0] != '#') |
2535 | smsg(_(e_afftrailing), fname, lnum, items[3]); |
2536 | if (items[0][3] == 'S' ? do_repsal : do_rep) { |
2537 | // Replace underscore with space (can't include a space |
2538 | // directly). |
2539 | for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) { |
2540 | if (*p == '_') { |
2541 | *p = ' '; |
2542 | } |
2543 | } |
2544 | for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) { |
2545 | if (*p == '_') { |
2546 | *p = ' '; |
2547 | } |
2548 | } |
2549 | add_fromto(spin, items[0][3] == 'S' |
2550 | ? &spin->si_repsal |
2551 | : &spin->si_rep, items[1], items[2]); |
2552 | } |
2553 | } else if (is_aff_rule(items, itemcnt, "MAP" , 2)) { |
2554 | // MAP item or count |
2555 | if (!found_map) { |
2556 | // First line contains the count. |
2557 | found_map = true; |
2558 | if (!isdigit(*items[1])) |
2559 | smsg(_("Expected MAP count in %s line %d" ), |
2560 | fname, lnum); |
2561 | } else if (do_mapline) { |
2562 | int c; |
2563 | |
2564 | // Check that every character appears only once. |
2565 | for (p = items[1]; *p != NUL; ) { |
2566 | c = mb_ptr2char_adv((const char_u **)&p); |
2567 | if ((!GA_EMPTY(&spin->si_map) |
2568 | && vim_strchr(spin->si_map.ga_data, c) |
2569 | != NULL) |
2570 | || vim_strchr(p, c) != NULL) { |
2571 | smsg(_("Duplicate character in MAP in %s line %d" ), |
2572 | fname, lnum); |
2573 | } |
2574 | } |
2575 | |
2576 | // We simply concatenate all the MAP strings, separated by |
2577 | // slashes. |
2578 | ga_concat(&spin->si_map, items[1]); |
2579 | ga_append(&spin->si_map, '/'); |
2580 | } |
2581 | } |
2582 | // Accept "SAL from to" and "SAL from to #comment". |
2583 | else if (is_aff_rule(items, itemcnt, "SAL" , 3)) { |
2584 | if (do_sal) { |
2585 | // SAL item (sounds-a-like) |
2586 | // Either one of the known keys or a from-to pair. |
2587 | if (STRCMP(items[1], "followup" ) == 0) |
2588 | spin->si_followup = sal_to_bool(items[2]); |
2589 | else if (STRCMP(items[1], "collapse_result" ) == 0) |
2590 | spin->si_collapse = sal_to_bool(items[2]); |
2591 | else if (STRCMP(items[1], "remove_accents" ) == 0) |
2592 | spin->si_rem_accents = sal_to_bool(items[2]); |
2593 | else |
2594 | // when "to" is "_" it means empty |
2595 | add_fromto(spin, &spin->si_sal, items[1], |
2596 | STRCMP(items[2], "_" ) == 0 ? (char_u *)"" |
2597 | : items[2]); |
2598 | } |
2599 | } else if (is_aff_rule(items, itemcnt, "SOFOFROM" , 2) |
2600 | && sofofrom == NULL) { |
2601 | sofofrom = getroom_save(spin, items[1]); |
2602 | } else if (is_aff_rule(items, itemcnt, "SOFOTO" , 2) |
2603 | && sofoto == NULL) { |
2604 | sofoto = getroom_save(spin, items[1]); |
2605 | } else if (STRCMP(items[0], "COMMON" ) == 0) { |
2606 | int i; |
2607 | |
2608 | for (i = 1; i < itemcnt; ++i) { |
2609 | if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, |
2610 | items[i]))) { |
2611 | p = vim_strsave(items[i]); |
2612 | hash_add(&spin->si_commonwords, p); |
2613 | } |
2614 | } |
2615 | } else |
2616 | smsg(_("Unrecognized or duplicate item in %s line %d: %s" ), |
2617 | fname, lnum, items[0]); |
2618 | } |
2619 | } |
2620 | |
2621 | if (fol != NULL || low != NULL || upp != NULL) { |
2622 | if (spin->si_clear_chartab) { |
2623 | // Clear the char type tables, don't want to use any of the |
2624 | // currently used spell properties. |
2625 | init_spell_chartab(); |
2626 | spin->si_clear_chartab = false; |
2627 | } |
2628 | |
2629 | // Don't write a word table for an ASCII file, so that we don't check |
2630 | // for conflicts with a word table that matches 'encoding'. |
2631 | // Don't write one for utf-8 either, we use utf_*() and |
2632 | // mb_get_class(), the list of chars in the file will be incomplete. |
2633 | if (!spin->si_ascii |
2634 | && !enc_utf8 |
2635 | ) { |
2636 | if (fol == NULL || low == NULL || upp == NULL) |
2637 | smsg(_("Missing FOL/LOW/UPP line in %s" ), fname); |
2638 | else |
2639 | (void)set_spell_chartab(fol, low, upp); |
2640 | } |
2641 | |
2642 | xfree(fol); |
2643 | xfree(low); |
2644 | xfree(upp); |
2645 | } |
2646 | |
2647 | // Use compound specifications of the .aff file for the spell info. |
2648 | if (compmax != 0) { |
2649 | aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX" ); |
2650 | spin->si_compmax = compmax; |
2651 | } |
2652 | |
2653 | if (compminlen != 0) { |
2654 | aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN" ); |
2655 | spin->si_compminlen = compminlen; |
2656 | } |
2657 | |
2658 | if (compsylmax != 0) { |
2659 | if (syllable == NULL) |
2660 | smsg(_("COMPOUNDSYLMAX used without SYLLABLE" )); |
2661 | aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX" ); |
2662 | spin->si_compsylmax = compsylmax; |
2663 | } |
2664 | |
2665 | if (compoptions != 0) { |
2666 | aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options" ); |
2667 | spin->si_compoptions |= compoptions; |
2668 | } |
2669 | |
2670 | if (compflags != NULL) |
2671 | process_compflags(spin, aff, compflags); |
2672 | |
2673 | // Check that we didn't use too many renumbered flags. |
2674 | if (spin->si_newcompID < spin->si_newprefID) { |
2675 | if (spin->si_newcompID == 127 || spin->si_newcompID == 255) |
2676 | MSG(_("Too many postponed prefixes" )); |
2677 | else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) |
2678 | MSG(_("Too many compound flags" )); |
2679 | else |
2680 | MSG(_("Too many postponed prefixes and/or compound flags" )); |
2681 | } |
2682 | |
2683 | if (syllable != NULL) { |
2684 | aff_check_string(spin->si_syllable, syllable, "SYLLABLE" ); |
2685 | spin->si_syllable = syllable; |
2686 | } |
2687 | |
2688 | if (sofofrom != NULL || sofoto != NULL) { |
2689 | if (sofofrom == NULL || sofoto == NULL) |
2690 | smsg(_("Missing SOFO%s line in %s" ), |
2691 | sofofrom == NULL ? "FROM" : "TO" , fname); |
2692 | else if (!GA_EMPTY(&spin->si_sal)) |
2693 | smsg(_("Both SAL and SOFO lines in %s" ), fname); |
2694 | else { |
2695 | aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM" ); |
2696 | aff_check_string(spin->si_sofoto, sofoto, "SOFOTO" ); |
2697 | spin->si_sofofr = sofofrom; |
2698 | spin->si_sofoto = sofoto; |
2699 | } |
2700 | } |
2701 | |
2702 | if (midword != NULL) { |
2703 | aff_check_string(spin->si_midword, midword, "MIDWORD" ); |
2704 | spin->si_midword = midword; |
2705 | } |
2706 | |
2707 | xfree(pc); |
2708 | fclose(fd); |
2709 | return aff; |
2710 | } |
2711 | |
2712 | // Returns true when items[0] equals "rulename", there are "mincount" items or |
2713 | // a comment is following after item "mincount". |
2714 | static bool is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount) |
2715 | { |
2716 | return STRCMP(items[0], rulename) == 0 |
2717 | && (itemcnt == mincount |
2718 | || (itemcnt > mincount && items[mincount][0] == '#')); |
2719 | } |
2720 | |
2721 | // For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from |
2722 | // ae_flags to ae_comppermit and ae_compforbid. |
2723 | static void aff_process_flags(afffile_T *affile, affentry_T *entry) |
2724 | { |
2725 | char_u *p; |
2726 | char_u *prevp; |
2727 | unsigned flag; |
2728 | |
2729 | if (entry->ae_flags != NULL |
2730 | && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) { |
2731 | for (p = entry->ae_flags; *p != NUL; ) { |
2732 | prevp = p; |
2733 | flag = get_affitem(affile->af_flagtype, &p); |
2734 | if (flag == affile->af_comppermit || flag == affile->af_compforbid) { |
2735 | STRMOVE(prevp, p); |
2736 | p = prevp; |
2737 | if (flag == affile->af_comppermit) |
2738 | entry->ae_comppermit = true; |
2739 | else |
2740 | entry->ae_compforbid = true; |
2741 | } |
2742 | if (affile->af_flagtype == AFT_NUM && *p == ',') |
2743 | ++p; |
2744 | } |
2745 | if (*entry->ae_flags == NUL) |
2746 | entry->ae_flags = NULL; // nothing left |
2747 | } |
2748 | } |
2749 | |
2750 | // Returns true if "s" is the name of an info item in the affix file. |
2751 | static bool spell_info_item(char_u *s) |
2752 | { |
2753 | return STRCMP(s, "NAME" ) == 0 |
2754 | || STRCMP(s, "HOME" ) == 0 |
2755 | || STRCMP(s, "VERSION" ) == 0 |
2756 | || STRCMP(s, "AUTHOR" ) == 0 |
2757 | || STRCMP(s, "EMAIL" ) == 0 |
2758 | || STRCMP(s, "COPYRIGHT" ) == 0; |
2759 | } |
2760 | |
2761 | // Turn an affix flag name into a number, according to the FLAG type. |
2762 | // returns zero for failure. |
2763 | static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum) |
2764 | { |
2765 | unsigned res; |
2766 | char_u *p = item; |
2767 | |
2768 | res = get_affitem(flagtype, &p); |
2769 | if (res == 0) { |
2770 | if (flagtype == AFT_NUM) |
2771 | smsg(_("Flag is not a number in %s line %d: %s" ), |
2772 | fname, lnum, item); |
2773 | else |
2774 | smsg(_("Illegal flag in %s line %d: %s" ), |
2775 | fname, lnum, item); |
2776 | } |
2777 | if (*p != NUL) { |
2778 | smsg(_(e_affname), fname, lnum, item); |
2779 | return 0; |
2780 | } |
2781 | |
2782 | return res; |
2783 | } |
2784 | |
2785 | // Get one affix name from "*pp" and advance the pointer. |
2786 | // Returns zero for an error, still advances the pointer then. |
2787 | static unsigned get_affitem(int flagtype, char_u **pp) |
2788 | { |
2789 | int res; |
2790 | |
2791 | if (flagtype == AFT_NUM) { |
2792 | if (!ascii_isdigit(**pp)) { |
2793 | ++*pp; // always advance, avoid getting stuck |
2794 | return 0; |
2795 | } |
2796 | res = getdigits_int(pp, true, 0); |
2797 | } else { |
2798 | res = mb_ptr2char_adv((const char_u **)pp); |
2799 | if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG |
2800 | && res >= 'A' && res <= 'Z')) { |
2801 | if (**pp == NUL) |
2802 | return 0; |
2803 | res = mb_ptr2char_adv((const char_u **)pp) + (res << 16); |
2804 | } |
2805 | } |
2806 | return res; |
2807 | } |
2808 | |
2809 | // Process the "compflags" string used in an affix file and append it to |
2810 | // spin->si_compflags. |
2811 | // The processing involves changing the affix names to ID numbers, so that |
2812 | // they fit in one byte. |
2813 | static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags) |
2814 | { |
2815 | char_u *p; |
2816 | char_u *prevp; |
2817 | unsigned flag; |
2818 | compitem_T *ci; |
2819 | int id; |
2820 | int len; |
2821 | char_u *tp; |
2822 | char_u key[AH_KEY_LEN]; |
2823 | hashitem_T *hi; |
2824 | |
2825 | // Make room for the old and the new compflags, concatenated with a / in |
2826 | // between. Processing it makes it shorter, but we don't know by how |
2827 | // much, thus allocate the maximum. |
2828 | len = (int)STRLEN(compflags) + 1; |
2829 | if (spin->si_compflags != NULL) |
2830 | len += (int)STRLEN(spin->si_compflags) + 1; |
2831 | p = getroom(spin, len, false); |
2832 | if (spin->si_compflags != NULL) { |
2833 | STRCPY(p, spin->si_compflags); |
2834 | STRCAT(p, "/" ); |
2835 | } |
2836 | spin->si_compflags = p; |
2837 | tp = p + STRLEN(p); |
2838 | |
2839 | for (p = compflags; *p != NUL; ) { |
2840 | if (vim_strchr((char_u *)"/?*+[]" , *p) != NULL) |
2841 | // Copy non-flag characters directly. |
2842 | *tp++ = *p++; |
2843 | else { |
2844 | // First get the flag number, also checks validity. |
2845 | prevp = p; |
2846 | flag = get_affitem(aff->af_flagtype, &p); |
2847 | if (flag != 0) { |
2848 | // Find the flag in the hashtable. If it was used before, use |
2849 | // the existing ID. Otherwise add a new entry. |
2850 | STRLCPY(key, prevp, p - prevp + 1); |
2851 | hi = hash_find(&aff->af_comp, key); |
2852 | if (!HASHITEM_EMPTY(hi)) { |
2853 | id = HI2CI(hi)->ci_newID; |
2854 | } else { |
2855 | ci = getroom(spin, sizeof(compitem_T), true); |
2856 | STRCPY(ci->ci_key, key); |
2857 | ci->ci_flag = flag; |
2858 | // Avoid using a flag ID that has a special meaning in a |
2859 | // regexp (also inside []). |
2860 | do { |
2861 | check_renumber(spin); |
2862 | id = spin->si_newcompID--; |
2863 | } while (vim_strchr((char_u *)"/?*+[]\\-^" , id) != NULL); |
2864 | ci->ci_newID = id; |
2865 | hash_add(&aff->af_comp, ci->ci_key); |
2866 | } |
2867 | *tp++ = id; |
2868 | } |
2869 | if (aff->af_flagtype == AFT_NUM && *p == ',') |
2870 | ++p; |
2871 | } |
2872 | } |
2873 | |
2874 | *tp = NUL; |
2875 | } |
2876 | |
2877 | // Check that the new IDs for postponed affixes and compounding don't overrun |
2878 | // each other. We have almost 255 available, but start at 0-127 to avoid |
2879 | // using two bytes for utf-8. When the 0-127 range is used up go to 128-255. |
2880 | // When that is used up an error message is given. |
2881 | static void check_renumber(spellinfo_T *spin) |
2882 | { |
2883 | if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) { |
2884 | spin->si_newprefID = 127; |
2885 | spin->si_newcompID = 255; |
2886 | } |
2887 | } |
2888 | |
2889 | // Returns true if flag "flag" appears in affix list "afflist". |
2890 | static bool flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) |
2891 | { |
2892 | char_u *p; |
2893 | unsigned n; |
2894 | |
2895 | switch (flagtype) { |
2896 | case AFT_CHAR: |
2897 | return vim_strchr(afflist, flag) != NULL; |
2898 | |
2899 | case AFT_CAPLONG: |
2900 | case AFT_LONG: |
2901 | for (p = afflist; *p != NUL; ) { |
2902 | n = mb_ptr2char_adv((const char_u **)&p); |
2903 | if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) |
2904 | && *p != NUL) { |
2905 | n = mb_ptr2char_adv((const char_u **)&p) + (n << 16); |
2906 | } |
2907 | if (n == flag) { |
2908 | return true; |
2909 | } |
2910 | } |
2911 | break; |
2912 | |
2913 | case AFT_NUM: |
2914 | for (p = afflist; *p != NUL; ) { |
2915 | int digits = getdigits_int(&p, true, 0); |
2916 | assert(digits >= 0); |
2917 | n = (unsigned int)digits; |
2918 | if (n == flag) |
2919 | return true; |
2920 | if (*p != NUL) // skip over comma |
2921 | ++p; |
2922 | } |
2923 | break; |
2924 | } |
2925 | return false; |
2926 | } |
2927 | |
// Warns when the number "spinval" was already set (non-zero) and "affval"
// differs from it.  "name" is the item name shown in the message.
static void aff_check_number(int spinval, int affval, char *name)
{
  if (spinval == 0 || spinval == affval) {
    return;
  }
  smsg(_("%s value differs from what is used in another .aff file"),
       name);
}
2935 | |
2936 | // Give a warning when "spinval" and "affval" strings are set and not the same. |
2937 | static void aff_check_string(char_u *spinval, char_u *affval, char *name) |
2938 | { |
2939 | if (spinval != NULL && STRCMP(spinval, affval) != 0) |
2940 | smsg(_("%s value differs from what is used in another .aff file" ), |
2941 | name); |
2942 | } |
2943 | |
2944 | // Returns true if strings "s1" and "s2" are equal. Also consider both being |
2945 | // NULL as equal. |
2946 | static bool str_equal(char_u *s1, char_u *s2) |
2947 | { |
2948 | if (s1 == NULL || s2 == NULL) |
2949 | return s1 == s2; |
2950 | return STRCMP(s1, s2) == 0; |
2951 | } |
2952 | |
2953 | // Add a from-to item to "gap". Used for REP and SAL items. |
2954 | // They are stored case-folded. |
2955 | static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to) |
2956 | { |
2957 | char_u word[MAXWLEN]; |
2958 | |
2959 | fromto_T *ftp = GA_APPEND_VIA_PTR(fromto_T, gap); |
2960 | (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); |
2961 | ftp->ft_from = getroom_save(spin, word); |
2962 | (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); |
2963 | ftp->ft_to = getroom_save(spin, word); |
2964 | } |
2965 | |
2966 | // Converts a boolean argument in a SAL line to true or false; |
2967 | static bool sal_to_bool(char_u *s) |
2968 | { |
2969 | return STRCMP(s, "1" ) == 0 || STRCMP(s, "true" ) == 0; |
2970 | } |
2971 | |
2972 | // Free the structure filled by spell_read_aff(). |
2973 | static void spell_free_aff(afffile_T *aff) |
2974 | { |
2975 | hashtab_T *ht; |
2976 | hashitem_T *hi; |
2977 | int todo; |
2978 | affheader_T *ah; |
2979 | affentry_T *ae; |
2980 | |
2981 | xfree(aff->af_enc); |
2982 | |
2983 | // All this trouble to free the "ae_prog" items... |
2984 | for (ht = &aff->af_pref;; ht = &aff->af_suff) { |
2985 | todo = (int)ht->ht_used; |
2986 | for (hi = ht->ht_array; todo > 0; ++hi) { |
2987 | if (!HASHITEM_EMPTY(hi)) { |
2988 | --todo; |
2989 | ah = HI2AH(hi); |
2990 | for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) |
2991 | vim_regfree(ae->ae_prog); |
2992 | } |
2993 | } |
2994 | if (ht == &aff->af_suff) |
2995 | break; |
2996 | } |
2997 | |
2998 | hash_clear(&aff->af_pref); |
2999 | hash_clear(&aff->af_suff); |
3000 | hash_clear(&aff->af_comp); |
3001 | } |
3002 | |
3003 | // Read dictionary file "fname". |
3004 | // Returns OK or FAIL; |
3005 | static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) |
3006 | { |
3007 | hashtab_T ht; |
3008 | char_u line[MAXLINELEN]; |
3009 | char_u *p; |
3010 | char_u *afflist; |
3011 | char_u store_afflist[MAXWLEN]; |
3012 | int pfxlen; |
3013 | bool need_affix; |
3014 | char_u *dw; |
3015 | char_u *pc; |
3016 | char_u *w; |
3017 | int l; |
3018 | hash_T hash; |
3019 | hashitem_T *hi; |
3020 | FILE *fd; |
3021 | int lnum = 1; |
3022 | int non_ascii = 0; |
3023 | int retval = OK; |
3024 | char_u message[MAXLINELEN + MAXWLEN]; |
3025 | int flags; |
3026 | int duplicate = 0; |
3027 | |
3028 | // Open the file. |
3029 | fd = os_fopen((char *)fname, "r" ); |
3030 | if (fd == NULL) { |
3031 | EMSG2(_(e_notopen), fname); |
3032 | return FAIL; |
3033 | } |
3034 | |
3035 | // The hashtable is only used to detect duplicated words. |
3036 | hash_init(&ht); |
3037 | |
3038 | vim_snprintf((char *)IObuff, IOSIZE, |
3039 | _("Reading dictionary file %s..." ), fname); |
3040 | spell_message(spin, IObuff); |
3041 | |
3042 | // start with a message for the first line |
3043 | spin->si_msg_count = 999999; |
3044 | |
3045 | // Read and ignore the first line: word count. |
3046 | (void)vim_fgets(line, MAXLINELEN, fd); |
3047 | if (!ascii_isdigit(*skipwhite(line))) |
3048 | EMSG2(_("E760: No word count in %s" ), fname); |
3049 | |
3050 | // Read all the lines in the file one by one. |
3051 | // The words are converted to 'encoding' here, before being added to |
3052 | // the hashtable. |
3053 | while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) { |
3054 | line_breakcheck(); |
3055 | ++lnum; |
3056 | if (line[0] == '#' || line[0] == '/') |
3057 | continue; // comment line |
3058 | |
3059 | // Remove CR, LF and white space from the end. White space halfway through |
3060 | // the word is kept to allow multi-word terms like "et al.". |
3061 | l = (int)STRLEN(line); |
3062 | while (l > 0 && line[l - 1] <= ' ') |
3063 | --l; |
3064 | if (l == 0) |
3065 | continue; // empty line |
3066 | line[l] = NUL; |
3067 | |
3068 | // Convert from "SET" to 'encoding' when needed. |
3069 | if (spin->si_conv.vc_type != CONV_NONE) { |
3070 | pc = string_convert(&spin->si_conv, line, NULL); |
3071 | if (pc == NULL) { |
3072 | smsg(_("Conversion failure for word in %s line %d: %s" ), |
3073 | fname, lnum, line); |
3074 | continue; |
3075 | } |
3076 | w = pc; |
3077 | } else { |
3078 | pc = NULL; |
3079 | w = line; |
3080 | } |
3081 | |
3082 | // Truncate the word at the "/", set "afflist" to what follows. |
3083 | // Replace "\/" by "/" and "\\" by "\". |
3084 | afflist = NULL; |
3085 | for (p = w; *p != NUL; MB_PTR_ADV(p)) { |
3086 | if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) { |
3087 | STRMOVE(p, p + 1); |
3088 | } else if (*p == '/') { |
3089 | *p = NUL; |
3090 | afflist = p + 1; |
3091 | break; |
3092 | } |
3093 | } |
3094 | |
3095 | // Skip non-ASCII words when "spin->si_ascii" is true. |
3096 | if (spin->si_ascii && has_non_ascii(w)) { |
3097 | ++non_ascii; |
3098 | xfree(pc); |
3099 | continue; |
3100 | } |
3101 | |
3102 | // This takes time, print a message every 10000 words. |
3103 | if (spin->si_verbose && spin->si_msg_count > 10000) { |
3104 | spin->si_msg_count = 0; |
3105 | vim_snprintf((char *)message, sizeof(message), |
3106 | _("line %6d, word %6ld - %s" ), |
3107 | lnum, spin->si_foldwcount + spin->si_keepwcount, w); |
3108 | msg_start(); |
3109 | msg_puts_long_attr(message, 0); |
3110 | msg_clr_eos(); |
3111 | msg_didout = FALSE; |
3112 | msg_col = 0; |
3113 | ui_flush(); |
3114 | } |
3115 | |
3116 | // Store the word in the hashtable to be able to find duplicates. |
3117 | dw = getroom_save(spin, w); |
3118 | if (dw == NULL) { |
3119 | retval = FAIL; |
3120 | xfree(pc); |
3121 | break; |
3122 | } |
3123 | |
3124 | hash = hash_hash(dw); |
3125 | hi = hash_lookup(&ht, (const char *)dw, STRLEN(dw), hash); |
3126 | if (!HASHITEM_EMPTY(hi)) { |
3127 | if (p_verbose > 0) |
3128 | smsg(_("Duplicate word in %s line %d: %s" ), |
3129 | fname, lnum, dw); |
3130 | else if (duplicate == 0) |
3131 | smsg(_("First duplicate word in %s line %d: %s" ), |
3132 | fname, lnum, dw); |
3133 | ++duplicate; |
3134 | } else |
3135 | hash_add_item(&ht, hi, dw, hash); |
3136 | |
3137 | flags = 0; |
3138 | store_afflist[0] = NUL; |
3139 | pfxlen = 0; |
3140 | need_affix = false; |
3141 | if (afflist != NULL) { |
3142 | // Extract flags from the affix list. |
3143 | flags |= get_affix_flags(affile, afflist); |
3144 | |
3145 | if (affile->af_needaffix != 0 && flag_in_afflist( |
3146 | affile->af_flagtype, afflist, affile->af_needaffix)) |
3147 | need_affix = true; |
3148 | |
3149 | if (affile->af_pfxpostpone) |
3150 | // Need to store the list of prefix IDs with the word. |
3151 | pfxlen = get_pfxlist(affile, afflist, store_afflist); |
3152 | |
3153 | if (spin->si_compflags != NULL) |
3154 | // Need to store the list of compound flags with the word. |
3155 | // Concatenate them to the list of prefix IDs. |
3156 | get_compflags(affile, afflist, store_afflist + pfxlen); |
3157 | } |
3158 | |
3159 | // Add the word to the word tree(s). |
3160 | if (store_word(spin, dw, flags, spin->si_region, |
3161 | store_afflist, need_affix) == FAIL) |
3162 | retval = FAIL; |
3163 | |
3164 | if (afflist != NULL) { |
3165 | // Find all matching suffixes and add the resulting words. |
3166 | // Additionally do matching prefixes that combine. |
3167 | if (store_aff_word(spin, dw, afflist, affile, |
3168 | &affile->af_suff, &affile->af_pref, |
3169 | CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) |
3170 | retval = FAIL; |
3171 | |
3172 | // Find all matching prefixes and add the resulting words. |
3173 | if (store_aff_word(spin, dw, afflist, affile, |
3174 | &affile->af_pref, NULL, |
3175 | CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) |
3176 | retval = FAIL; |
3177 | } |
3178 | |
3179 | xfree(pc); |
3180 | } |
3181 | |
3182 | if (duplicate > 0) |
3183 | smsg(_("%d duplicate word(s) in %s" ), duplicate, fname); |
3184 | if (spin->si_ascii && non_ascii > 0) |
3185 | smsg(_("Ignored %d word(s) with non-ASCII characters in %s" ), |
3186 | non_ascii, fname); |
3187 | hash_clear(&ht); |
3188 | |
3189 | fclose(fd); |
3190 | return retval; |
3191 | } |
3192 | |
3193 | // Check for affix flags in "afflist" that are turned into word flags. |
3194 | // Return WF_ flags. |
3195 | static int get_affix_flags(afffile_T *affile, char_u *afflist) |
3196 | { |
3197 | int flags = 0; |
3198 | |
3199 | if (affile->af_keepcase != 0 && flag_in_afflist( |
3200 | affile->af_flagtype, afflist, affile->af_keepcase)) |
3201 | flags |= WF_KEEPCAP | WF_FIXCAP; |
3202 | if (affile->af_rare != 0 && flag_in_afflist( |
3203 | affile->af_flagtype, afflist, affile->af_rare)) |
3204 | flags |= WF_RARE; |
3205 | if (affile->af_bad != 0 && flag_in_afflist( |
3206 | affile->af_flagtype, afflist, affile->af_bad)) |
3207 | flags |= WF_BANNED; |
3208 | if (affile->af_needcomp != 0 && flag_in_afflist( |
3209 | affile->af_flagtype, afflist, affile->af_needcomp)) |
3210 | flags |= WF_NEEDCOMP; |
3211 | if (affile->af_comproot != 0 && flag_in_afflist( |
3212 | affile->af_flagtype, afflist, affile->af_comproot)) |
3213 | flags |= WF_COMPROOT; |
3214 | if (affile->af_nosuggest != 0 && flag_in_afflist( |
3215 | affile->af_flagtype, afflist, affile->af_nosuggest)) |
3216 | flags |= WF_NOSUGGEST; |
3217 | return flags; |
3218 | } |
3219 | |
3220 | // Get the list of prefix IDs from the affix list "afflist". |
3221 | // Used for PFXPOSTPONE. |
3222 | // Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL |
3223 | // and return the number of affixes. |
3224 | static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist) |
3225 | { |
3226 | char_u *p; |
3227 | char_u *prevp; |
3228 | int cnt = 0; |
3229 | int id; |
3230 | char_u key[AH_KEY_LEN]; |
3231 | hashitem_T *hi; |
3232 | |
3233 | for (p = afflist; *p != NUL; ) { |
3234 | prevp = p; |
3235 | if (get_affitem(affile->af_flagtype, &p) != 0) { |
3236 | // A flag is a postponed prefix flag if it appears in "af_pref" |
3237 | // and its ID is not zero. |
3238 | STRLCPY(key, prevp, p - prevp + 1); |
3239 | hi = hash_find(&affile->af_pref, key); |
3240 | if (!HASHITEM_EMPTY(hi)) { |
3241 | id = HI2AH(hi)->ah_newID; |
3242 | if (id != 0) |
3243 | store_afflist[cnt++] = id; |
3244 | } |
3245 | } |
3246 | if (affile->af_flagtype == AFT_NUM && *p == ',') |
3247 | ++p; |
3248 | } |
3249 | |
3250 | store_afflist[cnt] = NUL; |
3251 | return cnt; |
3252 | } |
3253 | |
3254 | // Get the list of compound IDs from the affix list "afflist" that are used |
3255 | // for compound words. |
3256 | // Puts the flags in "store_afflist[]". |
3257 | static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist) |
3258 | { |
3259 | char_u *p; |
3260 | char_u *prevp; |
3261 | int cnt = 0; |
3262 | char_u key[AH_KEY_LEN]; |
3263 | hashitem_T *hi; |
3264 | |
3265 | for (p = afflist; *p != NUL; ) { |
3266 | prevp = p; |
3267 | if (get_affitem(affile->af_flagtype, &p) != 0) { |
3268 | // A flag is a compound flag if it appears in "af_comp". |
3269 | STRLCPY(key, prevp, p - prevp + 1); |
3270 | hi = hash_find(&affile->af_comp, key); |
3271 | if (!HASHITEM_EMPTY(hi)) |
3272 | store_afflist[cnt++] = HI2CI(hi)->ci_newID; |
3273 | } |
3274 | if (affile->af_flagtype == AFT_NUM && *p == ',') |
3275 | ++p; |
3276 | } |
3277 | |
3278 | store_afflist[cnt] = NUL; |
3279 | } |
3280 | |
3281 | // Apply affixes to a word and store the resulting words. |
3282 | // "ht" is the hashtable with affentry_T that need to be applied, either |
3283 | // prefixes or suffixes. |
3284 | // "xht", when not NULL, is the prefix hashtable, to be used additionally on |
3285 | // the resulting words for combining affixes. |
3286 | // |
3287 | // Returns FAIL when out of memory. |
3288 | static int |
3289 | store_aff_word ( |
3290 | spellinfo_T *spin, // spell info |
3291 | char_u *word, // basic word start |
3292 | char_u *afflist, // list of names of supported affixes |
3293 | afffile_T *affile, |
3294 | hashtab_T *ht, |
3295 | hashtab_T *xht, |
3296 | int condit, // CONDIT_SUF et al. |
3297 | int flags, // flags for the word |
3298 | char_u *pfxlist, // list of prefix IDs |
3299 | int pfxlen // nr of flags in "pfxlist" for prefixes, rest |
3300 | // is compound flags |
3301 | ) |
3302 | { |
3303 | int todo; |
3304 | hashitem_T *hi; |
3305 | affheader_T *ah; |
3306 | affentry_T *ae; |
3307 | char_u newword[MAXWLEN]; |
3308 | int retval = OK; |
3309 | int i, j; |
3310 | char_u *p; |
3311 | int use_flags; |
3312 | char_u *use_pfxlist; |
3313 | int use_pfxlen; |
3314 | bool need_affix; |
3315 | char_u store_afflist[MAXWLEN]; |
3316 | char_u pfx_pfxlist[MAXWLEN]; |
3317 | size_t wordlen = STRLEN(word); |
3318 | int use_condit; |
3319 | |
3320 | todo = (int)ht->ht_used; |
3321 | for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) { |
3322 | if (!HASHITEM_EMPTY(hi)) { |
3323 | --todo; |
3324 | ah = HI2AH(hi); |
3325 | |
3326 | // Check that the affix combines, if required, and that the word |
3327 | // supports this affix. |
3328 | if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) |
3329 | && flag_in_afflist(affile->af_flagtype, afflist, |
3330 | ah->ah_flag)) { |
3331 | // Loop over all affix entries with this name. |
3332 | for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) { |
3333 | // Check the condition. It's not logical to match case |
3334 | // here, but it is required for compatibility with |
3335 | // Myspell. |
3336 | // Another requirement from Myspell is that the chop |
3337 | // string is shorter than the word itself. |
3338 | // For prefixes, when "PFXPOSTPONE" was used, only do |
3339 | // prefixes with a chop string and/or flags. |
3340 | // When a previously added affix had CIRCUMFIX this one |
3341 | // must have it too, if it had not then this one must not |
3342 | // have one either. |
3343 | if ((xht != NULL || !affile->af_pfxpostpone |
3344 | || ae->ae_chop != NULL |
3345 | || ae->ae_flags != NULL) |
3346 | && (ae->ae_chop == NULL |
3347 | || STRLEN(ae->ae_chop) < wordlen) |
3348 | && (ae->ae_prog == NULL |
3349 | || vim_regexec_prog(&ae->ae_prog, false, word, (colnr_T)0)) |
3350 | && (((condit & CONDIT_CFIX) == 0) |
3351 | == ((condit & CONDIT_AFF) == 0 |
3352 | || ae->ae_flags == NULL |
3353 | || !flag_in_afflist(affile->af_flagtype, |
3354 | ae->ae_flags, affile->af_circumfix)))) { |
3355 | // Match. Remove the chop and add the affix. |
3356 | if (xht == NULL) { |
3357 | // prefix: chop/add at the start of the word |
3358 | if (ae->ae_add == NULL) { |
3359 | *newword = NUL; |
3360 | } else { |
3361 | STRLCPY(newword, ae->ae_add, MAXWLEN); |
3362 | } |
3363 | p = word; |
3364 | if (ae->ae_chop != NULL) { |
3365 | // Skip chop string. |
3366 | if (has_mbyte) { |
3367 | i = mb_charlen(ae->ae_chop); |
3368 | for (; i > 0; i--) { |
3369 | MB_PTR_ADV(p); |
3370 | } |
3371 | } else { |
3372 | p += STRLEN(ae->ae_chop); |
3373 | } |
3374 | } |
3375 | STRCAT(newword, p); |
3376 | } else { |
3377 | // suffix: chop/add at the end of the word |
3378 | STRLCPY(newword, word, MAXWLEN); |
3379 | if (ae->ae_chop != NULL) { |
3380 | // Remove chop string. |
3381 | p = newword + STRLEN(newword); |
3382 | i = (int)MB_CHARLEN(ae->ae_chop); |
3383 | for (; i > 0; i--) { |
3384 | MB_PTR_BACK(newword, p); |
3385 | } |
3386 | *p = NUL; |
3387 | } |
3388 | if (ae->ae_add != NULL) |
3389 | STRCAT(newword, ae->ae_add); |
3390 | } |
3391 | |
3392 | use_flags = flags; |
3393 | use_pfxlist = pfxlist; |
3394 | use_pfxlen = pfxlen; |
3395 | need_affix = false; |
3396 | use_condit = condit | CONDIT_COMB | CONDIT_AFF; |
3397 | if (ae->ae_flags != NULL) { |
3398 | // Extract flags from the affix list. |
3399 | use_flags |= get_affix_flags(affile, ae->ae_flags); |
3400 | |
3401 | if (affile->af_needaffix != 0 && flag_in_afflist( |
3402 | affile->af_flagtype, ae->ae_flags, |
3403 | affile->af_needaffix)) |
3404 | need_affix = true; |
3405 | |
3406 | // When there is a CIRCUMFIX flag the other affix |
3407 | // must also have it and we don't add the word |
3408 | // with one affix. |
3409 | if (affile->af_circumfix != 0 && flag_in_afflist( |
3410 | affile->af_flagtype, ae->ae_flags, |
3411 | affile->af_circumfix)) { |
3412 | use_condit |= CONDIT_CFIX; |
3413 | if ((condit & CONDIT_CFIX) == 0) |
3414 | need_affix = true; |
3415 | } |
3416 | |
3417 | if (affile->af_pfxpostpone |
3418 | || spin->si_compflags != NULL) { |
3419 | if (affile->af_pfxpostpone) |
3420 | // Get prefix IDS from the affix list. |
3421 | use_pfxlen = get_pfxlist(affile, |
3422 | ae->ae_flags, store_afflist); |
3423 | else |
3424 | use_pfxlen = 0; |
3425 | use_pfxlist = store_afflist; |
3426 | |
3427 | // Combine the prefix IDs. Avoid adding the |
3428 | // same ID twice. |
3429 | for (i = 0; i < pfxlen; ++i) { |
3430 | for (j = 0; j < use_pfxlen; ++j) |
3431 | if (pfxlist[i] == use_pfxlist[j]) |
3432 | break; |
3433 | if (j == use_pfxlen) |
3434 | use_pfxlist[use_pfxlen++] = pfxlist[i]; |
3435 | } |
3436 | |
3437 | if (spin->si_compflags != NULL) |
3438 | // Get compound IDS from the affix list. |
3439 | get_compflags(affile, ae->ae_flags, |
3440 | use_pfxlist + use_pfxlen); |
3441 | else |
3442 | use_pfxlist[use_pfxlen] = NUL; |
3443 | |
3444 | // Combine the list of compound flags. |
3445 | // Concatenate them to the prefix IDs list. |
3446 | // Avoid adding the same ID twice. |
3447 | for (i = pfxlen; pfxlist[i] != NUL; ++i) { |
3448 | for (j = use_pfxlen; |
3449 | use_pfxlist[j] != NUL; ++j) |
3450 | if (pfxlist[i] == use_pfxlist[j]) |
3451 | break; |
3452 | if (use_pfxlist[j] == NUL) { |
3453 | use_pfxlist[j++] = pfxlist[i]; |
3454 | use_pfxlist[j] = NUL; |
3455 | } |
3456 | } |
3457 | } |
3458 | } |
3459 | |
3460 | // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't |
3461 | // use the compound flags. |
3462 | if (use_pfxlist != NULL && ae->ae_compforbid) { |
3463 | STRLCPY(pfx_pfxlist, use_pfxlist, use_pfxlen + 1); |
3464 | use_pfxlist = pfx_pfxlist; |
3465 | } |
3466 | |
3467 | // When there are postponed prefixes... |
3468 | if (spin->si_prefroot != NULL |
3469 | && spin->si_prefroot->wn_sibling != NULL) { |
3470 | // ... add a flag to indicate an affix was used. |
3471 | use_flags |= WF_HAS_AFF; |
3472 | |
3473 | // ... don't use a prefix list if combining |
3474 | // affixes is not allowed. But do use the |
3475 | // compound flags after them. |
3476 | if (!ah->ah_combine && use_pfxlist != NULL) |
3477 | use_pfxlist += use_pfxlen; |
3478 | } |
3479 | |
3480 | // When compounding is supported and there is no |
3481 | // "COMPOUNDPERMITFLAG" then forbid compounding on the |
3482 | // side where the affix is applied. |
3483 | if (spin->si_compflags != NULL && !ae->ae_comppermit) { |
3484 | if (xht != NULL) |
3485 | use_flags |= WF_NOCOMPAFT; |
3486 | else |
3487 | use_flags |= WF_NOCOMPBEF; |
3488 | } |
3489 | |
3490 | // Store the modified word. |
3491 | if (store_word(spin, newword, use_flags, |
3492 | spin->si_region, use_pfxlist, |
3493 | need_affix) == FAIL) |
3494 | retval = FAIL; |
3495 | |
3496 | // When added a prefix or a first suffix and the affix |
3497 | // has flags may add a(nother) suffix. RECURSIVE! |
3498 | if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) |
3499 | if (store_aff_word(spin, newword, ae->ae_flags, |
3500 | affile, &affile->af_suff, xht, |
3501 | use_condit & (xht == NULL |
3502 | ? ~0 : ~CONDIT_SUF), |
3503 | use_flags, use_pfxlist, pfxlen) == FAIL) |
3504 | retval = FAIL; |
3505 | |
3506 | // When added a suffix and combining is allowed also |
3507 | // try adding a prefix additionally. Both for the |
3508 | // word flags and for the affix flags. RECURSIVE! |
3509 | if (xht != NULL && ah->ah_combine) { |
3510 | if (store_aff_word(spin, newword, |
3511 | afflist, affile, |
3512 | xht, NULL, use_condit, |
3513 | use_flags, use_pfxlist, |
3514 | pfxlen) == FAIL |
3515 | || (ae->ae_flags != NULL |
3516 | && store_aff_word(spin, newword, |
3517 | ae->ae_flags, affile, |
3518 | xht, NULL, use_condit, |
3519 | use_flags, use_pfxlist, |
3520 | pfxlen) == FAIL)) |
3521 | retval = FAIL; |
3522 | } |
3523 | } |
3524 | } |
3525 | } |
3526 | } |
3527 | } |
3528 | |
3529 | return retval; |
3530 | } |
3531 | |
3532 | // Read a file with a list of words. |
3533 | static int spell_read_wordfile(spellinfo_T *spin, char_u *fname) |
3534 | { |
3535 | FILE *fd; |
3536 | long lnum = 0; |
3537 | char_u rline[MAXLINELEN]; |
3538 | char_u *line; |
3539 | char_u *pc = NULL; |
3540 | char_u *p; |
3541 | int l; |
3542 | int retval = OK; |
3543 | bool did_word = false; |
3544 | int non_ascii = 0; |
3545 | int flags; |
3546 | int regionmask; |
3547 | |
3548 | // Open the file. |
3549 | fd = os_fopen((char *)fname, "r" ); |
3550 | if (fd == NULL) { |
3551 | EMSG2(_(e_notopen), fname); |
3552 | return FAIL; |
3553 | } |
3554 | |
3555 | vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..." ), fname); |
3556 | spell_message(spin, IObuff); |
3557 | |
3558 | // Read all the lines in the file one by one. |
3559 | while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { |
3560 | line_breakcheck(); |
3561 | ++lnum; |
3562 | |
3563 | // Skip comment lines. |
3564 | if (*rline == '#') |
3565 | continue; |
3566 | |
3567 | // Remove CR, LF and white space from the end. |
3568 | l = (int)STRLEN(rline); |
3569 | while (l > 0 && rline[l - 1] <= ' ') |
3570 | --l; |
3571 | if (l == 0) |
3572 | continue; // empty or blank line |
3573 | rline[l] = NUL; |
3574 | |
3575 | // Convert from "/encoding={encoding}" to 'encoding' when needed. |
3576 | xfree(pc); |
3577 | if (spin->si_conv.vc_type != CONV_NONE) { |
3578 | pc = string_convert(&spin->si_conv, rline, NULL); |
3579 | if (pc == NULL) { |
3580 | smsg(_("Conversion failure for word in %s line %ld: %s" ), |
3581 | fname, lnum, rline); |
3582 | continue; |
3583 | } |
3584 | line = pc; |
3585 | } else { |
3586 | pc = NULL; |
3587 | line = rline; |
3588 | } |
3589 | |
3590 | if (*line == '/') { |
3591 | ++line; |
3592 | if (STRNCMP(line, "encoding=" , 9) == 0) { |
3593 | if (spin->si_conv.vc_type != CONV_NONE) { |
3594 | smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s" ), |
3595 | fname, lnum, line - 1); |
3596 | } else if (did_word) { |
3597 | smsg(_("/encoding= line after word ignored in %s line %ld: %s" ), |
3598 | fname, lnum, line - 1); |
3599 | } else { |
3600 | char_u *enc; |
3601 | |
3602 | // Setup for conversion to 'encoding'. |
3603 | line += 9; |
3604 | enc = enc_canonize(line); |
3605 | if (!spin->si_ascii |
3606 | && convert_setup(&spin->si_conv, enc, |
3607 | p_enc) == FAIL) |
3608 | smsg(_("Conversion in %s not supported: from %s to %s" ), |
3609 | fname, line, p_enc); |
3610 | xfree(enc); |
3611 | spin->si_conv.vc_fail = true; |
3612 | } |
3613 | continue; |
3614 | } |
3615 | |
3616 | if (STRNCMP(line, "regions=" , 8) == 0) { |
3617 | if (spin->si_region_count > 1) { |
3618 | smsg(_("Duplicate /regions= line ignored in %s line %ld: %s" ), |
3619 | fname, lnum, line); |
3620 | } else { |
3621 | line += 8; |
3622 | if (STRLEN(line) > MAXREGIONS * 2) { |
3623 | smsg(_("Too many regions in %s line %ld: %s" ), |
3624 | fname, lnum, line); |
3625 | } else { |
3626 | spin->si_region_count = (int)STRLEN(line) / 2; |
3627 | STRCPY(spin->si_region_name, line); |
3628 | |
3629 | // Adjust the mask for a word valid in all regions. |
3630 | spin->si_region = (1 << spin->si_region_count) - 1; |
3631 | } |
3632 | } |
3633 | continue; |
3634 | } |
3635 | |
3636 | smsg(_("/ line ignored in %s line %ld: %s" ), |
3637 | fname, lnum, line - 1); |
3638 | continue; |
3639 | } |
3640 | |
3641 | flags = 0; |
3642 | regionmask = spin->si_region; |
3643 | |
3644 | // Check for flags and region after a slash. |
3645 | p = vim_strchr(line, '/'); |
3646 | if (p != NULL) { |
3647 | *p++ = NUL; |
3648 | while (*p != NUL) { |
3649 | if (*p == '=') // keep-case word |
3650 | flags |= WF_KEEPCAP | WF_FIXCAP; |
3651 | else if (*p == '!') // Bad, bad, wicked word. |
3652 | flags |= WF_BANNED; |
3653 | else if (*p == '?') // Rare word. |
3654 | flags |= WF_RARE; |
3655 | else if (ascii_isdigit(*p)) { // region number(s) |
3656 | if ((flags & WF_REGION) == 0) // first one |
3657 | regionmask = 0; |
3658 | flags |= WF_REGION; |
3659 | |
3660 | l = *p - '0'; |
3661 | if (l == 0 || l > spin->si_region_count) { |
3662 | smsg(_("Invalid region nr in %s line %ld: %s" ), |
3663 | fname, lnum, p); |
3664 | break; |
3665 | } |
3666 | regionmask |= 1 << (l - 1); |
3667 | } else { |
3668 | smsg(_("Unrecognized flags in %s line %ld: %s" ), |
3669 | fname, lnum, p); |
3670 | break; |
3671 | } |
3672 | ++p; |
3673 | } |
3674 | } |
3675 | |
3676 | // Skip non-ASCII words when "spin->si_ascii" is true. |
3677 | if (spin->si_ascii && has_non_ascii(line)) { |
3678 | ++non_ascii; |
3679 | continue; |
3680 | } |
3681 | |
3682 | // Normal word: store it. |
3683 | if (store_word(spin, line, flags, regionmask, NULL, false) == FAIL) { |
3684 | retval = FAIL; |
3685 | break; |
3686 | } |
3687 | did_word = true; |
3688 | } |
3689 | |
3690 | xfree(pc); |
3691 | fclose(fd); |
3692 | |
3693 | if (spin->si_ascii && non_ascii > 0) { |
3694 | vim_snprintf((char *)IObuff, IOSIZE, |
3695 | _("Ignored %d words with non-ASCII characters" ), non_ascii); |
3696 | spell_message(spin, IObuff); |
3697 | } |
3698 | |
3699 | return retval; |
3700 | } |
3701 | |
3702 | /// Get part of an sblock_T, "len" bytes long. |
3703 | /// This avoids calling free() for every little struct we use (and keeping |
3704 | /// track of them). |
3705 | /// The memory is cleared to all zeros. |
3706 | /// |
3707 | /// @param len Length needed (<= SBLOCKSIZE). |
3708 | /// @param align Align for pointer. |
3709 | /// @return Pointer into block data. |
3710 | static void *getroom(spellinfo_T *spin, size_t len, bool align) |
3711 | FUNC_ATTR_NONNULL_RET |
3712 | { |
3713 | char_u *p; |
3714 | sblock_T *bl = spin->si_blocks; |
3715 | |
3716 | assert(len <= SBLOCKSIZE); |
3717 | |
3718 | if (align && bl != NULL) |
3719 | // Round size up for alignment. On some systems structures need to be |
3720 | // aligned to the size of a pointer (e.g., SPARC). |
3721 | bl->sb_used = (bl->sb_used + sizeof(char *) - 1) |
3722 | & ~(sizeof(char *) - 1); |
3723 | |
3724 | if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) { |
3725 | // Allocate a block of memory. It is not freed until much later. |
3726 | bl = xcalloc(1, (sizeof(sblock_T) + SBLOCKSIZE)); |
3727 | bl->sb_next = spin->si_blocks; |
3728 | spin->si_blocks = bl; |
3729 | bl->sb_used = 0; |
3730 | ++spin->si_blocks_cnt; |
3731 | } |
3732 | |
3733 | p = bl->sb_data + bl->sb_used; |
3734 | bl->sb_used += (int)len; |
3735 | |
3736 | return p; |
3737 | } |
3738 | |
3739 | // Make a copy of a string into memory allocated with getroom(). |
3740 | // Returns NULL when out of memory. |
3741 | static char_u *getroom_save(spellinfo_T *spin, char_u *s) |
3742 | { |
3743 | const size_t s_size = STRLEN(s) + 1; |
3744 | return memcpy(getroom(spin, s_size, false), s, s_size); |
3745 | } |
3746 | |
3747 | |
3748 | // Free the list of allocated sblock_T. |
3749 | static void free_blocks(sblock_T *bl) |
3750 | { |
3751 | sblock_T *next; |
3752 | |
3753 | while (bl != NULL) { |
3754 | next = bl->sb_next; |
3755 | xfree(bl); |
3756 | bl = next; |
3757 | } |
3758 | } |
3759 | |
3760 | // Allocate the root of a word tree. |
3761 | // Returns NULL when out of memory. |
3762 | static wordnode_T *wordtree_alloc(spellinfo_T *spin) |
3763 | FUNC_ATTR_NONNULL_RET |
3764 | { |
3765 | return (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); |
3766 | } |
3767 | |
3768 | // Store a word in the tree(s). |
3769 | // Always store it in the case-folded tree. For a keep-case word this is |
3770 | // useful when the word can also be used with all caps (no WF_FIXCAP flag) and |
3771 | // used to find suggestions. |
3772 | // For a keep-case word also store it in the keep-case tree. |
3773 | // When "pfxlist" is not NULL store the word for each postponed prefix ID and |
3774 | // compound flag. |
3775 | static int |
3776 | store_word ( |
3777 | spellinfo_T *spin, |
3778 | char_u *word, |
3779 | int flags, // extra flags, WF_BANNED |
3780 | int region, // supported region(s) |
3781 | char_u *pfxlist, // list of prefix IDs or NULL |
3782 | bool need_affix // only store word with affix ID |
3783 | ) |
3784 | { |
3785 | int len = (int)STRLEN(word); |
3786 | int ct = captype(word, word + len); |
3787 | char_u foldword[MAXWLEN]; |
3788 | int res = OK; |
3789 | char_u *p; |
3790 | |
3791 | (void)spell_casefold(word, len, foldword, MAXWLEN); |
3792 | for (p = pfxlist; res == OK; ++p) { |
3793 | if (!need_affix || (p != NULL && *p != NUL)) |
3794 | res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, |
3795 | region, p == NULL ? 0 : *p); |
3796 | if (p == NULL || *p == NUL) |
3797 | break; |
3798 | } |
3799 | ++spin->si_foldwcount; |
3800 | |
3801 | if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) { |
3802 | for (p = pfxlist; res == OK; ++p) { |
3803 | if (!need_affix || (p != NULL && *p != NUL)) |
3804 | res = tree_add_word(spin, word, spin->si_keeproot, flags, |
3805 | region, p == NULL ? 0 : *p); |
3806 | if (p == NULL || *p == NUL) |
3807 | break; |
3808 | } |
3809 | ++spin->si_keepwcount; |
3810 | } |
3811 | return res; |
3812 | } |
3813 | |
// Add word "word" to a word tree at "root".
// When "flags" < 0 we are adding to the prefix tree where "flags" is used for
// "rare" and "region" is the condition nr.
//
// The word is added byte by byte, each byte one level deeper in the tree,
// ending with a node for the terminating NUL.  That end node receives
// "flags" and "affixID", and "region" is OR-ed into its wn_region.
// A sibling list that is referenced more than once is copied before it is
// modified.
// After adding the word the function may compress the tree(s) of "spin", see
// the comment further down for when that happens.
//
// Returns FAIL when out of memory (get_wordnode() returned NULL), OK
// otherwise.
static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int flags, int region, int affixID)
{
  wordnode_T *node = root;
  wordnode_T *np;
  wordnode_T *copyp, **copyprev;
  // Address of the pointer that links to "node" (sibling or child pointer of
  // the previous node); NULL as long as we are at "root".
  wordnode_T **prev = NULL;
  int i;

  // Add each byte of the word to the tree, including the NUL at the end.
  for (i = 0;; ++i) {
    // When there is more than one reference to this node we need to make
    // a copy, so that we can modify it.  Copy the whole list of siblings
    // (we don't optimize for a partly shared list of siblings).
    if (node != NULL && node->wn_refs > 1) {
      // The original list loses the reference we are replacing.
      --node->wn_refs;
      copyprev = prev;
      for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) {
        // Allocate a new node and copy the info.
        np = get_wordnode(spin);
        if (np == NULL)
          return FAIL;
        np->wn_child = copyp->wn_child;
        if (np->wn_child != NULL)
          ++np->wn_child->wn_refs;            // child gets extra ref
        np->wn_byte = copyp->wn_byte;
        // Flags, region and affix ID are only copied for an end-of-word
        // node.
        if (np->wn_byte == NUL) {
          np->wn_flags = copyp->wn_flags;
          np->wn_region = copyp->wn_region;
          np->wn_affixID = copyp->wn_affixID;
        }

        // Link the new node in the list, there will be one ref.
        np->wn_refs = 1;
        if (copyprev != NULL)
          *copyprev = np;
        copyprev = &np->wn_sibling;

        // Let "node" point to the head of the copied list.
        if (copyp == node)
          node = np;
      }
    }

    // Look for the sibling that has the same character.  They are sorted
    // on byte value, thus stop searching when a sibling is found with a
    // higher byte value.  For zero bytes (end of word) the sorting is
    // done on flags and then on affixID.
    // (For the prefix tree, "flags" < 0, NUL nodes are ordered on affixID
    // only; with spin->si_sugtree set the region is used instead of the
    // affixID.)
    while (node != NULL
           && (node->wn_byte < word[i]
               || (node->wn_byte == NUL
                   && (flags < 0
                       ? node->wn_affixID < (unsigned)affixID
                       : (node->wn_flags < (unsigned)(flags & WN_MASK)
                          || (node->wn_flags == (flags & WN_MASK)
                              && (spin->si_sugtree
                                  ? (node->wn_region & 0xffff) < region
                                  : node->wn_affixID
                                  < (unsigned)affixID))))))) {
      prev = &node->wn_sibling;
      node = *prev;
    }
    // A new node is needed when no sibling with this byte exists.  At the
    // end of the word an existing NUL node is only re-used when this is not
    // the prefix tree, not the sugtree, and both flags and affixID match.
    if (node == NULL
        || node->wn_byte != word[i]
        || (word[i] == NUL
            && (flags < 0
                || spin->si_sugtree
                || node->wn_flags != (flags & WN_MASK)
                || node->wn_affixID != affixID))) {
      // Allocate a new node.
      np = get_wordnode(spin);
      if (np == NULL)
        return FAIL;
      np->wn_byte = word[i];

      // If "node" is NULL this is a new child or the end of the sibling
      // list: ref count is one.  Otherwise use ref count of sibling and
      // make ref count of sibling one (matters when inserting in front
      // of the list of siblings).
      if (node == NULL)
        np->wn_refs = 1;
      else {
        np->wn_refs = node->wn_refs;
        node->wn_refs = 1;
      }
      // Insert the new node before "node" in the sibling list.
      if (prev != NULL)
        *prev = np;
      np->wn_sibling = node;
      node = np;
    }

    if (word[i] == NUL) {
      // End of the word: store the word info in the end node and stop.
      node->wn_flags = flags;
      node->wn_region |= region;
      node->wn_affixID = affixID;
      break;
    }
    // Continue one level deeper with the next byte.
    prev = &node->wn_child;
    node = *prev;
  }
#ifdef SPELL_PRINTTREE
  smsg((char_u *)"Added \"%s\"", word);
  spell_print_tree(root->wn_sibling);
#endif

  // count nr of words added since last message
  ++spin->si_msg_count;

  if (spin->si_compress_cnt > 1) {
    if (--spin->si_compress_cnt == 1)
      // Did enough words to lower the block count limit.
      spin->si_blocks_cnt += compress_inc;
  }

  // When we have allocated lots of memory we need to compress the word tree
  // to free up some room.  But compression is slow, and we might actually
  // need that room, thus only compress in the following situations:
  // 1. When not compressed before (si_compress_cnt == 0): when using
  //    "compress_start" blocks.
  // 2. When compressed before and used "compress_inc" blocks before
  //    adding "compress_added" words (si_compress_cnt > 1).
  // 3. When compressed before, added "compress_added" words
  //    (si_compress_cnt == 1) and the number of free nodes drops below the
  //    maximum word length.
#ifndef SPELL_COMPRESS_ALLWAYS
  if (spin->si_compress_cnt == 1  // NOLINT(readability/braces)
      ? spin->si_free_count < MAXWLEN
      : spin->si_blocks_cnt >= compress_start)
#endif
  {
    // Decrement the block counter.  The effect is that we compress again
    // when the freed up room has been used and another "compress_inc"
    // blocks have been allocated.  Unless "compress_added" words have
    // been added, then the limit is put back again.
    spin->si_blocks_cnt -= compress_inc;
    spin->si_compress_cnt = compress_added;

    if (spin->si_verbose) {
      msg_start();
      msg_puts(_(msg_compressing));
      msg_clr_eos();
      msg_didout = FALSE;
      msg_col = 0;
      ui_flush();
    }

    // Compress both trees.  Either they both have many nodes, which makes
    // compression useful, or one of them is small, which means
    // compression goes fast.  But when filling the soundfold word tree
    // there is no keep-case tree.
    wordtree_compress(spin, spin->si_foldroot);
    if (affixID >= 0)
      wordtree_compress(spin, spin->si_keeproot);
  }

  return OK;
}
3974 | |
3975 | // Get a wordnode_T, either from the list of previously freed nodes or |
3976 | // allocate a new one. |
3977 | // Returns NULL when out of memory. |
3978 | static wordnode_T *get_wordnode(spellinfo_T *spin) |
3979 | { |
3980 | wordnode_T *n; |
3981 | |
3982 | if (spin->si_first_free == NULL) |
3983 | n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); |
3984 | else { |
3985 | n = spin->si_first_free; |
3986 | spin->si_first_free = n->wn_child; |
3987 | memset(n, 0, sizeof(wordnode_T)); |
3988 | --spin->si_free_count; |
3989 | } |
3990 | #ifdef SPELL_PRINTTREE |
3991 | if (n != NULL) |
3992 | n->wn_nr = ++spin->si_wordnode_nr; |
3993 | #endif |
3994 | return n; |
3995 | } |
3996 | |
3997 | // Decrement the reference count on a node (which is the head of a list of |
3998 | // siblings). If the reference count becomes zero free the node and its |
3999 | // siblings. |
4000 | // Returns the number of nodes actually freed. |
4001 | static int deref_wordnode(spellinfo_T *spin, wordnode_T *node) |
4002 | { |
4003 | wordnode_T *np; |
4004 | int cnt = 0; |
4005 | |
4006 | if (--node->wn_refs == 0) { |
4007 | for (np = node; np != NULL; np = np->wn_sibling) { |
4008 | if (np->wn_child != NULL) |
4009 | cnt += deref_wordnode(spin, np->wn_child); |
4010 | free_wordnode(spin, np); |
4011 | ++cnt; |
4012 | } |
4013 | ++cnt; // length field |
4014 | } |
4015 | return cnt; |
4016 | } |
4017 | |
4018 | // Free a wordnode_T for re-use later. |
4019 | // Only the "wn_child" field becomes invalid. |
4020 | static void free_wordnode(spellinfo_T *spin, wordnode_T *n) |
4021 | { |
4022 | n->wn_child = spin->si_first_free; |
4023 | spin->si_first_free = n; |
4024 | ++spin->si_free_count; |
4025 | } |
4026 | |
4027 | // Compress a tree: find tails that are identical and can be shared. |
4028 | static void wordtree_compress(spellinfo_T *spin, wordnode_T *root) |
4029 | { |
4030 | hashtab_T ht; |
4031 | int n; |
4032 | int tot = 0; |
4033 | int perc; |
4034 | |
4035 | // Skip the root itself, it's not actually used. The first sibling is the |
4036 | // start of the tree. |
4037 | if (root->wn_sibling != NULL) { |
4038 | hash_init(&ht); |
4039 | n = node_compress(spin, root->wn_sibling, &ht, &tot); |
4040 | |
4041 | #ifndef SPELL_PRINTTREE |
4042 | if (spin->si_verbose || p_verbose > 2) |
4043 | #endif |
4044 | { |
4045 | if (tot > 1000000) |
4046 | perc = (tot - n) / (tot / 100); |
4047 | else if (tot == 0) |
4048 | perc = 0; |
4049 | else |
4050 | perc = (tot - n) * 100 / tot; |
4051 | vim_snprintf((char *)IObuff, IOSIZE, |
4052 | _("Compressed %d of %d nodes; %d (%d%%) remaining" ), |
4053 | n, tot, tot - n, perc); |
4054 | spell_message(spin, IObuff); |
4055 | } |
4056 | #ifdef SPELL_PRINTTREE |
4057 | spell_print_tree(root->wn_sibling); |
4058 | #endif |
4059 | hash_clear(&ht); |
4060 | } |
4061 | } |
4062 | |
// Compress a node, its siblings and its children, depth first.
//
// For every sibling in the list starting at "node" the child list is
// compressed first and then looked up in hashtable "ht".  When an identical
// child list was seen before, that one is used instead and the current child
// list is dereferenced.  Otherwise the child list is remembered in "ht".
// Finally a hash key is stored in "node" itself, so that the caller can look
// up the list that starts at "node".
//
// The loop over the siblings stops early when "got_int" is set.
//
// Returns the number of compressed nodes.
static int
node_compress (
    spellinfo_T *spin,
    wordnode_T *node,
    hashtab_T *ht,
    int *tot           // total count of nodes before compressing,
                       // incremented while going through the tree
)
{
  wordnode_T *np;
  wordnode_T *tp;
  wordnode_T *child;
  hash_T hash;
  hashitem_T *hi;
  int len = 0;              // number of siblings visited
  unsigned nr, n;
  int compressed = 0;

  // Go through the list of siblings.  Compress each child and then try
  // finding an identical child to replace it.
  // Note that with "child" we mean not just the node that is pointed to,
  // but the whole list of siblings of which the child node is the first.
  for (np = node; np != NULL && !got_int; np = np->wn_sibling) {
    ++len;
    if ((child = np->wn_child) != NULL) {
      // Compress the child first.  This fills hashkey.
      compressed += node_compress(spin, child, ht, tot);

      // Try to find an identical child.
      hash = hash_hash(child->wn_u1.hashkey);
      hi = hash_lookup(ht, (const char *)child->wn_u1.hashkey,
                       STRLEN(child->wn_u1.hashkey), hash);
      if (!HASHITEM_EMPTY(hi)) {
        // There are children we encountered before with a hash value
        // identical to the current child.  Now check if there is one
        // that is really identical.
        for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
          if (node_equal(child, tp)) {
            // Found one!  Now use that child in place of the
            // current one.  This means the current child and all
            // its siblings is unlinked from the tree.
            ++tp->wn_refs;
            compressed += deref_wordnode(spin, child);
            np->wn_child = tp;
            break;
          }
        if (tp == NULL) {
          // No other child with this hash value equals the child of
          // the node, add it to the linked list after the first
          // item.
          tp = HI2WN(hi);
          child->wn_u2.next = tp->wn_u2.next;
          tp->wn_u2.next = child;
        }
      } else
        // No other child has this hash value, add it to the
        // hashtable.
        hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
    }
  }
  *tot += len + 1;      // add one for the node that stores the length

  // Make a hash key for the node and its siblings, so that we can quickly
  // find a lookalike node.  This must be done after compressing the sibling
  // list, otherwise the hash key would become invalid by the compression.
  // NOTE(review): when "got_int" is already set on entry "len" is still zero
  // here, which makes the first key byte NUL (an empty key) - confirm that
  // this is harmless for an interrupted compression.
  node->wn_u1.hashkey[0] = len;
  nr = 0;
  for (np = node; np != NULL; np = np->wn_sibling) {
    if (np->wn_byte == NUL)
      // end node: use wn_flags, wn_region and wn_affixID
      n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
    else
      // byte node: use the byte value and the child pointer
      n = (unsigned)(np->wn_byte + ((uintptr_t)np->wn_child << 8));
    nr = nr * 101 + n;
  }

  // Store the four bytes of "nr" in the key, lowest byte first.
  // Avoid NUL bytes, it terminates the hash key.
  n = nr & 0xff;
  node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
  n = (nr >> 8) & 0xff;
  node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
  n = (nr >> 16) & 0xff;
  node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
  n = (nr >> 24) & 0xff;
  node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
  node->wn_u1.hashkey[5] = NUL;

  // Check for CTRL-C pressed now and then.
  fast_breakcheck();

  return compressed;
}
4158 | |
4159 | // Returns true when two nodes have identical siblings and children. |
4160 | static bool node_equal(wordnode_T *n1, wordnode_T *n2) |
4161 | { |
4162 | wordnode_T *p1; |
4163 | wordnode_T *p2; |
4164 | |
4165 | for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; |
4166 | p1 = p1->wn_sibling, p2 = p2->wn_sibling) |
4167 | if (p1->wn_byte != p2->wn_byte |
4168 | || (p1->wn_byte == NUL |
4169 | ? (p1->wn_flags != p2->wn_flags |
4170 | || p1->wn_region != p2->wn_region |
4171 | || p1->wn_affixID != p2->wn_affixID) |
4172 | : (p1->wn_child != p2->wn_child))) |
4173 | break; |
4174 | |
4175 | return p1 == NULL && p2 == NULL; |
4176 | } |
4177 | |
4178 | |
4179 | // Function given to qsort() to sort the REP items on "from" string. |
4180 | static int rep_compare(const void *s1, const void *s2) |
4181 | { |
4182 | fromto_T *p1 = (fromto_T *)s1; |
4183 | fromto_T *p2 = (fromto_T *)s2; |
4184 | |
4185 | return STRCMP(p1->ft_from, p2->ft_from); |
4186 | } |
4187 | |
// Write the Vim .spl file "fname".
//
// The file is written in the order <HEADER>, <SECTIONS>, <LWORDTREE>,
// <KWORDTREE>, <PREFIXTREE>; a section is only written when "spin" has data
// for it.
//
// Error handling: "fwv" collects the results of the fwrite() calls.  Every
// call writes one item, thus returns 1 on success, and the results are
// combined with "&=".  The putc() calls are not checked, except for one extra
// byte written at the very end.  Any failure, including a failing fclose(),
// results in FAIL and the e_write error message.
// NOTE(review): fwrite() returns zero when the item size is zero, which
// would clear "fwv" and be reported as a write error.  Only the REP/SAL
// items are guarded with "l > 0"; confirm the other strings can never be
// empty.
//
// Also sets spin->si_memtot and, when the SN_SUGFILE section is written,
// spin->si_sugtime.
//
// Return OK/FAIL.
static int write_vim_spell(spellinfo_T *spin, char_u *fname)
{
  int retval = OK;
  int regionmask;

  FILE *fd = os_fopen((char *)fname, "w");
  if (fd == NULL) {
    EMSG2(_(e_notopen), fname);
    return FAIL;
  }

  // <HEADER>: <fileID> <versionnr>
  //                                                            <fileID>
  size_t fwv = fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, 1, fd);
  if (fwv != (size_t)1)
    // Catch first write error, don't try writing more.
    goto theend;

  putc(VIMSPELLVERSION, fd);                                // <versionnr>

  // <SECTIONS>: <section> ... <sectionend>

  // SN_INFO: <infotext>
  if (spin->si_info != NULL) {
    putc(SN_INFO, fd);                                      // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    size_t i = STRLEN(spin->si_info);
    put_bytes(fd, i, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_info, i, 1, fd);                 // <infotext>
  }

  // SN_REGION: <regionname> ...
  // Write the region names only if there is more than one.
  if (spin->si_region_count > 1) {
    putc(SN_REGION, fd);                                    // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>
    size_t l = (size_t)spin->si_region_count * 2;
    put_bytes(fd, l, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_region_name, l, 1, fd);
    // <regionname> ...
    // One bit for each region; passed to put_node() for the word trees.
    regionmask = (1 << spin->si_region_count) - 1;
  } else
    regionmask = 0;

  // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
  //
  // The table with character flags and the table for case folding.
  // This makes sure the same characters are recognized as word characters
  // when generating and when using a spell file.
  // Skip this for ASCII, the table may conflict with the one used for
  // 'encoding'.
  // Also skip this for an .add.spl file, the main spell file must contain
  // the table (avoids that it conflicts).  File is shorter too.
  if (!spin->si_ascii && !spin->si_add) {
    char_u folchars[128 * 8];
    int flags;

    putc(SN_CHARFLAGS, fd);                                 // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>

    // Form the <folchars> string first, we need to know its length.
    size_t l = 0;
    for (size_t i = 128; i < 256; i++) {
      l += (size_t)utf_char2bytes(spelltab.st_fold[i], folchars + l);
    }
    // <charflagslen> (1) + <charflags> (128) + <folcharslen> (2)
    // + <folchars> (l)
    put_bytes(fd, 1 + 128 + 2 + l, 4);                      // <sectionlen>

    fputc(128, fd);                                         // <charflagslen>
    for (size_t i = 128; i < 256; ++i) {
      flags = 0;
      if (spelltab.st_isw[i])
        flags |= CF_WORD;
      if (spelltab.st_isu[i])
        flags |= CF_UPPER;
      fputc(flags, fd);                                     // <charflags>
    }

    put_bytes(fd, l, 2);                                    // <folcharslen>
    fwv &= fwrite(folchars, l, 1, fd);                      // <folchars>
  }

  // SN_MIDWORD: <midword>
  if (spin->si_midword != NULL) {
    putc(SN_MIDWORD, fd);                                   // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>

    size_t i = STRLEN(spin->si_midword);
    put_bytes(fd, i, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_midword, i, 1, fd);
    // <midword>
  }

  // SN_PREFCOND: <prefcondcnt> <prefcond> ...
  if (!GA_EMPTY(&spin->si_prefcond)) {
    putc(SN_PREFCOND, fd);                                  // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>

    // The first call, with a NULL file, is only used to get the length.
    size_t l = (size_t)write_spell_prefcond(NULL, &spin->si_prefcond);
    put_bytes(fd, l, 4);                                    // <sectionlen>

    write_spell_prefcond(fd, &spin->si_prefcond);
  }

  // SN_REP: <repcount> <rep> ...
  // SN_SAL: <salflags> <salcount> <sal> ...
  // SN_REPSAL: <repcount> <rep> ...

  // round 1: SN_REP section
  // round 2: SN_SAL section (unless SN_SOFO is used)
  // round 3: SN_REPSAL section
  for (unsigned int round = 1; round <= 3; ++round) {
    garray_T *gap;
    if (round == 1)
      gap = &spin->si_rep;
    else if (round == 2) {
      // Don't write SN_SAL when using a SN_SOFO section
      if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
        continue;
      gap = &spin->si_sal;
    } else
      gap = &spin->si_repsal;

    // Don't write the section if there are no items.
    if (GA_EMPTY(gap))
      continue;

    // Sort the REP/REPSAL items.
    if (round != 2)
      qsort(gap->ga_data, (size_t)gap->ga_len,
            sizeof(fromto_T), rep_compare);

    int sect_id = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
    putc(sect_id, fd);                                      // <sectionID>

    // This is for making suggestions, section is not required.
    putc(0, fd);                                            // <sectionflags>

    // Compute the length of what follows.
    size_t l = 2;           // count <repcount> or <salcount>
    assert(gap->ga_len >= 0);
    for (size_t i = 0; i < (size_t)gap->ga_len; ++i) {
      fromto_T *ftp = &((fromto_T *)gap->ga_data)[i];
      l += 1 + STRLEN(ftp->ft_from);      // count <*fromlen> and <*from>
      l += 1 + STRLEN(ftp->ft_to);        // count <*tolen> and <*to>
    }
    if (round == 2)
      ++l;                  // count <salflags>
    put_bytes(fd, l, 4);                                    // <sectionlen>

    if (round == 2) {
      int i = 0;
      if (spin->si_followup)
        i |= SAL_F0LLOWUP;
      if (spin->si_collapse)
        i |= SAL_COLLAPSE;
      if (spin->si_rem_accents)
        i |= SAL_REM_ACCENTS;
      putc(i, fd);                                          // <salflags>
    }

    put_bytes(fd, (uintmax_t)gap->ga_len, 2);     // <repcount> or <salcount>
    for (size_t i = 0; i < (size_t)gap->ga_len; ++i) {
      // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
      // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
      fromto_T *ftp = &((fromto_T *)gap->ga_data)[i];
      // rr 1: the "from" string, rr 2: the "to" string
      for (unsigned int rr = 1; rr <= 2; ++rr) {
        char_u *p = rr == 1 ? ftp->ft_from : ftp->ft_to;
        l = STRLEN(p);
        assert(l < INT_MAX);
        putc((int)l, fd);
        // Writing an item of zero bytes would make fwrite() return zero,
        // which must not be seen as an error.
        if (l > 0)
          fwv &= fwrite(p, l, 1, fd);
      }
    }

  }

  // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
  // This is for making suggestions, section is not required.
  if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) {
    putc(SN_SOFO, fd);                                      // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    size_t l = STRLEN(spin->si_sofofr);
    // The extra 4 bytes are the two 2-byte length fields.
    put_bytes(fd, l + STRLEN(spin->si_sofoto) + 4, 4);      // <sectionlen>

    put_bytes(fd, l, 2);                                    // <sofofromlen>
    fwv &= fwrite(spin->si_sofofr, l, 1, fd);               // <sofofrom>

    l = STRLEN(spin->si_sofoto);
    put_bytes(fd, l, 2);                                    // <sofotolen>
    fwv &= fwrite(spin->si_sofoto, l, 1, fd);               // <sofoto>
  }

  // SN_WORDS: <word> ...
  // This is for making suggestions, section is not required.
  if (spin->si_commonwords.ht_used > 0) {
    putc(SN_WORDS, fd);                                     // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    // round 1: count the bytes
    // round 2: write the bytes
    for (unsigned int round = 1; round <= 2; ++round) {
      size_t todo;
      size_t len = 0;
      hashitem_T *hi;

      // Visit the hashtable array until all used items have been seen.
      todo = spin->si_commonwords.ht_used;
      for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
        if (!HASHITEM_EMPTY(hi)) {
          // Each word is written including its terminating NUL.
          size_t l = STRLEN(hi->hi_key) + 1;
          len += l;
          if (round == 2)                                   // <word>
            fwv &= fwrite(hi->hi_key, l, 1, fd);
          --todo;
        }
      if (round == 1)
        put_bytes(fd, len, 4);                              // <sectionlen>
    }
  }

  // SN_MAP: <mapstr>
  // This is for making suggestions, section is not required.
  if (!GA_EMPTY(&spin->si_map)) {
    putc(SN_MAP, fd);                                       // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    size_t l = (size_t)spin->si_map.ga_len;
    put_bytes(fd, l, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_map.ga_data, l, 1, fd);          // <mapstr>
  }

  // SN_SUGFILE: <timestamp>
  // This is used to notify that a .sug file may be available and at the
  // same time allows for checking that a .sug file that is found matches
  // with this .spl file.  That's because the word numbers must be exactly
  // right.
  if (!spin->si_nosugfile
      && (!GA_EMPTY(&spin->si_sal)
          || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) {
    putc(SN_SUGFILE, fd);                                   // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    put_bytes(fd, 8, 4);                                    // <sectionlen>

    // Set si_sugtime and write it to the file.
    spin->si_sugtime = time(NULL);
    put_time(fd, spin->si_sugtime);                         // <timestamp>
  }

  // SN_NOSPLITSUGS: nothing
  // This is used to notify that no suggestions with word splits are to be
  // made.
  if (spin->si_nosplitsugs) {
    putc(SN_NOSPLITSUGS, fd);                               // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    put_bytes(fd, 0, 4);                                    // <sectionlen>
  }

  // SN_NOCOMPOUNDSUGS: nothing
  // This is used to notify that no suggestions with compounds are to be
  // made.
  if (spin->si_nocompoundsugs) {
    putc(SN_NOCOMPOUNDSUGS, fd);                            // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    put_bytes(fd, 0, 4);                                    // <sectionlen>
  }

  // SN_COMPOUND: compound info.
  // We don't mark it required, when not supported all compound words will
  // be bad words.
  if (spin->si_compflags != NULL) {
    putc(SN_COMPOUND, fd);                                  // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    // Length of <compflags> plus, for each pattern, one length byte and the
    // pattern text.
    size_t l = STRLEN(spin->si_compflags);
    assert(spin->si_comppat.ga_len >= 0);
    for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; ++i) {
      l += STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
    }
    // The extra 7 bytes: the five single bytes and the 2-byte
    // <comppatcount> written below.
    put_bytes(fd, l + 7, 4);                                // <sectionlen>

    putc(spin->si_compmax, fd);                             // <compmax>
    putc(spin->si_compminlen, fd);                          // <compminlen>
    putc(spin->si_compsylmax, fd);                          // <compsylmax>
    putc(0, fd);                // for Vim 7.0b compatibility
    putc(spin->si_compoptions, fd);                         // <compoptions>
    put_bytes(fd, (uintmax_t)spin->si_comppat.ga_len, 2);   // <comppatcount>
    for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; ++i) {
      char_u *p = ((char_u **)(spin->si_comppat.ga_data))[i];
      assert(STRLEN(p) < INT_MAX);
      putc((int)STRLEN(p), fd);                             // <comppatlen>
      fwv &= fwrite(p, STRLEN(p), 1, fd);                   // <comppattext>
    }
    // <compflags>
    fwv &= fwrite(spin->si_compflags, STRLEN(spin->si_compflags), 1, fd);
  }

  // SN_NOBREAK: NOBREAK flag
  if (spin->si_nobreak) {
    putc(SN_NOBREAK, fd);                                   // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    // It's empty, the presence of the section flags the feature.
    put_bytes(fd, 0, 4);                                    // <sectionlen>
  }

  // SN_SYLLABLE: syllable info.
  // We don't mark it required, when not supported syllables will not be
  // counted.
  if (spin->si_syllable != NULL) {
    putc(SN_SYLLABLE, fd);                                  // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    size_t l = STRLEN(spin->si_syllable);
    put_bytes(fd, l, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_syllable, l, 1, fd);             // <syllable>
  }

  // end of <SECTIONS>
  putc(SN_END, fd);                                         // <sectionend>


  // <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
  // round 1: case-folded tree, round 2: keep-case tree, round 3: prefix tree
  spin->si_memtot = 0;
  for (unsigned int round = 1; round <= 3; ++round) {
    wordnode_T *tree;
    if (round == 1)
      tree = spin->si_foldroot->wn_sibling;
    else if (round == 2)
      tree = spin->si_keeproot->wn_sibling;
    else
      tree = spin->si_prefroot->wn_sibling;

    // Clear the index and wnode fields in the tree.
    clear_node(tree);

    // Count the number of nodes.  Needed to be able to allocate the
    // memory when reading the nodes.  Also fills in index for shared
    // nodes.
    size_t nodecount = (size_t)put_node(NULL, tree, 0, regionmask, round == 3);

    // number of nodes in 4 bytes
    put_bytes(fd, nodecount, 4);                            // <nodecount>
    assert(nodecount + nodecount * sizeof(int) < INT_MAX);
    spin->si_memtot += (int)(nodecount + nodecount * sizeof(int));

    // Write the nodes.
    (void)put_node(fd, tree, 0, regionmask, round == 3);
  }

  // Write another byte to check for errors (file system full).
  if (putc(0, fd) == EOF)
    retval = FAIL;
theend:
  // Also reached directly when writing the file ID failed.
  if (fclose(fd) == EOF)
    retval = FAIL;

  // Any fwrite() that did not return 1 has cleared "fwv".
  if (fwv != (size_t)1)
    retval = FAIL;
  if (retval == FAIL)
    EMSG(_(e_write));

  return retval;
}
4553 | |
4554 | // Clear the index and wnode fields of "node", it siblings and its |
4555 | // children. This is needed because they are a union with other items to save |
4556 | // space. |
4557 | static void clear_node(wordnode_T *node) |
4558 | { |
4559 | wordnode_T *np; |
4560 | |
4561 | if (node != NULL) |
4562 | for (np = node; np != NULL; np = np->wn_sibling) { |
4563 | np->wn_u1.index = 0; |
4564 | np->wn_u2.wnode = NULL; |
4565 | |
4566 | if (np->wn_byte != NUL) |
4567 | clear_node(np->wn_child); |
4568 | } |
4569 | } |
4570 | |
4571 | |
// Dump a word tree at node "node".
//
// This first writes the list of possible bytes (siblings).  Then for each
// byte recursively write the children.
//
// NOTE: The code here must match the code in read_tree_node(), since
// assumptions are made about the indexes (so that we don't have to write them
// in the file).
//
// Callers in this file run it twice over the same tree after clear_node():
// first with "fd" NULL, which only assigns wn_u1.index / wn_u2.wnode and
// counts, then with a real "fd", which emits the bytes.
//
// "fd"          output stream, NULL when only counting
// "node"        first sibling of the list to dump, NULL for an empty tree
// "idx"         index at which this sibling list is stored
// "regionmask"  when non-zero, an end-of-word entry whose wn_region differs
//               from it gets WF_REGION set and its region byte written
// "prefixtree"  true when dumping the PREFIXTREE, which uses a different
//               layout for end-of-word entries
//
// Returns the number of nodes used: the first index past this sibling list
// and everything dumped below it (equal to the node count when "idx" is 0).
// Returns 0 for an empty tree, and also right after a failed write of a
// sibling byte (an error message has been given in that case).
static int
put_node (
    FILE *fd,                // NULL when only counting
    wordnode_T *node,
    int idx,
    int regionmask,
    bool prefixtree          // true for PREFIXTREE
)
{
  // If "node" is zero the tree is empty.
  if (node == NULL)
    return 0;

  // Store the index where this node is written.
  node->wn_u1.index = idx;

  // Count the number of siblings.
  int siblingcount = 0;
  for (wordnode_T *np = node; np != NULL; np = np->wn_sibling)
    ++siblingcount;

  // Write the sibling count.
  if (fd != NULL)
    putc(siblingcount, fd);                                     // <siblingcount>

  // Write each sibling byte and optionally extra info.
  for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) {
    if (np->wn_byte == 0) {
      // End-of-word entry: nothing to record while counting, only written
      // when there is a file.
      if (fd != NULL) {
        // For a NUL byte (end of word) write the flags etc.
        if (prefixtree) {
          // In PREFIXTREE write the required affixID and the
          // associated condition nr (stored in wn_region).  The
          // byte value is misused to store the "rare" and "not
          // combining" flags
          if (np->wn_flags == (uint16_t)PFX_FLAGS)
            putc(BY_NOFLAGS, fd);                               // <byte>
          else {
            putc(BY_FLAGS, fd);                                 // <byte>
            putc(np->wn_flags, fd);                             // <pflags>
          }
          putc(np->wn_affixID, fd);                             // <affixID>
          put_bytes(fd, (uintmax_t)np->wn_region, 2);           // <prefcondnr>
        } else {
          // For word trees we write the flag/region items.
          int flags = np->wn_flags;
          if (regionmask != 0 && np->wn_region != regionmask)
            flags |= WF_REGION;
          if (np->wn_affixID != 0)
            flags |= WF_AFX;
          if (flags == 0) {
            // word without flags or region
            putc(BY_NOFLAGS, fd);                               // <byte>
          } else {
            // The two-byte form is chosen from the stored wn_flags, not
            // from the local "flags" with WF_REGION/WF_AFX added.
            if (np->wn_flags >= 0x100) {
              putc(BY_FLAGS2, fd);                              // <byte>
              putc(flags, fd);                                  // <flags>
              putc((int)((unsigned)flags >> 8), fd);            // <flags2>
            } else {
              putc(BY_FLAGS, fd);                               // <byte>
              putc(flags, fd);                                  // <flags>
            }
            if (flags & WF_REGION)
              putc(np->wn_region, fd);                          // <region>
            if (flags & WF_AFX)
              putc(np->wn_affixID, fd);                         // <affixID>
          }
        }
      }
    } else {
      // A child that already has an index and was claimed by a different
      // sibling list is shared: only a reference to it is written here.
      if (np->wn_child->wn_u1.index != 0
          && np->wn_child->wn_u2.wnode != node) {
        // The child is written elsewhere, write the reference.
        if (fd != NULL) {
          putc(BY_INDEX, fd);                                   // <byte>
          put_bytes(fd, (uintmax_t)np->wn_child->wn_u1.index, 3);  // <nodeidx>
        }
      } else if (np->wn_child->wn_u2.wnode == NULL)
        // We will write the child below and give it an index.
        np->wn_child->wn_u2.wnode = node;

      if (fd != NULL)
        if (putc(np->wn_byte, fd) == EOF) {       // <byte> or <xbyte>
          EMSG(_(e_write));
          return 0;
        }
    }
  }

  // Space used in the array when reading: one for each sibling and one for
  // the count.
  int newindex = idx + siblingcount + 1;

  // Recursively dump the children of each sibling.
  // Only children claimed by this list (wn_u2.wnode == node) are dumped here;
  // shared children were handled through BY_INDEX above.
  for (wordnode_T *np = node; np != NULL; np = np->wn_sibling)
    if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
      newindex = put_node(fd, np->wn_child, newindex, regionmask,
                          prefixtree);

  return newindex;
}
4683 | |
4684 | |
4685 | // ":mkspell [-ascii] outfile infile ..." |
4686 | // ":mkspell [-ascii] addfile" |
4687 | void ex_mkspell(exarg_T *eap) |
4688 | { |
4689 | int fcount; |
4690 | char_u **fnames; |
4691 | char_u *arg = eap->arg; |
4692 | bool ascii = false; |
4693 | |
4694 | if (STRNCMP(arg, "-ascii" , 6) == 0) { |
4695 | ascii = true; |
4696 | arg = skipwhite(arg + 6); |
4697 | } |
4698 | |
4699 | // Expand all the remaining arguments (e.g., $VIMRUNTIME). |
4700 | if (get_arglist_exp(arg, &fcount, &fnames, false) == OK) { |
4701 | mkspell(fcount, fnames, ascii, eap->forceit, false); |
4702 | FreeWild(fcount, fnames); |
4703 | } |
4704 | } |
4705 | |
4706 | // Create the .sug file. |
4707 | // Uses the soundfold info in "spin". |
4708 | // Writes the file with the name "wfname", with ".spl" changed to ".sug". |
4709 | static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname) |
4710 | { |
4711 | char_u *fname = NULL; |
4712 | int len; |
4713 | slang_T *slang; |
4714 | bool free_slang = false; |
4715 | |
4716 | // Read back the .spl file that was written. This fills the required |
4717 | // info for soundfolding. This also uses less memory than the |
4718 | // pointer-linked version of the trie. And it avoids having two versions |
4719 | // of the code for the soundfolding stuff. |
4720 | // It might have been done already by spell_reload_one(). |
4721 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) { |
4722 | if (path_full_compare(wfname, slang->sl_fname, false) == kEqualFiles) { |
4723 | break; |
4724 | } |
4725 | } |
4726 | if (slang == NULL) { |
4727 | spell_message(spin, (char_u *)_("Reading back spell file..." )); |
4728 | slang = spell_load_file(wfname, NULL, NULL, false); |
4729 | if (slang == NULL) |
4730 | return; |
4731 | free_slang = true; |
4732 | } |
4733 | |
4734 | // Clear the info in "spin" that is used. |
4735 | spin->si_blocks = NULL; |
4736 | spin->si_blocks_cnt = 0; |
4737 | spin->si_compress_cnt = 0; // will stay at 0 all the time |
4738 | spin->si_free_count = 0; |
4739 | spin->si_first_free = NULL; |
4740 | spin->si_foldwcount = 0; |
4741 | |
4742 | // Go through the trie of good words, soundfold each word and add it to |
4743 | // the soundfold trie. |
4744 | spell_message(spin, (char_u *)_("Performing soundfolding..." )); |
4745 | if (sug_filltree(spin, slang) == FAIL) |
4746 | goto theend; |
4747 | |
4748 | // Create the table which links each soundfold word with a list of the |
4749 | // good words it may come from. Creates buffer "spin->si_spellbuf". |
4750 | // This also removes the wordnr from the NUL byte entries to make |
4751 | // compression possible. |
4752 | if (sug_maketable(spin) == FAIL) |
4753 | goto theend; |
4754 | |
4755 | smsg(_("Number of words after soundfolding: %" PRId64), |
4756 | (int64_t)spin->si_spellbuf->b_ml.ml_line_count); |
4757 | |
4758 | // Compress the soundfold trie. |
4759 | spell_message(spin, (char_u *)_(msg_compressing)); |
4760 | wordtree_compress(spin, spin->si_foldroot); |
4761 | |
4762 | // Write the .sug file. |
4763 | // Make the file name by changing ".spl" to ".sug". |
4764 | fname = xmalloc(MAXPATHL); |
4765 | STRLCPY(fname, wfname, MAXPATHL); |
4766 | len = (int)STRLEN(fname); |
4767 | fname[len - 2] = 'u'; |
4768 | fname[len - 1] = 'g'; |
4769 | sug_write(spin, fname); |
4770 | |
4771 | theend: |
4772 | xfree(fname); |
4773 | if (free_slang) |
4774 | slang_free(slang); |
4775 | free_blocks(spin->si_blocks); |
4776 | close_spellbuf(spin->si_spellbuf); |
4777 | } |
4778 | |
4779 | // Build the soundfold trie for language "slang". |
4780 | static int sug_filltree(spellinfo_T *spin, slang_T *slang) |
4781 | { |
4782 | char_u *byts; |
4783 | idx_T *idxs; |
4784 | int depth; |
4785 | idx_T arridx[MAXWLEN]; |
4786 | int curi[MAXWLEN]; |
4787 | char_u tword[MAXWLEN]; |
4788 | char_u tsalword[MAXWLEN]; |
4789 | int c; |
4790 | idx_T n; |
4791 | unsigned words_done = 0; |
4792 | int wordcount[MAXWLEN]; |
4793 | |
4794 | // We use si_foldroot for the soundfolded trie. |
4795 | spin->si_foldroot = wordtree_alloc(spin); |
4796 | |
4797 | // Let tree_add_word() know we're adding to the soundfolded tree |
4798 | spin->si_sugtree = true; |
4799 | |
4800 | // Go through the whole case-folded tree, soundfold each word and put it |
4801 | // in the trie. |
4802 | byts = slang->sl_fbyts; |
4803 | idxs = slang->sl_fidxs; |
4804 | |
4805 | arridx[0] = 0; |
4806 | curi[0] = 1; |
4807 | wordcount[0] = 0; |
4808 | |
4809 | depth = 0; |
4810 | while (depth >= 0 && !got_int) { |
4811 | if (curi[depth] > byts[arridx[depth]]) { |
4812 | // Done all bytes at this node, go up one level. |
4813 | idxs[arridx[depth]] = wordcount[depth]; |
4814 | if (depth > 0) |
4815 | wordcount[depth - 1] += wordcount[depth]; |
4816 | |
4817 | --depth; |
4818 | line_breakcheck(); |
4819 | } else { |
4820 | |
4821 | // Do one more byte at this node. |
4822 | n = arridx[depth] + curi[depth]; |
4823 | ++curi[depth]; |
4824 | |
4825 | c = byts[n]; |
4826 | if (c == 0) { |
4827 | // Sound-fold the word. |
4828 | tword[depth] = NUL; |
4829 | spell_soundfold(slang, tword, true, tsalword); |
4830 | |
4831 | // We use the "flags" field for the MSB of the wordnr, |
4832 | // "region" for the LSB of the wordnr. |
4833 | if (tree_add_word(spin, tsalword, spin->si_foldroot, |
4834 | words_done >> 16, words_done & 0xffff, |
4835 | 0) == FAIL) |
4836 | return FAIL; |
4837 | |
4838 | ++words_done; |
4839 | ++wordcount[depth]; |
4840 | |
4841 | // Reset the block count each time to avoid compression |
4842 | // kicking in. |
4843 | spin->si_blocks_cnt = 0; |
4844 | |
4845 | // Skip over any other NUL bytes (same word with different |
4846 | // flags). |
4847 | while (byts[n + 1] == 0) { |
4848 | ++n; |
4849 | ++curi[depth]; |
4850 | } |
4851 | } else { |
4852 | // Normal char, go one level deeper. |
4853 | tword[depth++] = c; |
4854 | arridx[depth] = idxs[n]; |
4855 | curi[depth] = 1; |
4856 | wordcount[depth] = 0; |
4857 | } |
4858 | } |
4859 | } |
4860 | |
4861 | smsg(_("Total number of words: %d" ), words_done); |
4862 | |
4863 | return OK; |
4864 | } |
4865 | |
4866 | // Make the table that links each word in the soundfold trie to the words it |
4867 | // can be produced from. |
4868 | // This is not unlike lines in a file, thus use a memfile to be able to access |
4869 | // the table efficiently. |
4870 | // Returns FAIL when out of memory. |
4871 | static int sug_maketable(spellinfo_T *spin) |
4872 | { |
4873 | garray_T ga; |
4874 | int res = OK; |
4875 | |
4876 | // Allocate a buffer, open a memline for it and create the swap file |
4877 | // (uses a temp file, not a .swp file). |
4878 | spin->si_spellbuf = open_spellbuf(); |
4879 | |
4880 | // Use a buffer to store the line info, avoids allocating many small |
4881 | // pieces of memory. |
4882 | ga_init(&ga, 1, 100); |
4883 | |
4884 | // recursively go through the tree |
4885 | if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) |
4886 | res = FAIL; |
4887 | |
4888 | ga_clear(&ga); |
4889 | return res; |
4890 | } |
4891 | |
4892 | // Fill the table for one node and its children. |
4893 | // Returns the wordnr at the start of the node. |
4894 | // Returns -1 when out of memory. |
4895 | static int |
4896 | sug_filltable ( |
4897 | spellinfo_T *spin, |
4898 | wordnode_T *node, |
4899 | int startwordnr, |
4900 | garray_T *gap // place to store line of numbers |
4901 | ) |
4902 | { |
4903 | wordnode_T *p, *np; |
4904 | int wordnr = startwordnr; |
4905 | int nr; |
4906 | int prev_nr; |
4907 | |
4908 | for (p = node; p != NULL; p = p->wn_sibling) { |
4909 | if (p->wn_byte == NUL) { |
4910 | gap->ga_len = 0; |
4911 | prev_nr = 0; |
4912 | for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) { |
4913 | ga_grow(gap, 10); |
4914 | |
4915 | nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); |
4916 | // Compute the offset from the previous nr and store the |
4917 | // offset in a way that it takes a minimum number of bytes. |
4918 | // It's a bit like utf-8, but without the need to mark |
4919 | // following bytes. |
4920 | nr -= prev_nr; |
4921 | prev_nr += nr; |
4922 | gap->ga_len += offset2bytes(nr, |
4923 | (char_u *)gap->ga_data + gap->ga_len); |
4924 | } |
4925 | |
4926 | // add the NUL byte |
4927 | ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; |
4928 | |
4929 | if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, |
4930 | gap->ga_data, gap->ga_len, true) == FAIL) { |
4931 | return -1; |
4932 | } |
4933 | wordnr++; |
4934 | |
4935 | // Remove extra NUL entries, we no longer need them. We don't |
4936 | // bother freeing the nodes, the won't be reused anyway. |
4937 | while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) |
4938 | p->wn_sibling = p->wn_sibling->wn_sibling; |
4939 | |
4940 | // Clear the flags on the remaining NUL node, so that compression |
4941 | // works a lot better. |
4942 | p->wn_flags = 0; |
4943 | p->wn_region = 0; |
4944 | } else { |
4945 | wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); |
4946 | if (wordnr == -1) |
4947 | return -1; |
4948 | } |
4949 | } |
4950 | return wordnr; |
4951 | } |
4952 | |
4953 | // Convert an offset into a minimal number of bytes. |
4954 | // Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL |
4955 | // bytes. |
4956 | static int offset2bytes(int nr, char_u *buf) |
4957 | { |
4958 | int rem; |
4959 | int b1, b2, b3, b4; |
4960 | |
4961 | // Split the number in parts of base 255. We need to avoid NUL bytes. |
4962 | b1 = nr % 255 + 1; |
4963 | rem = nr / 255; |
4964 | b2 = rem % 255 + 1; |
4965 | rem = rem / 255; |
4966 | b3 = rem % 255 + 1; |
4967 | b4 = rem / 255 + 1; |
4968 | |
4969 | if (b4 > 1 || b3 > 0x1f) { // 4 bytes |
4970 | buf[0] = 0xe0 + b4; |
4971 | buf[1] = b3; |
4972 | buf[2] = b2; |
4973 | buf[3] = b1; |
4974 | return 4; |
4975 | } |
4976 | if (b3 > 1 || b2 > 0x3f ) { // 3 bytes |
4977 | buf[0] = 0xc0 + b3; |
4978 | buf[1] = b2; |
4979 | buf[2] = b1; |
4980 | return 3; |
4981 | } |
4982 | if (b2 > 1 || b1 > 0x7f ) { // 2 bytes |
4983 | buf[0] = 0x80 + b2; |
4984 | buf[1] = b1; |
4985 | return 2; |
4986 | } |
4987 | // 1 byte |
4988 | buf[0] = b1; |
4989 | return 1; |
4990 | } |
4991 | |
4992 | // Write the .sug file in "fname". |
4993 | static void sug_write(spellinfo_T *spin, char_u *fname) |
4994 | { |
4995 | // Create the file. Note that an existing file is silently overwritten! |
4996 | FILE *fd = os_fopen((char *)fname, "w" ); |
4997 | if (fd == NULL) { |
4998 | EMSG2(_(e_notopen), fname); |
4999 | return; |
5000 | } |
5001 | |
5002 | vim_snprintf((char *)IObuff, IOSIZE, |
5003 | _("Writing suggestion file %s..." ), fname); |
5004 | spell_message(spin, IObuff); |
5005 | |
5006 | // <SUGHEADER>: <fileID> <versionnr> <timestamp> |
5007 | if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) { // <fileID> |
5008 | EMSG(_(e_write)); |
5009 | goto theend; |
5010 | } |
5011 | putc(VIMSUGVERSION, fd); // <versionnr> |
5012 | |
5013 | // Write si_sugtime to the file. |
5014 | put_time(fd, spin->si_sugtime); // <timestamp> |
5015 | |
5016 | // <SUGWORDTREE> |
5017 | spin->si_memtot = 0; |
5018 | wordnode_T *tree = spin->si_foldroot->wn_sibling; |
5019 | |
5020 | // Clear the index and wnode fields in the tree. |
5021 | clear_node(tree); |
5022 | |
5023 | // Count the number of nodes. Needed to be able to allocate the |
5024 | // memory when reading the nodes. Also fills in index for shared |
5025 | // nodes. |
5026 | size_t nodecount = (size_t)put_node(NULL, tree, 0, 0, false); |
5027 | |
5028 | // number of nodes in 4 bytes |
5029 | put_bytes(fd, nodecount, 4); // <nodecount> |
5030 | assert(nodecount + nodecount * sizeof(int) < INT_MAX); |
5031 | spin->si_memtot += (int)(nodecount + nodecount * sizeof(int)); |
5032 | |
5033 | // Write the nodes. |
5034 | (void)put_node(fd, tree, 0, 0, false); |
5035 | |
5036 | // <SUGTABLE>: <sugwcount> <sugline> ... |
5037 | linenr_T wcount = spin->si_spellbuf->b_ml.ml_line_count; |
5038 | assert(wcount >= 0); |
5039 | put_bytes(fd, (uintmax_t)wcount, 4); // <sugwcount> |
5040 | |
5041 | for (linenr_T lnum = 1; lnum <= wcount; ++lnum) { |
5042 | // <sugline>: <sugnr> ... NUL |
5043 | char_u *line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); |
5044 | size_t len = STRLEN(line) + 1; |
5045 | if (fwrite(line, len, 1, fd) == 0) { |
5046 | EMSG(_(e_write)); |
5047 | goto theend; |
5048 | } |
5049 | assert((size_t)spin->si_memtot + len <= INT_MAX); |
5050 | spin->si_memtot += (int)len; |
5051 | } |
5052 | |
5053 | // Write another byte to check for errors. |
5054 | if (putc(0, fd) == EOF) |
5055 | EMSG(_(e_write)); |
5056 | |
5057 | vim_snprintf((char *)IObuff, IOSIZE, |
5058 | _("Estimated runtime memory use: %d bytes" ), spin->si_memtot); |
5059 | spell_message(spin, IObuff); |
5060 | |
5061 | theend: |
5062 | // close the file |
5063 | fclose(fd); |
5064 | } |
5065 | |
5066 | |
// Create a Vim spell file from one or more word lists.
// "fnames[0]" is the output file name.
// "fnames[fcount - 1]" is the last input file name.
// Exception: when "fnames[0]" ends in ".add" it's used as the input file name
// and ".spl" is appended to make the output file name.
//
// With a single name that does not end in ".add" the name is also used as
// the input and the output name is built from it with SPL_FNAME_TMPL.
//
// "ascii"       -ascii argument given; also switched on when the output
//               name contains SPL_FNAME_ASCII
// "over_write"  when false an existing output file is an error
// "added_word"  invoked through "zg"; switches off verbose messages and is
//               passed on to spell_reload_one()
//
// Errors are reported with an error message; nothing is returned.
static void
mkspell (
    int fcount,
    char_u **fnames,
    bool ascii,                 // -ascii argument given
    bool over_write,            // overwrite existing output file
    bool added_word             // invoked through "zg"
)
{
  char_u      *fname = NULL;
  char_u      *wfname;
  char_u      **innames;
  int incount;
  afffile_T   *(afile[MAXREGIONS]);
  int i;
  int len;
  bool error = false;
  spellinfo_T spin;

  // Start with every field of "spin" zero/NULL, then set the non-zero
  // defaults.
  memset(&spin, 0, sizeof(spin));
  spin.si_verbose = !added_word;
  spin.si_ascii = ascii;
  spin.si_followup = true;
  spin.si_rem_accents = true;
  ga_init(&spin.si_rep, (int)sizeof(fromto_T), 20);
  ga_init(&spin.si_repsal, (int)sizeof(fromto_T), 20);
  ga_init(&spin.si_sal, (int)sizeof(fromto_T), 20);
  ga_init(&spin.si_map, (int)sizeof(char_u), 100);
  ga_init(&spin.si_comppat, (int)sizeof(char_u *), 20);
  ga_init(&spin.si_prefcond, (int)sizeof(char_u *), 50);
  hash_init(&spin.si_commonwords);
  spin.si_newcompID = 127;      // start compound ID at first maximum

  // default: fnames[0] is output file, following are input files
  innames = &fnames[1];
  incount = fcount - 1;

  wfname = xmalloc(MAXPATHL);

  // When "fcount" is zero "wfname" is left unset; that is harmless because
  // "incount" is then -1 and the first check below bails out before "wfname"
  // is read.
  if (fcount >= 1) {
    len = (int)STRLEN(fnames[0]);
    if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) {
      // For ":mkspell path/en.latin1.add" output file is
      // "path/en.latin1.add.spl".
      innames = &fnames[0];
      incount = 1;
      vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
    } else if (fcount == 1) {
      // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
      innames = &fnames[0];
      incount = 1;
      vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
          fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
    } else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) {
      // Name ends in ".spl", use as the file name.
      STRLCPY(wfname, fnames[0], MAXPATHL);
    } else
      // Name should be language, make the file name from it.
      vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
          fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());

    // Check for .ascii.spl.
    if (strstr((char *)path_tail(wfname), SPL_FNAME_ASCII) != NULL)
      spin.si_ascii = true;

    // Check for .add.spl.
    if (strstr((char *)path_tail(wfname), SPL_FNAME_ADD) != NULL)
      spin.si_add = true;
  }

  if (incount <= 0) {
    EMSG(_(e_invarg));          // need at least output and input names
  } else if (vim_strchr(path_tail(wfname), '_') != NULL) {
    EMSG(_("E751: Output file name must not have region name"));
  } else if (incount > MAXREGIONS) {
    emsgf(_("E754: Only up to %d regions supported"), MAXREGIONS);
  } else {
    // Check for overwriting before doing things that may take a lot of
    // time.
    if (!over_write && os_path_exists(wfname)) {
      EMSG(_(e_exists));
      goto theend;
    }
    if (os_isdir(wfname)) {
      EMSG2(_(e_isadir2), wfname);
      goto theend;
    }

    fname = xmalloc(MAXPATHL);

    // Init the aff and dic pointers.
    // Get the region names if there are more than 2 arguments.
    // With several input files each name must end in "_xx"; the two
    // characters after the underscore, lower-cased, are the region name.
    for (i = 0; i < incount; ++i) {
      afile[i] = NULL;

      if (incount > 1) {
        len = (int)STRLEN(innames[i]);
        if (STRLEN(path_tail(innames[i])) < 5
            || innames[i][len - 3] != '_') {
          EMSG2(_("E755: Invalid region in %s"), innames[i]);
          goto theend;
        }
        spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
        spin.si_region_name[i * 2 + 1] =
          TOLOWER_ASC(innames[i][len - 1]);
      }
    }
    spin.si_region_count = incount;

    spin.si_foldroot = wordtree_alloc(&spin);
    spin.si_keeproot = wordtree_alloc(&spin);
    spin.si_prefroot = wordtree_alloc(&spin);

    // When not producing a .add.spl file clear the character table when
    // we encounter one in the .aff file.  This means we dump the current
    // one in the .spl file if the .aff file doesn't define one.  That's
    // better than guessing the contents, the table will match a
    // previously loaded spell file.
    if (!spin.si_add)
      spin.si_clear_chartab = true;

    // Read all the .aff and .dic files.
    // Text is converted to 'encoding'.
    // Words are stored in the case-folded and keep-case trees.
    for (i = 0; i < incount && !error; ++i) {
      spin.si_conv.vc_type = CONV_NONE;
      // Each input file gets its own bit in the region mask.
      spin.si_region = 1 << i;

      vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
      if (os_path_exists(fname)) {
        // Read the .aff file.  Will init "spin->si_conv" based on the
        // "SET" line.
        afile[i] = spell_read_aff(&spin, fname);
        if (afile[i] == NULL)
          error = true;
        else {
          // Read the .dic file and store the words in the trees.
          vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
              innames[i]);
          if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
            error = true;
        }
      } else {
        // No .aff file, try reading the file as a word list.  Store
        // the words in the trees.
        if (spell_read_wordfile(&spin, innames[i]) == FAIL)
          error = true;
      }

      // Free any conversion stuff.
      convert_setup(&spin.si_conv, NULL, NULL);
    }

    if (spin.si_compflags != NULL && spin.si_nobreak)
      MSG(_("Warning: both compounding and NOBREAK specified"));

    if (!error && !got_int) {
      // Combine tails in the tree.
      spell_message(&spin, (char_u *)_(msg_compressing));
      wordtree_compress(&spin, spin.si_foldroot);
      wordtree_compress(&spin, spin.si_keeproot);
      wordtree_compress(&spin, spin.si_prefroot);
    }

    if (!error && !got_int) {
      // Write the info in the spell file.
      vim_snprintf((char *)IObuff, IOSIZE,
                   _("Writing spell file %s..."), wfname);
      spell_message(&spin, IObuff);

      error = write_vim_spell(&spin, wfname) == FAIL;

      // NOTE: these two messages are given even when writing failed.
      spell_message(&spin, (char_u *)_("Done!"));
      vim_snprintf((char *)IObuff, IOSIZE,
                   _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
      spell_message(&spin, IObuff);

      // If the file is loaded need to reload it.
      if (!error)
        spell_reload_one(wfname, added_word);
    }

    // Free the allocated memory.
    ga_clear(&spin.si_rep);
    ga_clear(&spin.si_repsal);
    ga_clear(&spin.si_sal);
    ga_clear(&spin.si_map);
    ga_clear(&spin.si_comppat);
    ga_clear(&spin.si_prefcond);
    hash_clear_all(&spin.si_commonwords, 0);

    // Free the .aff file structures.
    for (i = 0; i < incount; ++i)
      if (afile[i] != NULL)
        spell_free_aff(afile[i]);

    // Free all the bits and pieces at once.
    free_blocks(spin.si_blocks);

    // If there is soundfolding info and no NOSUGFILE item create the
    // .sug file with the soundfolded word trie.
    // spell_make_sugfile() resets spin.si_blocks itself before building the
    // soundfold trie, so the blocks freed above are not used again.
    if (spin.si_sugtime != 0 && !error && !got_int)
      spell_make_sugfile(&spin, wfname);

  }

  // The early exits above jump here; only the two name buffers are freed.
theend:
  xfree(fname);
  xfree(wfname);
}
5282 | |
5283 | // Display a message for spell file processing when 'verbose' is set or using |
5284 | // ":mkspell". "str" can be IObuff. |
5285 | static void spell_message(spellinfo_T *spin, char_u *str) |
5286 | { |
5287 | if (spin->si_verbose || p_verbose > 2) { |
5288 | if (!spin->si_verbose) |
5289 | verbose_enter(); |
5290 | MSG(str); |
5291 | ui_flush(); |
5292 | if (!spin->si_verbose) |
5293 | verbose_leave(); |
5294 | } |
5295 | } |
5296 | |
5297 | // ":[count]spellgood {word}" |
5298 | // ":[count]spellwrong {word}" |
5299 | // ":[count]spellundo {word}" |
5300 | void ex_spell(exarg_T *eap) |
5301 | { |
5302 | spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, |
5303 | eap->forceit ? 0 : (int)eap->line2, |
5304 | eap->cmdidx == CMD_spellundo); |
5305 | } |
5306 | |
5307 | // Add "word[len]" to 'spellfile' as a good or bad word. |
5308 | void |
5309 | spell_add_word ( |
5310 | char_u *word, |
5311 | int len, |
5312 | int bad, |
5313 | int idx, // "zG" and "zW": zero, otherwise index in |
5314 | // 'spellfile' |
5315 | bool undo // true for "zug", "zuG", "zuw" and "zuW" |
5316 | ) |
5317 | { |
5318 | FILE *fd = NULL; |
5319 | buf_T *buf = NULL; |
5320 | bool new_spf = false; |
5321 | char_u *fname; |
5322 | char_u *fnamebuf = NULL; |
5323 | char_u line[MAXWLEN * 2]; |
5324 | long fpos, fpos_next = 0; |
5325 | int i; |
5326 | char_u *spf; |
5327 | |
5328 | if (idx == 0) { // use internal wordlist |
5329 | if (int_wordlist == NULL) { |
5330 | int_wordlist = vim_tempname(); |
5331 | if (int_wordlist == NULL) |
5332 | return; |
5333 | } |
5334 | fname = int_wordlist; |
5335 | } else { |
5336 | // If 'spellfile' isn't set figure out a good default value. |
5337 | if (*curwin->w_s->b_p_spf == NUL) { |
5338 | init_spellfile(); |
5339 | new_spf = true; |
5340 | } |
5341 | |
5342 | if (*curwin->w_s->b_p_spf == NUL) { |
5343 | EMSG2(_(e_notset), "spellfile" ); |
5344 | return; |
5345 | } |
5346 | fnamebuf = xmalloc(MAXPATHL); |
5347 | |
5348 | for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) { |
5349 | copy_option_part(&spf, fnamebuf, MAXPATHL, "," ); |
5350 | if (i == idx) |
5351 | break; |
5352 | if (*spf == NUL) { |
5353 | EMSGN(_("E765: 'spellfile' does not have %" PRId64 " entries" ), idx); |
5354 | xfree(fnamebuf); |
5355 | return; |
5356 | } |
5357 | } |
5358 | |
5359 | // Check that the user isn't editing the .add file somewhere. |
5360 | buf = buflist_findname_exp(fnamebuf); |
5361 | if (buf != NULL && buf->b_ml.ml_mfp == NULL) |
5362 | buf = NULL; |
5363 | if (buf != NULL && bufIsChanged(buf)) { |
5364 | EMSG(_(e_bufloaded)); |
5365 | xfree(fnamebuf); |
5366 | return; |
5367 | } |
5368 | |
5369 | fname = fnamebuf; |
5370 | } |
5371 | |
5372 | if (bad || undo) { |
5373 | // When the word appears as good word we need to remove that one, |
5374 | // since its flags sort before the one with WF_BANNED. |
5375 | fd = os_fopen((char *)fname, "r" ); |
5376 | if (fd != NULL) { |
5377 | while (!vim_fgets(line, MAXWLEN * 2, fd)) { |
5378 | fpos = fpos_next; |
5379 | fpos_next = ftell(fd); |
5380 | if (STRNCMP(word, line, len) == 0 |
5381 | && (line[len] == '/' || line[len] < ' ')) { |
5382 | // Found duplicate word. Remove it by writing a '#' at |
5383 | // the start of the line. Mixing reading and writing |
5384 | // doesn't work for all systems, close the file first. |
5385 | fclose(fd); |
5386 | fd = os_fopen((char *)fname, "r+" ); |
5387 | if (fd == NULL) { |
5388 | break; |
5389 | } |
5390 | if (fseek(fd, fpos, SEEK_SET) == 0) { |
5391 | fputc('#', fd); |
5392 | if (undo) { |
5393 | home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); |
5394 | smsg(_("Word '%.*s' removed from %s" ), |
5395 | len, word, NameBuff); |
5396 | } |
5397 | } |
5398 | if (fseek(fd, fpos_next, SEEK_SET) <= 0) { |
5399 | break; |
5400 | } |
5401 | } |
5402 | } |
5403 | if (fd != NULL) |
5404 | fclose(fd); |
5405 | } |
5406 | } |
5407 | |
5408 | if (!undo) { |
5409 | fd = os_fopen((char *)fname, "a" ); |
5410 | if (fd == NULL && new_spf) { |
5411 | char_u *p; |
5412 | |
5413 | // We just initialized the 'spellfile' option and can't open the |
5414 | // file. We may need to create the "spell" directory first. We |
5415 | // already checked the runtime directory is writable in |
5416 | // init_spellfile(). |
5417 | if (!dir_of_file_exists(fname) && (p = path_tail_with_sep(fname)) != fname) { |
5418 | int c = *p; |
5419 | |
5420 | // The directory doesn't exist. Try creating it and opening |
5421 | // the file again. |
5422 | *p = NUL; |
5423 | os_mkdir((char *)fname, 0755); |
5424 | *p = c; |
5425 | fd = os_fopen((char *)fname, "a" ); |
5426 | } |
5427 | } |
5428 | |
5429 | if (fd == NULL) |
5430 | EMSG2(_(e_notopen), fname); |
5431 | else { |
5432 | if (bad) |
5433 | fprintf(fd, "%.*s/!\n" , len, word); |
5434 | else |
5435 | fprintf(fd, "%.*s\n" , len, word); |
5436 | fclose(fd); |
5437 | |
5438 | home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); |
5439 | smsg(_("Word '%.*s' added to %s" ), len, word, NameBuff); |
5440 | } |
5441 | } |
5442 | |
5443 | if (fd != NULL) { |
5444 | // Update the .add.spl file. |
5445 | mkspell(1, &fname, false, true, true); |
5446 | |
5447 | // If the .add file is edited somewhere, reload it. |
5448 | if (buf != NULL) |
5449 | buf_reload(buf, buf->b_orig_mode); |
5450 | |
5451 | redraw_all_later(SOME_VALID); |
5452 | } |
5453 | xfree(fnamebuf); |
5454 | } |
5455 | |
5456 | // Initialize 'spellfile' for the current buffer. |
5457 | static void init_spellfile(void) |
5458 | { |
5459 | char_u *buf; |
5460 | int l; |
5461 | char_u *fname; |
5462 | char_u *rtp; |
5463 | char_u *lend; |
5464 | bool aspath = false; |
5465 | char_u *lstart = curbuf->b_s.b_p_spl; |
5466 | |
5467 | if (*curwin->w_s->b_p_spl != NUL && !GA_EMPTY(&curwin->w_s->b_langp)) { |
5468 | buf = xmalloc(MAXPATHL); |
5469 | |
5470 | // Find the end of the language name. Exclude the region. If there |
5471 | // is a path separator remember the start of the tail. |
5472 | for (lend = curwin->w_s->b_p_spl; *lend != NUL |
5473 | && vim_strchr((char_u *)",._" , *lend) == NULL; ++lend) |
5474 | if (vim_ispathsep(*lend)) { |
5475 | aspath = true; |
5476 | lstart = lend + 1; |
5477 | } |
5478 | |
5479 | // Loop over all entries in 'runtimepath'. Use the first one where we |
5480 | // are allowed to write. |
5481 | rtp = p_rtp; |
5482 | while (*rtp != NUL) { |
5483 | if (aspath) |
5484 | // Use directory of an entry with path, e.g., for |
5485 | // "/dir/lg.utf-8.spl" use "/dir". |
5486 | STRLCPY(buf, curbuf->b_s.b_p_spl, |
5487 | lstart - curbuf->b_s.b_p_spl); |
5488 | else |
5489 | // Copy the path from 'runtimepath' to buf[]. |
5490 | copy_option_part(&rtp, buf, MAXPATHL, "," ); |
5491 | if (os_file_is_writable((char *)buf) == 2) { |
5492 | // Use the first language name from 'spelllang' and the |
5493 | // encoding used in the first loaded .spl file. |
5494 | if (aspath) |
5495 | STRLCPY(buf, curbuf->b_s.b_p_spl, |
5496 | lend - curbuf->b_s.b_p_spl + 1); |
5497 | else { |
5498 | // Create the "spell" directory if it doesn't exist yet. |
5499 | l = (int)STRLEN(buf); |
5500 | vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell" ); |
5501 | if (os_file_is_writable((char *)buf) != 2) { |
5502 | os_mkdir((char *)buf, 0755); |
5503 | } |
5504 | |
5505 | l = (int)STRLEN(buf); |
5506 | vim_snprintf((char *)buf + l, MAXPATHL - l, |
5507 | "/%.*s" , (int)(lend - lstart), lstart); |
5508 | } |
5509 | l = (int)STRLEN(buf); |
5510 | fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) |
5511 | ->lp_slang->sl_fname; |
5512 | vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add" , |
5513 | ((fname != NULL |
5514 | && strstr((char *)path_tail(fname), ".ascii." ) != NULL) |
5515 | ? "ascii" |
5516 | : (const char *)spell_enc())); |
5517 | set_option_value("spellfile" , 0L, (const char *)buf, OPT_LOCAL); |
5518 | break; |
5519 | } |
5520 | aspath = false; |
5521 | } |
5522 | |
5523 | xfree(buf); |
5524 | } |
5525 | } |
5526 | |
5527 | // Set the spell character tables from strings in the affix file. |
5528 | static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp) |
5529 | { |
5530 | // We build the new tables here first, so that we can compare with the |
5531 | // previous one. |
5532 | spelltab_T new_st; |
5533 | char_u *pf = fol, *pl = low, *pu = upp; |
5534 | int f, l, u; |
5535 | |
5536 | clear_spell_chartab(&new_st); |
5537 | |
5538 | while (*pf != NUL) { |
5539 | if (*pl == NUL || *pu == NUL) { |
5540 | EMSG(_(e_affform)); |
5541 | return FAIL; |
5542 | } |
5543 | f = mb_ptr2char_adv((const char_u **)&pf); |
5544 | l = mb_ptr2char_adv((const char_u **)&pl); |
5545 | u = mb_ptr2char_adv((const char_u **)&pu); |
5546 | // Every character that appears is a word character. |
5547 | if (f < 256) |
5548 | new_st.st_isw[f] = true; |
5549 | if (l < 256) |
5550 | new_st.st_isw[l] = true; |
5551 | if (u < 256) |
5552 | new_st.st_isw[u] = true; |
5553 | |
5554 | // if "LOW" and "FOL" are not the same the "LOW" char needs |
5555 | // case-folding |
5556 | if (l < 256 && l != f) { |
5557 | if (f >= 256) { |
5558 | EMSG(_(e_affrange)); |
5559 | return FAIL; |
5560 | } |
5561 | new_st.st_fold[l] = f; |
5562 | } |
5563 | |
5564 | // if "UPP" and "FOL" are not the same the "UPP" char needs |
5565 | // case-folding, it's upper case and the "UPP" is the upper case of |
5566 | // "FOL" . |
5567 | if (u < 256 && u != f) { |
5568 | if (f >= 256) { |
5569 | EMSG(_(e_affrange)); |
5570 | return FAIL; |
5571 | } |
5572 | new_st.st_fold[u] = f; |
5573 | new_st.st_isu[u] = true; |
5574 | new_st.st_upper[f] = u; |
5575 | } |
5576 | } |
5577 | |
5578 | if (*pl != NUL || *pu != NUL) { |
5579 | EMSG(_(e_affform)); |
5580 | return FAIL; |
5581 | } |
5582 | |
5583 | return set_spell_finish(&new_st); |
5584 | } |
5585 | |
5586 | // Set the spell character tables from strings in the .spl file. |
5587 | static void |
5588 | set_spell_charflags ( |
5589 | char_u *flags, |
5590 | int cnt, // length of "flags" |
5591 | char_u *fol |
5592 | ) |
5593 | { |
5594 | // We build the new tables here first, so that we can compare with the |
5595 | // previous one. |
5596 | spelltab_T new_st; |
5597 | int i; |
5598 | char_u *p = fol; |
5599 | int c; |
5600 | |
5601 | clear_spell_chartab(&new_st); |
5602 | |
5603 | for (i = 0; i < 128; ++i) { |
5604 | if (i < cnt) { |
5605 | new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; |
5606 | new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; |
5607 | } |
5608 | |
5609 | if (*p != NUL) { |
5610 | c = mb_ptr2char_adv((const char_u **)&p); |
5611 | new_st.st_fold[i + 128] = c; |
5612 | if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) |
5613 | new_st.st_upper[c] = i + 128; |
5614 | } |
5615 | } |
5616 | |
5617 | (void)set_spell_finish(&new_st); |
5618 | } |
5619 | |
5620 | static int set_spell_finish(spelltab_T *new_st) |
5621 | { |
5622 | int i; |
5623 | |
5624 | if (did_set_spelltab) { |
5625 | // check that it's the same table |
5626 | for (i = 0; i < 256; ++i) { |
5627 | if (spelltab.st_isw[i] != new_st->st_isw[i] |
5628 | || spelltab.st_isu[i] != new_st->st_isu[i] |
5629 | || spelltab.st_fold[i] != new_st->st_fold[i] |
5630 | || spelltab.st_upper[i] != new_st->st_upper[i]) { |
5631 | EMSG(_("E763: Word characters differ between spell files" )); |
5632 | return FAIL; |
5633 | } |
5634 | } |
5635 | } else { |
5636 | // copy the new spelltab into the one being used |
5637 | spelltab = *new_st; |
5638 | did_set_spelltab = true; |
5639 | } |
5640 | |
5641 | return OK; |
5642 | } |
5643 | |
5644 | // Write the table with prefix conditions to the .spl file. |
5645 | // When "fd" is NULL only count the length of what is written. |
5646 | static int write_spell_prefcond(FILE *fd, garray_T *gap) |
5647 | { |
5648 | assert(gap->ga_len >= 0); |
5649 | |
5650 | if (fd != NULL) |
5651 | put_bytes(fd, (uintmax_t)gap->ga_len, 2); // <prefcondcnt> |
5652 | |
5653 | size_t totlen = 2 + (size_t)gap->ga_len; // <prefcondcnt> and <condlen> bytes |
5654 | size_t x = 1; // collect return value of fwrite() |
5655 | for (int i = 0; i < gap->ga_len; ++i) { |
5656 | // <prefcond> : <condlen> <condstr> |
5657 | char_u *p = ((char_u **)gap->ga_data)[i]; |
5658 | if (p != NULL) { |
5659 | size_t len = STRLEN(p); |
5660 | if (fd != NULL) { |
5661 | assert(len <= INT_MAX); |
5662 | fputc((int)len, fd); |
5663 | x &= fwrite(p, len, 1, fd); |
5664 | } |
5665 | totlen += len; |
5666 | } else if (fd != NULL) |
5667 | fputc(0, fd); |
5668 | } |
5669 | |
5670 | assert(totlen <= INT_MAX); |
5671 | return (int)totlen; |
5672 | } |
5673 | |
5674 | // Use map string "map" for languages "lp". |
5675 | static void set_map_str(slang_T *lp, char_u *map) |
5676 | { |
5677 | char_u *p; |
5678 | int headc = 0; |
5679 | int c; |
5680 | int i; |
5681 | |
5682 | if (*map == NUL) { |
5683 | lp->sl_has_map = false; |
5684 | return; |
5685 | } |
5686 | lp->sl_has_map = true; |
5687 | |
5688 | // Init the array and hash tables empty. |
5689 | for (i = 0; i < 256; ++i) |
5690 | lp->sl_map_array[i] = 0; |
5691 | hash_init(&lp->sl_map_hash); |
5692 | |
5693 | // The similar characters are stored separated with slashes: |
5694 | // "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and |
5695 | // before the same slash. For characters above 255 sl_map_hash is used. |
5696 | for (p = map; *p != NUL; ) { |
5697 | c = mb_cptr2char_adv((const char_u **)&p); |
5698 | if (c == '/') { |
5699 | headc = 0; |
5700 | } else { |
5701 | if (headc == 0) { |
5702 | headc = c; |
5703 | } |
5704 | |
5705 | // Characters above 255 don't fit in sl_map_array[], put them in |
5706 | // the hash table. Each entry is the char, a NUL the headchar and |
5707 | // a NUL. |
5708 | if (c >= 256) { |
5709 | int cl = mb_char2len(c); |
5710 | int headcl = mb_char2len(headc); |
5711 | char_u *b; |
5712 | hash_T hash; |
5713 | hashitem_T *hi; |
5714 | |
5715 | b = xmalloc(cl + headcl + 2); |
5716 | utf_char2bytes(c, b); |
5717 | b[cl] = NUL; |
5718 | utf_char2bytes(headc, b + cl + 1); |
5719 | b[cl + 1 + headcl] = NUL; |
5720 | hash = hash_hash(b); |
5721 | hi = hash_lookup(&lp->sl_map_hash, (const char *)b, STRLEN(b), hash); |
5722 | if (HASHITEM_EMPTY(hi)) { |
5723 | hash_add_item(&lp->sl_map_hash, hi, b, hash); |
5724 | } else { |
5725 | // This should have been checked when generating the .spl |
5726 | // file. |
5727 | EMSG(_("E783: duplicate char in MAP entry" )); |
5728 | xfree(b); |
5729 | } |
5730 | } else |
5731 | lp->sl_map_array[c] = headc; |
5732 | } |
5733 | } |
5734 | } |
5735 | |
5736 | |