spell_defs.h source code [neovim/src/nvim/spell_defs.h]

1	#ifndef NVIM_SPELL_DEFS_H
2	#define NVIM_SPELL_DEFS_H
3
4	#include <stdbool.h>
5	#include <stdint.h>
6
7	#include "nvim/buffer_defs.h"
8	#include "nvim/garray.h"
9	#include "nvim/regexp_defs.h"
10	#include "nvim/types.h"
11
// Assumed maximum word length in bytes.  Some places assume a word length
// fits in a byte, thus it can't be above 255.
#define MAXWLEN 254

// Number of regions supported.
#define MAXREGIONS 8
18
// Type used for indexes in the word tree; it needs to be at least 4 bytes.
// If int is 8 bytes we could use something smaller, but what?
typedef int idx_T;
22
// Format template for the name of a .spl file.  It takes two "%s" fields
// (presumably language name and encoding -- confirm against the callers).
#define SPL_FNAME_TMPL "%s.%s.spl"
// File name fragments for ".add" and ".ascii" spell files (how they are
// matched is not visible in this header).
#define SPL_FNAME_ADD ".add."
#define SPL_FNAME_ASCII ".ascii."
26
// Flags used for a word.  Only the lowest byte can be used, the region byte
// comes above it.
#define WF_REGION   0x01  // region byte follows
#define WF_ONECAP   0x02  // word with one capital (or all capitals)
#define WF_ALLCAP   0x04  // word must be all capitals
#define WF_RARE     0x08  // rare word
#define WF_BANNED   0x10  // bad word
#define WF_AFX      0x20  // affix ID follows
#define WF_FIXCAP   0x40  // keep-case word, allcap not allowed
#define WF_KEEPCAP  0x80  // keep-case word
37
// for <flags2>, shifted up one byte to be used in wn_flags
#define WF_HAS_AFF    0x0100  // word includes affix
#define WF_NEEDCOMP   0x0200  // word only valid in compound
#define WF_NOSUGGEST  0x0400  // word not to be suggested
#define WF_COMPROOT   0x0800  // already compounded word, COMPOUNDROOT
#define WF_NOCOMPBEF  0x1000  // no compounding before this word
#define WF_NOCOMPAFT  0x2000  // no compounding after this word
45
// flags for <pflags>
#define WFP_RARE        0x01  // rare prefix
#define WFP_NC          0x02  // prefix is not combining
#define WFP_UP          0x04  // to-upper prefix
#define WFP_COMPPERMIT  0x08  // prefix with COMPOUNDPERMITFLAG
#define WFP_COMPFORBID  0x10  // prefix with COMPOUNDFORBIDFLAG

// Flags for postponed prefixes in "sl_pidxs".  Must be above affixID (one
// byte) and prefcondnr (two bytes), hence the shift by 24 bits.
// rare postponed prefix
#define WF_RAREPFX  (WFP_RARE << 24)
// non-combining postponed prefix
#define WF_PFX_NC   (WFP_NC << 24)
// to-upper postponed prefix
#define WF_PFX_UP   (WFP_UP << 24)
// postponed prefix with COMPOUNDPERMITFLAG
#define WF_PFX_COMPPERMIT  (WFP_COMPPERMIT << 24)
// postponed prefix with COMPOUNDFORBIDFLAG
#define WF_PFX_COMPFORBID  (WFP_COMPFORBID << 24)
62
63
// flags for <compoptions>
#define COMP_CHECKDUP     1  // CHECKCOMPOUNDDUP
#define COMP_CHECKREP     2  // CHECKCOMPOUNDREP
#define COMP_CHECKCASE    4  // CHECKCOMPOUNDCASE
#define COMP_CHECKTRIPLE  8  // CHECKCOMPOUNDTRIPLE
69
70	// Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
71	// si_repsal, sl_rep, and si_sal. Not for sl_sal!
72	// One replacement: from "ft_from" to "ft_to".
73	typedef struct fromto_S {
74	char_u *ft_from;
75	char_u *ft_to;
76	} fromto_T;
77
78	// Info from "SAL" entries in ".aff" file used in sl_sal.
79	// The info is split for quick processing by spell_soundfold().
80	// Note that "sm_oneof" and "sm_rules" point into sm_lead.
81	typedef struct salitem_S {
82	char_u sm_lead; // leading letters*
83	int sm_leadlen; // length of "sm_lead"
84	char_u sm_oneof; // letters from () or NULL*
85	char_u sm_rules; // rules like ^, $, priority*
86	char_u sm_to; // replacement.*
87	int sm_lead_w; // wide character copy of "sm_lead"*
88	int sm_oneof_w; // wide character copy of "sm_oneof"*
89	int sm_to_w; // wide character copy of "sm_to"*
90	} salitem_T;
91
// Element type of the "sl_sal_first" table in slang_T.
typedef int salfirst_T;
93
// Values for SP_*ERROR are negative, positive values are used by
// read_cnt_string().
// The values are parenthesized so that they stay a single operand wherever
// the macro is expanded.
#define SP_TRUNCERROR  (-1)  // spell file truncated error
#define SP_FORMERROR   (-2)  // format error in spell file
#define SP_OTHERERROR  (-3)  // other error while reading spell file
99
100	// Structure used to store words and other info for one language, loaded from
101	// a .spl file.
102	// The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
103	// case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
104	//
105	// The "byts" array stores the possible bytes in each tree node, preceded by
106	// the number of possible bytes, sorted on byte value:
107	// <len> <byte1> <byte2> ...
108	// The "idxs" array stores the index of the child node corresponding to the
109	// byte in "byts".
110	// Exception: when the byte is zero, the word may end here and "idxs" holds
111	// the flags, region mask and affixID for the word. There may be several
112	// zeros in sequence for alternative flag/region/affixID combinations.
113	typedef struct slang_S slang_T;
114
115	struct slang_S {
116	slang_T sl_next; // next language*
117	char_u sl_name; // language name "en", "en.rare", "nl", etc.*
118	char_u sl_fname; // name of .spl file*
119	bool sl_add; // true if it's a .add file.
120
121	char_u sl_fbyts; // case-folded word bytes*
122	idx_T sl_fidxs; // case-folded word indexes*
123	char_u sl_kbyts; // keep-case word bytes*
124	idx_T sl_kidxs; // keep-case word indexes*
125	char_u sl_pbyts; // prefix tree word bytes*
126	idx_T sl_pidxs; // prefix tree word indexes*
127
128	char_u sl_info; // infotext string or NULL*
129
130	char_u sl_regions[MAXREGIONS * `2` + `1`];
131	// table with up to 8 region names plus NUL
132
133	char_u sl_midword; // MIDWORD string or NULL*
134
135	hashtab_T sl_wordcount; // hashtable with word count, wordcount_T
136
137	int sl_compmax; // COMPOUNDWORDMAX (default: MAXWLEN)
138	int sl_compminlen; // COMPOUNDMIN (default: 0)
139	int sl_compsylmax; // COMPOUNDSYLMAX (default: MAXWLEN)
140	int sl_compoptions; // COMP_ flags*
141	garray_T sl_comppat; // CHECKCOMPOUNDPATTERN items
142	regprog_T sl_compprog; // COMPOUNDRULE turned into a regexp progrm*
143	// (NULL when no compounding)
144	char_u sl_comprules; // all COMPOUNDRULE concatenated (or NULL)*
145	char_u sl_compstartflags; // flags for first compound word*
146	char_u sl_compallflags; // all flags for compound words*
147	bool sl_nobreak; // When true: no spaces between words
148	char_u sl_syllable; // SYLLABLE repeatable chars or NULL*
149	garray_T sl_syl_items; // syllable items
150
151	int sl_prefixcnt; // number of items in "sl_prefprog"
152	regprog_T *sl_prefprog; // table with regprogs for prefixes*
153
154	garray_T sl_rep; // list of fromto_T entries from REP lines
155	int16_t sl_rep_first[`256`]; // indexes where byte first appears, -1 if
156	// there is none
157	garray_T sl_sal; // list of salitem_T entries from SAL lines
158	salfirst_T sl_sal_first[`256`]; // indexes where byte first appears, -1 if
159	// there is none
160	bool sl_followup; // SAL followup
161	bool sl_collapse; // SAL collapse_result
162	bool sl_rem_accents; // SAL remove_accents
163	bool sl_sofo; // SOFOFROM and SOFOTO instead of SAL items:
164	// "sl_sal_first" maps chars, when has_mbyte
165	// "sl_sal" is a list of wide char lists.
166	garray_T sl_repsal; // list of fromto_T entries from REPSAL lines
167	int16_t sl_repsal_first[`256`]; // sl_rep_first for REPSAL lines
168	bool sl_nosplitsugs; // don't suggest splitting a word
169	bool sl_nocompoundsugs; // don't suggest compounding
170
171	// Info from the .sug file. Loaded on demand.
172	time_t sl_sugtime; // timestamp for .sug file
173	char_u sl_sbyts; // soundfolded word bytes*
174	idx_T sl_sidxs; // soundfolded word indexes*
175	buf_T sl_sugbuf; // buffer with word number table*
176	bool sl_sugloaded; // true when .sug file was loaded or failed to
177	// load
178
179	bool sl_has_map; // true, if there is a MAP line
180	hashtab_T sl_map_hash; // MAP for multi-byte chars
181	int sl_map_array[`256`]; // MAP for first 256 chars
182	hashtab_T sl_sounddone; // table with soundfolded words that have
183	// handled, see add_sound_suggest()
184	};
185
186	// Structure used in "b_langp", filled from 'spelllang'.
187	typedef struct langp_S {
188	slang_T lp_slang; // info for this language*
189	slang_T lp_sallang; // language used for sound folding or NULL*
190	slang_T lp_replang; // language used for REP items or NULL*
191	int lp_region; // bitmask for region or REGION_ALL
192	} langp_T;
193
// Pointer to the "i"-th langp_T among the items stored in "(ga).ga_data".
#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))

#define VIMSUGMAGIC "VIMsug"  // string at start of Vim .sug file
#define VIMSUGMAGICL 6        // number of bytes in VIMSUGMAGIC
#define VIMSUGVERSION 1

#define REGION_ALL 0xff  // word valid in all regions
201
202	// The tables used for recognizing word characters according to spelling.
203	// These are only used for the first 256 characters of 'encoding'.
204	typedef struct {
205	bool st_isw[`256`]; // flags: is word char
206	bool st_isu[`256`]; // flags: is uppercase char
207	char_u st_fold[`256`]; // chars: folded case
208	char_u st_upper[`256`]; // chars: upper case
209	} spelltab_T;
210
// For finding suggestions: At each node in the tree these states are tried:
typedef enum {
  STATE_START = 0,  // At start of node check for NUL bytes (goodword
                    // ends); if badword ends there is a match, otherwise
                    // try splitting word.
  STATE_NOPREFIX,   // try without prefix
  STATE_SPLITUNDO,  // Undo splitting.
  STATE_ENDNUL,     // Past NUL bytes at start of the node.
  STATE_PLAIN,      // Use each byte of the node.
  STATE_DEL,        // Delete a byte from the bad word.
  STATE_INS_PREP,   // Prepare for inserting bytes.
  STATE_INS,        // Insert a byte in the bad word.
  STATE_SWAP,       // Swap two bytes.
  STATE_UNSWAP,     // Undo swap two characters.
  STATE_SWAP3,      // Swap two characters over three.
  STATE_UNSWAP3,    // Undo Swap two characters over three.
  STATE_UNROT3L,    // Undo rotate three characters left
  STATE_UNROT3R,    // Undo rotate three characters right
  STATE_REP_INI,    // Prepare for using REP items.
  STATE_REP,        // Use matching REP items from the .aff file.
  STATE_REP_UNDO,   // Undo a REP item replacement.
  STATE_FINAL       // End of this node.
} state_T;
234
235	// Struct to keep the state at each level in suggest_try_change().
236	typedef struct trystate_S {
237	state_T ts_state; // state at this level, STATE_
238	int ts_score; // score
239	idx_T ts_arridx; // index in tree array, start of node
240	short ts_curi; // index in list of child nodes
241	char_u ts_fidx; // index in fword[], case-folded bad word
242	char_u ts_fidxtry; // ts_fidx at which bytes may be changed
243	char_u ts_twordlen; // valid length of tword[]
244	char_u ts_prefixdepth; // stack depth for end of prefix or
245	// PFD_PREFIXTREE or PFD_NOPREFIX
246	char_u ts_flags; // TSF_ flags
247	char_u ts_tcharlen; // number of bytes in tword character
248	char_u ts_tcharidx; // current byte index in tword character
249	char_u ts_isdiff; // DIFF_ values
250	char_u ts_fcharstart; // index in fword where badword char started
251	char_u ts_prewordlen; // length of word in "preword[]"
252	char_u ts_splitoff; // index in "tword" after last split
253	char_u ts_splitfidx; // "ts_fidx" at word split
254	char_u ts_complen; // nr of compound words used
255	char_u ts_compsplit; // index for "compflags" where word was spit
256	char_u ts_save_badflags; // su_badflags saved here
257	char_u ts_delidx; // index in fword for char that was deleted,
258	// valid when "ts_flags" has TSF_DIDDEL
259	} trystate_T;
260
// Use our own character-case definitions, because the current locale may
// differ from what the .spl file uses.
// These must not be called with negative number!
// Each macro evaluates "c" more than once, so the argument must not have
// side effects.
#include <wchar.h>
#include <wctype.h>  // for towupper(), towlower() and iswupper()
// Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
// that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
// the "w" library function for characters above 255.
#define SPELL_TOFOLD(c) \
  (enc_utf8 && (c) >= 128 ? utf_fold(c) \
   : (c) < 256 ? (int)spelltab.st_fold[c] : (int)towlower(c))

#define SPELL_TOUPPER(c) \
  (enc_utf8 && (c) >= 128 ? mb_toupper(c) \
   : (c) < 256 ? (int)spelltab.st_upper[c] : (int)towupper(c))

#define SPELL_ISUPPER(c) \
  (enc_utf8 && (c) >= 128 ? mb_isupper(c) \
   : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
278
279	// First language that is loaded, start of the linked list of loaded
280	// languages.
281	extern slang_T *first_lang;
282
283	// file used for "zG" and "zW"
284	extern char_u *int_wordlist;
285
286	extern spelltab_T spelltab;
287	extern int did_set_spelltab;
288
289	extern char *e_format;
290
291	#endif // NVIM_SPELL_DEFS_H
292

Browse the source code of neovim/src/nvim/spell_defs.h