WordList.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexlib/WordList.cxx]

1	// Scintilla source code edit control
2	/* @file WordList.cxx*
3	** Hold a list of words.
4	**/
5	// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6	// The License.txt file describes the conditions under which this software may be distributed.
7
8	#include <cstdlib>
9	#include <cassert>
10	#include <cstring>
11
12	#include <algorithm>
13	#include <iterator>
14	#include <memory>
15
16	#include "WordList.h"
17
18	using namespace Lexilla;
19
20	namespace {
21
22	/**
23	* Creates an array that points into each word in the string and puts \0 terminators
24	* after each word.
25	*/
26	std::unique_ptr<char []> ArrayFromWordList(char* wordlist, size_t slen, size_t len, bool onlyLineEnds = false) {
27	size_t words = `0`;
28	// For rapid determination of whether a character is a separator, build
29	// a look up table.
30	bool wordSeparator[`256`] = {}; // Initialise all to false.
31	wordSeparator[static_cast<unsigned int>(`'\r'`)] = true;
32	wordSeparator[static_cast<unsigned int>(`'\n'`)] = true;
33	if (!onlyLineEnds) {
34	wordSeparator[static_cast<unsigned int>(`' '`)] = true;
35	wordSeparator[static_cast<unsigned int>(`'\t'`)] = true;
36	}
37	unsigned char prev = `'\n'`;
38	for (int j = `0`; wordlist[j]; j++) {
39	const unsigned char curr = wordlist[j];
40	if (!wordSeparator[curr] && wordSeparator[prev])
41	words++;
42	prev = curr;
43	}
44	std::unique_ptr<char []> keywords = std::make_unique<char* *[]>(words + `1`);
45	size_t wordsStore = `0`;
46	if (words) {
47	unsigned char previous = `'\0'`;
48	for (size_t k = `0`; k < slen; k++) {
49	if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
50	if (!previous) {
51	keywords [wordsStore] = &wordlist[k];
52	wordsStore++;
53	}
54	} else {
55	wordlist[k] = `'\0'`;
56	}
57	previous = wordlist[k];
58	}
59	}
60	assert(wordsStore < (words + `1`));
61	keywords [wordsStore] = &wordlist[slen];
62	*len = wordsStore;
63	return keywords;
64	}
65
66	bool cmpWords(const char a, const* char b) noexcept* {
67	return strcmp(a, b) < `0`;
68	}
69
70	}
71
72	WordList::WordList(bool onlyLineEnds_) noexcept :
73	words(nullptr), list(nullptr), len(`0`), onlyLineEnds(onlyLineEnds_) {
74	// Prevent warnings by static analyzers about uninitialized starts.
75	starts[`0`] = -`1`;
76	}
77
78	WordList::~WordList() {
79	Clear();
80	}
81
82	WordList::operator bool() const noexcept {
83	return len != `0`;
84	}
85
86	bool WordList::operator!=(const WordList &other) const noexcept {
87	if (len != other.len)
88	return true;
89	for (size_t i=`0`; i<len; i++) {
90	if (strcmp(words[i], other.words[i]) != `0`)
91	return true;
92	}
93	return false;
94	}
95
96	int WordList::Length() const noexcept {
97	return static_cast<int>(len);
98	}
99
100	void WordList::Clear() noexcept {
101	delete []list;
102	list = nullptr;
103	delete []words;
104	words = nullptr;
105	len = `0`;
106	}
107
108	bool WordList::Set(const char *s) {
109	const size_t lenS = strlen(s) + `1`;
110	std::unique_ptr<char[]> listTemp = std::make_unique<char[]>(lenS);
111	memcpy(listTemp.get(), s, lenS);
112	size_t lenTemp = `0`;
113	std::unique_ptr<char *[]> wordsTemp = ArrayFromWordList(listTemp.get(), lenS - `1`, &lenTemp, onlyLineEnds);
114	std::sort(wordsTemp.get(), wordsTemp.get() + lenTemp, cmpWords);
115
116	if (lenTemp == len) {
117	bool changed = false;
118	for (size_t i = `0`; i < lenTemp; i++) {
119	if (strcmp(words[i], wordsTemp [i]) != `0`) {
120	changed = true;
121	break;
122	}
123	}
124	if (!changed) {
125	return false;
126	}
127	}
128
129	Clear();
130	words = wordsTemp.release();
131	list = listTemp.release();
132	len = lenTemp;
133	std::fill(starts, std::end(starts), -`1`);
134	for (int l = static_cast<int>(len - `1`); l >= `0`; l--) {
135	unsigned char indexChar = words[l][`0`];
136	starts[indexChar] = l;
137	}
138	return true;
139	}
140
141	/* Check whether a string is in the list.*
142	* List elements are either exact matches or prefixes.
143	* Prefix elements start with '^' and match all strings that start with the rest of the element
144	* so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
145	*/
146	bool WordList::InList(const char s) const* noexcept {
147	if (!words)
148	return false;
149	const unsigned char firstChar = s[`0`];
150	int j = starts[firstChar];
151	if (j >= `0`) {
152	while (words[j][`0`] == firstChar) {
153	if (s[`1`] == words[j][`1`]) {
154	const char *a = words[j] + `1`;
155	const char *b = s + `1`;
156	while (a && a == *b) {
157	a++;
158	b++;
159	}
160	if (!a && !b)
161	return true;
162	}
163	j++;
164	}
165	}
166	j = starts[static_cast<unsigned int>(`'^'`)];
167	if (j >= `0`) {
168	while (words[j][`0`] == `'^'`) {
169	const char *a = words[j] + `1`;
170	const char *b = s;
171	while (a && a == *b) {
172	a++;
173	b++;
174	}
175	if (!*a)
176	return true;
177	j++;
178	}
179	}
180	return false;
181	}
182
183	/* similar to InList, but word s can be a substring of keyword.*
184	* eg. the keyword define is defined as def~ine. This means the word must start
185	* with def to be a keyword, but also defi, defin and define are valid.
186	* The marker is ~ in this case.
187	*/
188	bool WordList::InListAbbreviated(const char s, const* char marker) const noexcept {
189	if (!words)
190	return false;
191	const unsigned char firstChar = s[`0`];
192	int j = starts[firstChar];
193	if (j >= `0`) {
194	while (words[j][`0`] == firstChar) {
195	bool isSubword = false;
196	int start = `1`;
197	if (words[j][`1`] == marker) {
198	isSubword = true;
199	start++;
200	}
201	if (s[`1`] == words[j][start]) {
202	const char *a = words[j] + start;
203	const char *b = s + `1`;
204	while (a && a == *b) {
205	a++;
206	if (*a == marker) {
207	isSubword = true;
208	a++;
209	}
210	b++;
211	}
212	if ((!a \|\| isSubword) && !b)
213	return true;
214	}
215	j++;
216	}
217	}
218	j = starts[static_cast<unsigned int>(`'^'`)];
219	if (j >= `0`) {
220	while (words[j][`0`] == `'^'`) {
221	const char *a = words[j] + `1`;
222	const char *b = s;
223	while (a && a == *b) {
224	a++;
225	b++;
226	}
227	if (!*a)
228	return true;
229	j++;
230	}
231	}
232	return false;
233	}
234
235	/* similar to InListAbbreviated, but word s can be a abridged version of a keyword.*
236	* eg. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
237	* "after." and suffix (ends with) of ":" to be a keyword, Hence "after.field:" , "after.form.item:" are valid.
238	* Similarly "~.is.valid" keyword is suffix only... hence "field.is.valid" , "form.is.valid" are valid.
239	* The marker is ~ in this case.
240	* No multiple markers check is done and wont work.
241	*/
242	bool WordList::InListAbridged(const char s, const* char marker) const noexcept {
243	if (!words)
244	return false;
245	const unsigned char firstChar = s[`0`];
246	int j = starts[firstChar];
247	if (j >= `0`) {
248	while (words[j][`0`] == firstChar) {
249	const char *a = words[j];
250	const char *b = s;
251	while (a && a == *b) {
252	a++;
253	if (*a == marker) {
254	a++;
255	const size_t suffixLengthA = strlen(a);
256	const size_t suffixLengthB = strlen(b);
257	if (suffixLengthA >= suffixLengthB)
258	break;
259	b = b + suffixLengthB - suffixLengthA - `1`;
260	}
261	b++;
262	}
263	if (!a && !b)
264	return true;
265	j++;
266	}
267	}
268
269	j = starts[static_cast<unsigned int>(marker)];
270	if (j >= `0`) {
271	while (words[j][`0`] == marker) {
272	const char *a = words[j] + `1`;
273	const char *b = s;
274	const size_t suffixLengthA = strlen(a);
275	const size_t suffixLengthB = strlen(b);
276	if (suffixLengthA > suffixLengthB) {
277	j++;
278	continue;
279	}
280	b = b + suffixLengthB - suffixLengthA;
281
282	while (a && a == *b) {
283	a++;
284	b++;
285	}
286	if (!a && !b)
287	return true;
288	j++;
289	}
290	}
291
292	return false;
293	}
294
295	const char WordList::WordAt(int* n) const noexcept {
296	return words[n];
297	}
298
299

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexlib/WordList.cxx