LexPython.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexPython.cxx]

1	// Scintilla source code edit control
/** @file LexPython.cxx
3	** Lexer for Python.
4	**/
5	// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6	// The License.txt file describes the conditions under which this software may be distributed.
7
8	#include <cstdlib>
9	#include <cassert>
10	#include <cstring>
11
12	#include <string>
13	#include <string_view>
14	#include <vector>
15	#include <map>
16	#include <algorithm>
17	#include <functional>
18
19	#include "ILexer.h"
20	#include "Scintilla.h"
21	#include "SciLexer.h"
22
23	#include "StringCopy.h"
24	#include "WordList.h"
25	#include "LexAccessor.h"
26	#include "Accessor.h"
27	#include "StyleContext.h"
28	#include "CharacterSet.h"
29	#include "CharacterCategory.h"
30	#include "LexerModule.h"
31	#include "OptionSet.h"
32	#include "SubStyles.h"
33	#include "DefaultLexer.h"
34
35	using namespace Scintilla;
36	using namespace Lexilla;
37
38	namespace {
39	// Use an unnamed namespace to protect the functions and classes from name conflicts
40
/* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
42	have arbitrary expressions in {}. The tokens in the expressions are lexed as if
43	they were outside of any string. Expressions may contain { and } characters as
44	long as there is a closing } for every {, may be 2+ lines in a triple quoted
45	string, and may have a formatting specifier following a ! or :, but both !
46	and : are valid inside of a bracketed expression and != is a valid
47	expression token even outside of a bracketed expression.
48
49	When in an f-string expression, the lexer keeps track of the state value of
50	the f-string and the nesting count for the expression (# of [, (, { seen - # of
51	}, ), ] seen). f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of
52	states and nesting counts is kept. If a f-string expression continues beyond
53	the end of a line, this stack is saved in a std::map that maps a line number to
54	the stack at the end of that line. std::vector is used for the stack.
55
56	The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
57	*/
// One entry of the f-string expression stack: the f-string style to resume
// when the expression ends, plus the count of currently open brackets seen
// inside the expression. Members are zero-initialised by default so that a
// default-constructed entry never holds indeterminate values.
struct SingleFStringExpState {
	int state = 0;
	int nestingCount = 0;
};
62
/* Kind of the most recently seen keyword; kwCDef, kwCTypeName and kwCPDef are only used for Cython */
enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };

// Bit flags selecting which string prefix characters are recognised:
// u'' (litU), b'' (litB) and f'' (litF). Values are combined with bitwise OR.
enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };

// Indicator number passed to IndicatorFill for inconsistent-indentation marking.
constexpr int indicatorWhitespace = 1;
69
70	bool IsPyComment(Accessor &styler, Sci_Position pos, Sci_Position len) {
71	return len > `0` && styler [pos] == `'#'`;
72	}
73
74	constexpr bool IsPyStringTypeChar(int ch, literalsAllowed allowed) noexcept {
75	return
76	((allowed & litB) && (ch == `'b'` \|\| ch == `'B'`)) \|\|
77	((allowed & litU) && (ch == `'u'` \|\| ch == `'U'`)) \|\|
78	((allowed & litF) && (ch == `'f'` \|\| ch == `'F'`));
79	}
80
81	bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) noexcept {
82	if (ch == `'\''` \|\| ch == `'"'`)
83	return true;
84	if (IsPyStringTypeChar(ch, allowed)) {
85	if (chNext == `'"'` \|\| chNext == `'\''`)
86	return true;
87	if ((chNext == `'r'` \|\| chNext == `'R'`) && (chNext2 == `'"'` \|\| chNext2 == `'\''`))
88	return true;
89	}
90	if ((ch == `'r'` \|\| ch == `'R'`) && (chNext == `'"'` \|\| chNext == `'\''`))
91	return true;
92
93	return false;
94	}
95
96	constexpr bool IsPyFStringState(int st) noexcept {
97	return ((st == SCE_P_FCHARACTER) \|\| (st == SCE_P_FSTRING) \|\|
98	(st == SCE_P_FTRIPLE) \|\| (st == SCE_P_FTRIPLEDOUBLE));
99	}
100
101	constexpr bool IsPySingleQuoteStringState(int st) noexcept {
102	return ((st == SCE_P_CHARACTER) \|\| (st == SCE_P_STRING) \|\|
103	(st == SCE_P_FCHARACTER) \|\| (st == SCE_P_FSTRING));
104	}
105
106	constexpr bool IsPyTripleQuoteStringState(int st) noexcept {
107	return ((st == SCE_P_TRIPLE) \|\| (st == SCE_P_TRIPLEDOUBLE) \|\|
108	(st == SCE_P_FTRIPLE) \|\| (st == SCE_P_FTRIPLEDOUBLE));
109	}
110
111	char GetPyStringQuoteChar(int st) noexcept {
112	if ((st == SCE_P_CHARACTER) \|\| (st == SCE_P_FCHARACTER) \|\|
113	(st == SCE_P_TRIPLE) \|\| (st == SCE_P_FTRIPLE))
114	return `'\''`;
115	if ((st == SCE_P_STRING) \|\| (st == SCE_P_FSTRING) \|\|
116	(st == SCE_P_TRIPLEDOUBLE) \|\| (st == SCE_P_FTRIPLEDOUBLE))
117	return `'"'`;
118
119	return `'\0'`;
120	}
121
122	void PushStateToStack(int state, std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) {
123	SingleFStringExpState single = {state, `0`};
124	stack.push_back(single);
125
126	currentFStringExp = &stack.back();
127	}
128
129	int PopFromStateStack(std::vector<SingleFStringExpState> &stack, SingleFStringExpState &currentFStringExp) noexcept* {
130	int state = `0`;
131
132	if (!stack.empty()) {
133	state = stack.back().state;
134	stack.pop_back();
135	}
136
137	if (stack.empty()) {
138	currentFStringExp = nullptr;
139	} else {
140	currentFStringExp = &stack.back();
141	}
142
143	return state;
144	}
145
146	/ Return the state to use for the string starting at i; nextIndex will be set to the first index following the quote(s) /*
147	int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) {
148	char ch = styler.SafeGetCharAt(i);
149	char chNext = styler.SafeGetCharAt(i + `1`);
150	const int firstIsF = (ch == `'f'` \|\| ch == `'F'`);
151
152	// Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
153	if (ch == `'r'` \|\| ch == `'R'`) {
154	i++;
155	ch = styler.SafeGetCharAt(i);
156	chNext = styler.SafeGetCharAt(i + `1`);
157	} else if (IsPyStringTypeChar(ch, allowed)) {
158	if (chNext == `'r'` \|\| chNext == `'R'`)
159	i += `2`;
160	else
161	i += `1`;
162	ch = styler.SafeGetCharAt(i);
163	chNext = styler.SafeGetCharAt(i + `1`);
164	}
165
166	if (ch != `'"'` && ch != `'\''`) {
167	*nextIndex = i + `1`;
168	return SCE_P_DEFAULT;
169	}
170
171	if (ch == chNext && ch == styler.SafeGetCharAt(i + `2`)) {
172	*nextIndex = i + `3`;
173
174	if (ch == `'"'`)
175	return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
176	else
177	return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
178	} else {
179	*nextIndex = i + `1`;
180
181	if (ch == `'"'`)
182	return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING);
183	else
184	return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
185	}
186	}
187
188	inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
189	if (IsASCII(ch))
190	return (IsAlphaNumeric(ch) \|\| ch == `'.'` \|\| ch == `'_'`);
191
192	if (!unicodeIdentifiers)
193	return false;
194
195	// Python uses the XID_Continue set from Unicode data
196	return IsXidContinue(ch);
197	}
198
199	inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
200	if (IsASCII(ch))
201	return (IsUpperOrLowerCase(ch) \|\| ch == `'_'`);
202
203	if (!unicodeIdentifiers)
204	return false;
205
206	// Python uses the XID_Start set from Unicode data
207	return IsXidStart(ch);
208	}
209
210	bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) {
211	const Sci_Position line = styler.GetLine(pos);
212	const Sci_Position start_pos = styler.LineStart(line);
213	for (Sci_Position i = start_pos; i < pos; i++) {
214	const char ch = styler [i];
215	if (!(ch == `' '` \|\| ch == `'\t'`))
216	return false;
217	}
218	return true;
219	}
220
221	// Options used for LexerPython
222	struct OptionsPython {
223	int whingeLevel;
224	bool base2or8Literals;
225	bool stringsU;
226	bool stringsB;
227	bool stringsF;
228	bool stringsOverNewline;
229	bool keywords2NoSubIdentifiers;
230	bool fold;
231	bool foldQuotes;
232	bool foldCompact;
233	bool unicodeIdentifiers;
234
235	OptionsPython() noexcept {
236	whingeLevel = `0`;
237	base2or8Literals = true;
238	stringsU = true;
239	stringsB = true;
240	stringsF = true;
241	stringsOverNewline = false;
242	keywords2NoSubIdentifiers = false;
243	fold = false;
244	foldQuotes = false;
245	foldCompact = false;
246	unicodeIdentifiers = true;
247	}
248
249	literalsAllowed AllowedLiterals() const noexcept {
250	literalsAllowed allowedLiterals = stringsU ? litU : litNone;
251	if (stringsB)
252	allowedLiterals = static_cast<literalsAllowed>(allowedLiterals \| litB);
253	if (stringsF)
254	allowedLiterals = static_cast<literalsAllowed>(allowedLiterals \| litF);
255	return allowedLiterals;
256	}
257	};
258
259	const char *const pythonWordListDesc[] = {
260	"Keywords",
261	"Highlighted identifiers",
262	nullptr
263	};
264
265	struct OptionSetPython : public OptionSet<OptionsPython> {
266	OptionSetPython() {
267	DefineProperty("tab.timmy.whinge.level", &OptionsPython::whingeLevel,
268	"For Python code, checks whether indenting is consistent. "
269	"The default, 0 turns off indentation checking, "
270	"1 checks whether each line is potentially inconsistent with the previous line, "
271	"2 checks whether any space characters occur before a tab character in the indentation, "
272	"3 checks whether any spaces are in the indentation, and "
273	"4 checks for any tab characters in the indentation. "
274	"1 is a good level to use.");
275
276	DefineProperty("lexer.python.literals.binary", &OptionsPython::base2or8Literals,
277	"Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.");
278
279	DefineProperty("lexer.python.strings.u", &OptionsPython::stringsU,
280	"Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3.");
281
282	DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB,
283	"Set to 0 to not recognise Python 3 bytes literals b\"x\".");
284
285	DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF,
286	"Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
287
288	DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline,
289	"Set to 1 to allow strings to span newline characters.");
290
291	DefineProperty("lexer.python.keywords2.no.sub.identifiers", &OptionsPython::keywords2NoSubIdentifiers,
292	"When enabled, it will not style keywords2 items that are used as a sub-identifier. "
293	"Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item.");
294
295	DefineProperty("fold", &OptionsPython::fold);
296
297	DefineProperty("fold.quotes.python", &OptionsPython::foldQuotes,
298	"This option enables folding multi-line quoted strings when using the Python lexer.");
299
300	DefineProperty("fold.compact", &OptionsPython::foldCompact);
301
302	DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers,
303	"Set to 0 to not recognise Python 3 Unicode identifiers.");
304
305	DefineWordListSets(pythonWordListDesc);
306	}
307	};
308
309	const char styleSubable[] = { SCE_P_IDENTIFIER, `0` };
310
311	LexicalClass lexicalClasses[] = {
312	// Lexer Python SCLEX_PYTHON SCE_P_:
313	`0`, "SCE_P_DEFAULT", "default", "White space",
314	`1`, "SCE_P_COMMENTLINE", "comment line", "Comment",
315	`2`, "SCE_P_NUMBER", "literal numeric", "Number",
316	`3`, "SCE_P_STRING", "literal string", "String",
317	`4`, "SCE_P_CHARACTER", "literal string", "Single quoted string",
318	`5`, "SCE_P_WORD", "keyword", "Keyword",
319	`6`, "SCE_P_TRIPLE", "literal string", "Triple quotes",
320	`7`, "SCE_P_TRIPLEDOUBLE", "literal string", "Triple double quotes",
321	`8`, "SCE_P_CLASSNAME", "identifier", "Class name definition",
322	`9`, "SCE_P_DEFNAME", "identifier", "Function or method name definition",
323	`10`, "SCE_P_OPERATOR", "operator", "Operators",
324	`11`, "SCE_P_IDENTIFIER", "identifier", "Identifiers",
325	`12`, "SCE_P_COMMENTBLOCK", "comment", "Comment-blocks",
326	`13`, "SCE_P_STRINGEOL", "error literal string", "End of line where string is not closed",
327	`14`, "SCE_P_WORD2", "identifier", "Highlighted identifiers",
328	`15`, "SCE_P_DECORATOR", "preprocessor", "Decorators",
329	`16`, "SCE_P_FSTRING", "literal string interpolated", "F-String",
330	`17`, "SCE_P_FCHARACTER", "literal string interpolated", "Single quoted f-string",
331	`18`, "SCE_P_FTRIPLE", "literal string interpolated", "Triple quoted f-string",
332	`19`, "SCE_P_FTRIPLEDOUBLE", "literal string interpolated", "Triple double quoted f-string",
333	};
334
335	}
336
337	class LexerPython : public DefaultLexer {
338	WordList keywords;
339	WordList keywords2;
340	OptionsPython options;
341	OptionSetPython osPython;
342	enum { ssIdentifier };
343	SubStyles subStyles;
344	std::map<Sci_Position, std::vector<SingleFStringExpState> > ftripleStateAtEol;
345	public:
346	explicit LexerPython() :
347	DefaultLexer ("python", SCLEX_PYTHON, lexicalClasses, ELEMENTS(lexicalClasses)),
348	subStyles (styleSubable, `0x80`, `0x40`, `0`) {
349	}
350	~LexerPython() override {
351	}
352	void SCI_METHOD Release() override {
353	delete this;
354	}
355	int SCI_METHOD Version() const override {
356	return lvRelease5;
357	}
358	const char *SCI_METHOD PropertyNames() override {
359	return osPython.PropertyNames();
360	}
361	int SCI_METHOD PropertyType(const char *name) override {
362	return osPython.PropertyType(name);
363	}
364	const char SCI_METHOD DescribeProperty(const* char *name) override {
365	return osPython.DescribeProperty(name);
366	}
367	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override;
368	const char * SCI_METHOD PropertyGet(const char *key) override {
369	return osPython.PropertyGet(key);
370	}
371	const char *SCI_METHOD DescribeWordListSets() override {
372	return osPython.DescribeWordListSets();
373	}
374	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
375	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
376	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
377
378	void SCI_METHOD PrivateCall(int, void* *) override {
379	return nullptr;
380	}
381
382	int SCI_METHOD LineEndTypesSupported() override {
383	return SC_LINE_END_TYPE_UNICODE;
384	}
385
386	int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
387	return subStyles.Allocate(styleBase, numberStyles);
388	}
389	int SCI_METHOD SubStylesStart(int styleBase) override {
390	return subStyles.Start(styleBase);
391	}
392	int SCI_METHOD SubStylesLength(int styleBase) override {
393	return subStyles.Length(styleBase);
394	}
395	int SCI_METHOD StyleFromSubStyle(int subStyle) override {
396	const int styleBase = subStyles.BaseStyle(subStyle);
397	return styleBase;
398	}
399	int SCI_METHOD PrimaryStyleFromStyle(int style) override {
400	return style;
401	}
402	void SCI_METHOD FreeSubStyles() override {
403	subStyles.Free();
404	}
405	void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
406	subStyles.SetIdentifiers(style, identifiers);
407	}
408	int SCI_METHOD DistanceToSecondaryStyles() override {
409	return `0`;
410	}
411	const char *SCI_METHOD GetSubStyleBases() override {
412	return styleSubable;
413	}
414
415	static ILexer5 *LexerFactoryPython() {
416	return new LexerPython ();
417	}
418
419	private:
420	void ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState &currentFStringExp, bool* &inContinuedString);
421	};
422
423	Sci_Position SCI_METHOD LexerPython::PropertySet(const char key, const* char *val) {
424	if (osPython.PropertySet(&options, key, val)) {
425	return `0`;
426	}
427	return -`1`;
428	}
429
430	Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) {
431	WordList wordListN = nullptr*;
432	switch (n) {
433	case `0`:
434	wordListN = &keywords;
435	break;
436	case `1`:
437	wordListN = &keywords2;
438	break;
439	default:
440	break;
441	}
442	Sci_Position firstModification = -`1`;
443	if (wordListN) {
444	WordList wlNew;
445	wlNew.Set(wl);
446	if (*wordListN != wlNew) {
447	wordListN->Set(wl);
448	firstModification = `0`;
449	}
450	}
451	return firstModification;
452	}
453
454	void LexerPython::ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState &currentFStringExp, bool* &inContinuedString) {
455	long deepestSingleStateIndex = -`1`;
456	unsigned long i;
457
458	// Find the deepest single quote state because that string will end; no \ continuation in f-string
459	for (i = `0`; i < fstringStateStack.size(); i++) {
460	if (IsPySingleQuoteStringState(fstringStateStack [i].state)) {
461	deepestSingleStateIndex = i;
462	break;
463	}
464	}
465
466	if (deepestSingleStateIndex != -`1`) {
467	sc.SetState(fstringStateStack [deepestSingleStateIndex].state);
468	while (fstringStateStack.size() > static_cast<unsigned long>(deepestSingleStateIndex)) {
469	PopFromStateStack(fstringStateStack, currentFStringExp);
470	}
471	}
472	if (!fstringStateStack.empty()) {
473	std::pair<Sci_Position, std::vector<SingleFStringExpState> > val;
474	val.first = sc.currentLine;
475	val.second = fstringStateStack;
476
477	ftripleStateAtEol.insert(val);
478	}
479
480	if ((sc.state == SCE_P_DEFAULT)
481	\|\| IsPyTripleQuoteStringState(sc.state)) {
482	// Perform colourisation of white space and triple quoted strings at end of each line to allow
483	// tab marking to work inside white space and triple quoted strings
484	sc.SetState(sc.state);
485	}
486	if (IsPySingleQuoteStringState(sc.state)) {
487	if (inContinuedString \|\| options.stringsOverNewline) {
488	inContinuedString = false;
489	} else {
490	sc.ChangeState(SCE_P_STRINGEOL);
491	sc.ForwardSetState(SCE_P_DEFAULT);
492	}
493	}
494	}
495
496	void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
497	Accessor styler(pAccess, nullptr);
498
499	// Track whether in f-string expression; vector is used for a stack to
500	// handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
501	std::vector<SingleFStringExpState> fstringStateStack;
502	SingleFStringExpState currentFStringExp = nullptr*;
503
504	const Sci_Position endPos = startPos + length;
505
506	// Backtrack to previous line in case need to fix its tab whinging
507	Sci_Position lineCurrent = styler.GetLine(startPos);
508	if (startPos > `0`) {
509	if (lineCurrent > `0`) {
510	lineCurrent--;
511	// Look for backslash-continued lines
512	while (lineCurrent > `0`) {
513	const Sci_Position eolPos = styler.LineStart(lineCurrent) - `1`;
514	const int eolStyle = styler.StyleAt(eolPos);
515	if (eolStyle == SCE_P_STRING
516	\|\| eolStyle == SCE_P_CHARACTER
517	\|\| eolStyle == SCE_P_STRINGEOL) {
518	lineCurrent -= `1`;
519	} else {
520	break;
521	}
522	}
523	startPos = styler.LineStart(lineCurrent);
524	}
525	initStyle = startPos == `0` ? SCE_P_DEFAULT : styler.StyleAt(startPos - `1`);
526	}
527
528	const literalsAllowed allowedLiterals = options.AllowedLiterals();
529
530	initStyle = initStyle & `31`;
531	if (initStyle == SCE_P_STRINGEOL) {
532	initStyle = SCE_P_DEFAULT;
533	}
534
535	// Set up fstate stack from last line and remove any subsequent ftriple at eol states
536	std::map<Sci_Position, std::vector<SingleFStringExpState> >::iterator it;
537	it = ftripleStateAtEol.find(lineCurrent - `1`);
538	if (it != ftripleStateAtEol.end() && !it ->second.empty()) {
539	fstringStateStack = it ->second;
540	currentFStringExp = &fstringStateStack.back();
541	}
542	it = ftripleStateAtEol.lower_bound(lineCurrent);
543	if (it != ftripleStateAtEol.end()) {
544	ftripleStateAtEol.erase(it, ftripleStateAtEol.end());
545	}
546
547	kwType kwLast = kwOther;
548	int spaceFlags = `0`;
549	styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
550	bool base_n_number = false;
551
552	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_P_IDENTIFIER);
553
554	StyleContext sc(startPos, endPos - startPos, initStyle, styler);
555
556	bool indentGood = true;
557	Sci_Position startIndicator = sc.currentPos;
558	bool inContinuedString = false;
559
560	for (; sc.More(); sc.Forward()) {
561
562	if (sc.atLineStart) {
563	styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
564	indentGood = true;
565	if (options.whingeLevel == `1`) {
566	indentGood = (spaceFlags & wsInconsistent) == `0`;
567	} else if (options.whingeLevel == `2`) {
568	indentGood = (spaceFlags & wsSpaceTab) == `0`;
569	} else if (options.whingeLevel == `3`) {
570	indentGood = (spaceFlags & wsSpace) == `0`;
571	} else if (options.whingeLevel == `4`) {
572	indentGood = (spaceFlags & wsTab) == `0`;
573	}
574	if (!indentGood) {
575	styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, `0`);
576	startIndicator = sc.currentPos;
577	}
578	}
579
580	if (sc.atLineEnd) {
581	ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
582	lineCurrent++;
583	if (!sc.More())
584	break;
585	}
586
587	bool needEOLCheck = false;
588
589
590	if (sc.state == SCE_P_OPERATOR) {
591	kwLast = kwOther;
592	sc.SetState(SCE_P_DEFAULT);
593	} else if (sc.state == SCE_P_NUMBER) {
594	if (!IsAWordChar(sc.ch, false) &&
595	!(!base_n_number && ((sc.ch == `'+'` \|\| sc.ch == `'-'`) && (sc.chPrev == `'e'` \|\| sc.chPrev == `'E'`)))) {
596	sc.SetState(SCE_P_DEFAULT);
597	}
598	} else if (sc.state == SCE_P_IDENTIFIER) {
599	if ((sc.ch == `'.'`) \|\| (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
600	char s[`100`];
601	sc.GetCurrent(s, sizeof(s));
602	int style = SCE_P_IDENTIFIER;
603	if ((kwLast == kwImport) && (strcmp(s, "as") == `0`)) {
604	style = SCE_P_WORD;
605	} else if (keywords.InList(s)) {
606	style = SCE_P_WORD;
607	} else if (kwLast == kwClass) {
608	style = SCE_P_CLASSNAME;
609	} else if (kwLast == kwDef) {
610	style = SCE_P_DEFNAME;
611	} else if (kwLast == kwCDef \|\| kwLast == kwCPDef) {
612	Sci_Position pos = sc.currentPos;
613	unsigned char ch = styler.SafeGetCharAt(pos, `'\0'`);
614	while (ch != `'\0'`) {
615	if (ch == `'('`) {
616	style = SCE_P_DEFNAME;
617	break;
618	} else if (ch == `':'`) {
619	style = SCE_P_CLASSNAME;
620	break;
621	} else if (ch == `' '` \|\| ch == `'\t'` \|\| ch == `'\n'` \|\| ch == `'\r'`) {
622	pos++;
623	ch = styler.SafeGetCharAt(pos, `'\0'`);
624	} else {
625	break;
626	}
627	}
628	} else if (keywords2.InList(s)) {
629	if (options.keywords2NoSubIdentifiers) {
630	// We don't want to highlight keywords2
631	// that are used as a sub-identifier,
632	// i.e. not open in "foo.open".
633	const Sci_Position pos = styler.GetStartSegment() - `1`;
634	if (pos < `0` \|\| (styler.SafeGetCharAt(pos, `'\0'`) != `'.'`))
635	style = SCE_P_WORD2;
636	} else {
637	style = SCE_P_WORD2;
638	}
639	} else {
640	const int subStyle = classifierIdentifiers.ValueFor(s);
641	if (subStyle >= `0`) {
642	style = subStyle;
643	}
644	}
645	sc.ChangeState(style);
646	sc.SetState(SCE_P_DEFAULT);
647	if (style == SCE_P_WORD) {
648	if (`0` == strcmp(s, "class"))
649	kwLast = kwClass;
650	else if (`0` == strcmp(s, "def"))
651	kwLast = kwDef;
652	else if (`0` == strcmp(s, "import"))
653	kwLast = kwImport;
654	else if (`0` == strcmp(s, "cdef"))
655	kwLast = kwCDef;
656	else if (`0` == strcmp(s, "cpdef"))
657	kwLast = kwCPDef;
658	else if (`0` == strcmp(s, "cimport"))
659	kwLast = kwImport;
660	else if (kwLast != kwCDef && kwLast != kwCPDef)
661	kwLast = kwOther;
662	} else if (kwLast != kwCDef && kwLast != kwCPDef) {
663	kwLast = kwOther;
664	}
665	}
666	} else if ((sc.state == SCE_P_COMMENTLINE) \|\| (sc.state == SCE_P_COMMENTBLOCK)) {
667	if (sc.ch == `'\r'` \|\| sc.ch == `'\n'`) {
668	sc.SetState(SCE_P_DEFAULT);
669	}
670	} else if (sc.state == SCE_P_DECORATOR) {
671	if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
672	sc.SetState(SCE_P_DEFAULT);
673	}
674	} else if (IsPySingleQuoteStringState(sc.state)) {
675	if (sc.ch == `'\\'`) {
676	if ((sc.chNext == `'\r'`) && (sc.GetRelative(`2`) == `'\n'`)) {
677	sc.Forward();
678	}
679	if (sc.chNext == `'\n'` \|\| sc.chNext == `'\r'`) {
680	inContinuedString = true;
681	} else {
682	// Don't roll over the newline.
683	sc.Forward();
684	}
685	} else if (sc.ch == GetPyStringQuoteChar(sc.state)) {
686	sc.ForwardSetState(SCE_P_DEFAULT);
687	needEOLCheck = true;
688	}
689	} else if ((sc.state == SCE_P_TRIPLE) \|\| (sc.state == SCE_P_FTRIPLE)) {
690	if (sc.ch == `'\\'`) {
691	sc.Forward();
692	} else if (sc.Match(R"(''')")) {
693	sc.Forward();
694	sc.Forward();
695	sc.ForwardSetState(SCE_P_DEFAULT);
696	needEOLCheck = true;
697	}
698	} else if ((sc.state == SCE_P_TRIPLEDOUBLE) \|\| (sc.state == SCE_P_FTRIPLEDOUBLE)) {
699	if (sc.ch == `'\\'`) {
700	sc.Forward();
701	} else if (sc.Match(R"(""")")) {
702	sc.Forward();
703	sc.Forward();
704	sc.ForwardSetState(SCE_P_DEFAULT);
705	needEOLCheck = true;
706	}
707	}
708
709	// Note if used and not if else because string states also match
710	// some of the above clauses
711	if (IsPyFStringState(sc.state) && sc.ch == `'{'`) {
712	if (sc.chNext == `'{'`) {
713	sc.Forward();
714	} else {
715	PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
716	sc.ForwardSetState(SCE_P_DEFAULT);
717	}
718	needEOLCheck = true;
719	}
720
721	// If in an f-string expression, check for the ending quote(s)
722	// and end f-string to handle syntactically incorrect cases like
723	// f'{' and f"""{"""
724	if (!fstringStateStack.empty() && (sc.ch == `'\''` \|\| sc.ch == `'"'`)) {
725	long matching_stack_i = -`1`;
726	for (unsigned long stack_i = `0`; stack_i < fstringStateStack.size() && matching_stack_i == -`1`; stack_i++) {
727	const int stack_state = fstringStateStack [stack_i].state;
728	const char quote = GetPyStringQuoteChar(stack_state);
729	if (sc.ch == quote) {
730	if (IsPySingleQuoteStringState(stack_state)) {
731	matching_stack_i = stack_i;
732	} else if (quote == `'"'` ? sc.Match(R"(""")") : sc.Match("'''")) {
733	matching_stack_i = stack_i;
734	}
735	}
736	}
737
738	if (matching_stack_i != -`1`) {
739	sc.SetState(fstringStateStack [matching_stack_i].state);
740	if (IsPyTripleQuoteStringState(fstringStateStack [matching_stack_i].state)) {
741	sc.Forward();
742	sc.Forward();
743	}
744	sc.ForwardSetState(SCE_P_DEFAULT);
745	needEOLCheck = true;
746
747	while (fstringStateStack.size() > static_cast<unsigned long>(matching_stack_i)) {
748	PopFromStateStack(fstringStateStack, currentFStringExp);
749	}
750	}
751	}
752	// End of code to find the end of a state
753
754	if (!indentGood && !IsASpaceOrTab(sc.ch)) {
755	styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, `1`);
756	startIndicator = sc.currentPos;
757	indentGood = true;
758	}
759
760	// One cdef or cpdef line, clear kwLast only at end of line
761	if ((kwLast == kwCDef \|\| kwLast == kwCPDef) && sc.atLineEnd) {
762	kwLast = kwOther;
763	}
764
765	// State exit code may have moved on to end of line
766	if (needEOLCheck && sc.atLineEnd) {
767	ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
768	lineCurrent++;
769	styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
770	if (!sc.More())
771	break;
772	}
773
774	// If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
775	if (currentFStringExp && !IsPySingleQuoteStringState(sc.state) && !IsPyTripleQuoteStringState(sc.state)) {
776	if (currentFStringExp->nestingCount == `0` && (sc.ch == `'}'` \|\| sc.ch == `':'` \|\| (sc.ch == `'!'` && sc.chNext != `'='`))) {
777	sc.SetState(PopFromStateStack(fstringStateStack, currentFStringExp));
778	} else {
779	if (sc.ch == `'{'` \|\| sc.ch == `'['` \|\| sc.ch == `'('`) {
780	currentFStringExp->nestingCount++;
781	} else if (sc.ch == `'}'` \|\| sc.ch == `']'` \|\| sc.ch == `')'`) {
782	currentFStringExp->nestingCount--;
783	}
784	}
785	}
786
787	// Check for a new state starting character
788	if (sc.state == SCE_P_DEFAULT) {
789	if (IsADigit(sc.ch) \|\| (sc.ch == `'.'` && IsADigit(sc.chNext))) {
790	if (sc.ch == `'0'` && (sc.chNext == `'x'` \|\| sc.chNext == `'X'`)) {
791	base_n_number = true;
792	sc.SetState(SCE_P_NUMBER);
793	} else if (sc.ch == `'0'` &&
794	(sc.chNext == `'o'` \|\| sc.chNext == `'O'` \|\| sc.chNext == `'b'` \|\| sc.chNext == `'B'`)) {
795	if (options.base2or8Literals) {
796	base_n_number = true;
797	sc.SetState(SCE_P_NUMBER);
798	} else {
799	sc.SetState(SCE_P_NUMBER);
800	sc.ForwardSetState(SCE_P_IDENTIFIER);
801	}
802	} else {
803	base_n_number = false;
804	sc.SetState(SCE_P_NUMBER);
805	}
806	} else if (isoperator(sc.ch) \|\| sc.ch == '`') {
807	sc.SetState(SCE_P_OPERATOR);
808	} else if (sc.ch == `'#'`) {
809	sc.SetState(sc.chNext == `'#'` ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);
810	} else if (sc.ch == `'@'`) {
811	if (IsFirstNonWhitespace(sc.currentPos, styler))
812	sc.SetState(SCE_P_DECORATOR);
813	else
814	sc.SetState(SCE_P_OPERATOR);
815	} else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(`2`), allowedLiterals)) {
816	Sci_PositionU nextIndex = `0`;
817	sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));
818	while (nextIndex > (sc.currentPos + `1`) && sc.More()) {
819	sc.Forward();
820	}
821	} else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
822	sc.SetState(SCE_P_IDENTIFIER);
823	}
824	}
825	}
826	styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, `0`);
827	sc.Complete();
828	}
829
830	static bool IsCommentLine(Sci_Position line, Accessor &styler) {
831	const Sci_Position pos = styler.LineStart(line);
832	const Sci_Position eol_pos = styler.LineStart(line + `1`) - `1`;
833	for (Sci_Position i = pos; i < eol_pos; i++) {
834	const char ch = styler [i];
835	if (ch == `'#'`)
836	return true;
837	else if (ch != `' '` && ch != `'\t'`)
838	return false;
839	}
840	return false;
841	}
842
843	static bool IsQuoteLine(Sci_Position line, const Accessor &styler) {
844	const int style = styler.StyleAt(styler.LineStart(line)) & `31`;
845	return IsPyTripleQuoteStringState(style);
846	}
847
848
849	void SCI_METHOD LexerPython::Fold(Sci_PositionU startPos, Sci_Position length, int /initStyle - unused/, IDocument *pAccess) {
850	if (!options.fold)
851	return;
852
853	Accessor styler(pAccess, nullptr);
854
855	const Sci_Position maxPos = startPos + length;
856	const Sci_Position maxLines = (maxPos == styler.Length()) ? styler.GetLine(maxPos) : styler.GetLine(maxPos - `1`); // Requested last line
857	const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
858
859	// Backtrack to previous non-blank line so we can determine indent level
860	// for any white space lines (needed esp. within triple quoted strings)
861	// and so we can fix any preceding fold level (which is why we go back
862	// at least one line in all cases)
863	int spaceFlags = `0`;
864	Sci_Position lineCurrent = styler.GetLine(startPos);
865	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, nullptr);
866	while (lineCurrent > `0`) {
867	lineCurrent--;
868	indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, nullptr);
869	if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
870	(!IsCommentLine(lineCurrent, styler)) &&
871	(!IsQuoteLine(lineCurrent, styler)))
872	break;
873	}
874	int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
875
876	// Set up initial loop state
877	startPos = styler.LineStart(lineCurrent);
878	int prev_state = SCE_P_DEFAULT & `31`;
879	if (lineCurrent >= `1`)
880	prev_state = styler.StyleAt(startPos - `1`) & `31`;
881	int prevQuote = options.foldQuotes && IsPyTripleQuoteStringState(prev_state);
882
883	// Process all characters to end of requested range or end of any triple quote
884	//that hangs over the end of the range. Cap processing in all cases
885	// to end of document (in case of unclosed quote at end).
886	while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) \|\| prevQuote)) {
887
888	// Gather info
889	int lev = indentCurrent;
890	Sci_Position lineNext = lineCurrent + `1`;
891	int indentNext = indentCurrent;
892	int quote = false;
893	if (lineNext <= docLines) {
894	// Information about next line is only available if not at end of document
895	indentNext = styler.IndentAmount(lineNext, &spaceFlags, nullptr);
896	const Sci_Position lookAtPos = (styler.LineStart(lineNext) == styler.Length()) ? styler.Length() - `1` : styler.LineStart(lineNext);
897	const int style = styler.StyleAt(lookAtPos) & `31`;
898	quote = options.foldQuotes && IsPyTripleQuoteStringState(style);
899	}
900	const bool quote_start = (quote && !prevQuote);
901	const bool quote_continue = (quote && prevQuote);
902	if (!quote \|\| !prevQuote)
903	indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
904	if (quote)
905	indentNext = indentCurrentLevel;
906	if (indentNext & SC_FOLDLEVELWHITEFLAG)
907	indentNext = SC_FOLDLEVELWHITEFLAG \| indentCurrentLevel;
908
909	if (quote_start) {
910	// Place fold point at start of triple quoted string
911	lev \|= SC_FOLDLEVELHEADERFLAG;
912	} else if (quote_continue \|\| prevQuote) {
913	// Add level to rest of lines in the string
914	lev = lev + `1`;
915	}
916
917	// Skip past any blank lines for next indent level info; we skip also
918	// comments (all comments, not just those starting in column 0)
919	// which effectively folds them into surrounding code rather
920	// than screwing up folding. If comments end file, use the min
921	// comment indent as the level after
922
923	int minCommentLevel = indentCurrentLevel;
924	while (!quote &&
925	(lineNext < docLines) &&
926	((indentNext & SC_FOLDLEVELWHITEFLAG) \|\| (IsCommentLine(lineNext, styler)))) {
927
928	if (IsCommentLine(lineNext, styler) && indentNext < minCommentLevel) {
929	minCommentLevel = indentNext;
930	}
931
932	lineNext++;
933	indentNext = styler.IndentAmount(lineNext, &spaceFlags, nullptr);
934	}
935
936	const int levelAfterComments = ((lineNext < docLines) ? indentNext & SC_FOLDLEVELNUMBERMASK : minCommentLevel);
937	const int levelBeforeComments = std::max(indentCurrentLevel, levelAfterComments);
938
939	// Now set all the indent levels on the lines we skipped
940	// Do this from end to start. Once we encounter one line
941	// which is indented more than the line after the end of
942	// the comment-block, use the level of the block before
943
944	Sci_Position skipLine = lineNext;
945	int skipLevel = levelAfterComments;
946
947	while (--skipLine > lineCurrent) {
948	const int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, nullptr);
949
950	if (options.foldCompact) {
951	if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
952	skipLevel = levelBeforeComments;
953
954	const int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
955
956	styler.SetLevel(skipLine, skipLevel \| whiteFlag);
957	} else {
958	if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
959	!(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
960	!IsCommentLine(skipLine, styler))
961	skipLevel = levelBeforeComments;
962
963	styler.SetLevel(skipLine, skipLevel);
964	}
965	}
966
967	// Set fold header on non-quote line
968	if (!quote && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
969	if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
970	lev \|= SC_FOLDLEVELHEADERFLAG;
971	}
972
973	// Keep track of triple quote state of previous line
974	prevQuote = quote;
975
976	// Set fold level for this line and move to next line
977	styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
978	indentCurrent = indentNext;
979	lineCurrent = lineNext;
980	}
981
982	// NOTE: Cannot set level of last line here because indentCurrent doesn't have
983	// header flag set; the loop above is crafted to take care of this case!
984	//styler.SetLevel(lineCurrent, indentCurrent);
985	}
986
987	LexerModule lmPython(SCLEX_PYTHON, LexerPython::LexerFactoryPython, "python",
988	pythonWordListDesc);
989

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexPython.cxx