1 | // Scintilla source code edit control |
2 | /** @file LexPython.cxx |
3 | ** Lexer for Python. |
4 | **/ |
5 | // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org> |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | |
8 | #include <cstdlib> |
9 | #include <cassert> |
10 | #include <cstring> |
11 | |
12 | #include <string> |
13 | #include <string_view> |
14 | #include <vector> |
15 | #include <map> |
16 | #include <algorithm> |
17 | #include <functional> |
18 | |
19 | #include "ILexer.h" |
20 | #include "Scintilla.h" |
21 | #include "SciLexer.h" |
22 | |
23 | #include "StringCopy.h" |
24 | #include "WordList.h" |
25 | #include "LexAccessor.h" |
26 | #include "Accessor.h" |
27 | #include "StyleContext.h" |
28 | #include "CharacterSet.h" |
29 | #include "CharacterCategory.h" |
30 | #include "LexerModule.h" |
31 | #include "OptionSet.h" |
32 | #include "SubStyles.h" |
33 | #include "DefaultLexer.h" |
34 | |
35 | using namespace Scintilla; |
36 | using namespace Lexilla; |
37 | |
38 | namespace { |
39 | // Use an unnamed namespace to protect the functions and classes from name conflicts |
40 | |
41 | /* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may |
42 | have arbitrary expressions in {}. The tokens in the expressions are lexed as if |
43 | they were outside of any string. Expressions may contain { and } characters as |
44 | long as there is a closing } for every {, may be 2+ lines in a triple quoted |
45 | string, and may have a formatting specifier following a ! or :, but both ! |
46 | and : are valid inside of a bracketed expression and != is a valid |
47 | expression token even outside of a bracketed expression. |
48 | |
49 | When in an f-string expression, the lexer keeps track of the state value of |
50 | the f-string and the nesting count for the expression (# of [, (, { seen - # of |
51 | }, ), ] seen). f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of |
52 | states and nesting counts is kept. If a f-string expression continues beyond |
53 | the end of a line, this stack is saved in a std::map that maps a line number to |
54 | the stack at the end of that line. std::vector is used for the stack. |
55 | |
56 | The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/ |
57 | */ |
// One entry of the f-string expression stack. An entry is pushed when a '{'
// opens an expression inside an f-string and popped when that expression ends.
// Initialised positionally as {state, 0}, so member order matters.
struct SingleFStringExpState {
	int state;        // f-string style that was active when the expression was opened
	int nestingCount; // brackets ({, [, () opened minus brackets closed inside the expression
};
62 | |
/* Kind of the last keyword seen; used to pick the style of the identifier that follows.
   kwCDef, kwCTypeName only used for Cython; kwCPDef is set by the "cpdef" keyword. */
enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };
65 | |
// Bit flags selecting which string prefix letters (u, b, f) are recognised; values may be or-ed together.
enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };
67 | |
// Indicator number passed to IndicatorFill when marking indentation checked by the whinge level.
constexpr int indicatorWhitespace = 1;
69 | |
70 | bool (Accessor &styler, Sci_Position pos, Sci_Position len) { |
71 | return len > 0 && styler[pos] == '#'; |
72 | } |
73 | |
74 | constexpr bool IsPyStringTypeChar(int ch, literalsAllowed allowed) noexcept { |
75 | return |
76 | ((allowed & litB) && (ch == 'b' || ch == 'B')) || |
77 | ((allowed & litU) && (ch == 'u' || ch == 'U')) || |
78 | ((allowed & litF) && (ch == 'f' || ch == 'F')); |
79 | } |
80 | |
81 | bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) noexcept { |
82 | if (ch == '\'' || ch == '"') |
83 | return true; |
84 | if (IsPyStringTypeChar(ch, allowed)) { |
85 | if (chNext == '"' || chNext == '\'') |
86 | return true; |
87 | if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\'')) |
88 | return true; |
89 | } |
90 | if ((ch == 'r' || ch == 'R') && (chNext == '"' || chNext == '\'')) |
91 | return true; |
92 | |
93 | return false; |
94 | } |
95 | |
96 | constexpr bool IsPyFStringState(int st) noexcept { |
97 | return ((st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING) || |
98 | (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE)); |
99 | } |
100 | |
101 | constexpr bool IsPySingleQuoteStringState(int st) noexcept { |
102 | return ((st == SCE_P_CHARACTER) || (st == SCE_P_STRING) || |
103 | (st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING)); |
104 | } |
105 | |
106 | constexpr bool IsPyTripleQuoteStringState(int st) noexcept { |
107 | return ((st == SCE_P_TRIPLE) || (st == SCE_P_TRIPLEDOUBLE) || |
108 | (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE)); |
109 | } |
110 | |
111 | char GetPyStringQuoteChar(int st) noexcept { |
112 | if ((st == SCE_P_CHARACTER) || (st == SCE_P_FCHARACTER) || |
113 | (st == SCE_P_TRIPLE) || (st == SCE_P_FTRIPLE)) |
114 | return '\''; |
115 | if ((st == SCE_P_STRING) || (st == SCE_P_FSTRING) || |
116 | (st == SCE_P_TRIPLEDOUBLE) || (st == SCE_P_FTRIPLEDOUBLE)) |
117 | return '"'; |
118 | |
119 | return '\0'; |
120 | } |
121 | |
122 | void PushStateToStack(int state, std::vector<SingleFStringExpState> &stack, SingleFStringExpState *¤tFStringExp) { |
123 | SingleFStringExpState single = {state, 0}; |
124 | stack.push_back(single); |
125 | |
126 | currentFStringExp = &stack.back(); |
127 | } |
128 | |
129 | int PopFromStateStack(std::vector<SingleFStringExpState> &stack, SingleFStringExpState *¤tFStringExp) noexcept { |
130 | int state = 0; |
131 | |
132 | if (!stack.empty()) { |
133 | state = stack.back().state; |
134 | stack.pop_back(); |
135 | } |
136 | |
137 | if (stack.empty()) { |
138 | currentFStringExp = nullptr; |
139 | } else { |
140 | currentFStringExp = &stack.back(); |
141 | } |
142 | |
143 | return state; |
144 | } |
145 | |
146 | /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */ |
147 | int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) { |
148 | char ch = styler.SafeGetCharAt(i); |
149 | char chNext = styler.SafeGetCharAt(i + 1); |
150 | const int firstIsF = (ch == 'f' || ch == 'F'); |
151 | |
152 | // Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars |
153 | if (ch == 'r' || ch == 'R') { |
154 | i++; |
155 | ch = styler.SafeGetCharAt(i); |
156 | chNext = styler.SafeGetCharAt(i + 1); |
157 | } else if (IsPyStringTypeChar(ch, allowed)) { |
158 | if (chNext == 'r' || chNext == 'R') |
159 | i += 2; |
160 | else |
161 | i += 1; |
162 | ch = styler.SafeGetCharAt(i); |
163 | chNext = styler.SafeGetCharAt(i + 1); |
164 | } |
165 | |
166 | if (ch != '"' && ch != '\'') { |
167 | *nextIndex = i + 1; |
168 | return SCE_P_DEFAULT; |
169 | } |
170 | |
171 | if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) { |
172 | *nextIndex = i + 3; |
173 | |
174 | if (ch == '"') |
175 | return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE); |
176 | else |
177 | return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE); |
178 | } else { |
179 | *nextIndex = i + 1; |
180 | |
181 | if (ch == '"') |
182 | return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING); |
183 | else |
184 | return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER); |
185 | } |
186 | } |
187 | |
188 | inline bool IsAWordChar(int ch, bool unicodeIdentifiers) { |
189 | if (IsASCII(ch)) |
190 | return (IsAlphaNumeric(ch) || ch == '.' || ch == '_'); |
191 | |
192 | if (!unicodeIdentifiers) |
193 | return false; |
194 | |
195 | // Python uses the XID_Continue set from Unicode data |
196 | return IsXidContinue(ch); |
197 | } |
198 | |
199 | inline bool IsAWordStart(int ch, bool unicodeIdentifiers) { |
200 | if (IsASCII(ch)) |
201 | return (IsUpperOrLowerCase(ch) || ch == '_'); |
202 | |
203 | if (!unicodeIdentifiers) |
204 | return false; |
205 | |
206 | // Python uses the XID_Start set from Unicode data |
207 | return IsXidStart(ch); |
208 | } |
209 | |
210 | bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) { |
211 | const Sci_Position line = styler.GetLine(pos); |
212 | const Sci_Position start_pos = styler.LineStart(line); |
213 | for (Sci_Position i = start_pos; i < pos; i++) { |
214 | const char ch = styler[i]; |
215 | if (!(ch == ' ' || ch == '\t')) |
216 | return false; |
217 | } |
218 | return true; |
219 | } |
220 | |
221 | // Options used for LexerPython |
222 | struct OptionsPython { |
223 | int whingeLevel; |
224 | bool base2or8Literals; |
225 | bool stringsU; |
226 | bool stringsB; |
227 | bool stringsF; |
228 | bool stringsOverNewline; |
229 | bool keywords2NoSubIdentifiers; |
230 | bool fold; |
231 | bool foldQuotes; |
232 | bool foldCompact; |
233 | bool unicodeIdentifiers; |
234 | |
235 | OptionsPython() noexcept { |
236 | whingeLevel = 0; |
237 | base2or8Literals = true; |
238 | stringsU = true; |
239 | stringsB = true; |
240 | stringsF = true; |
241 | stringsOverNewline = false; |
242 | keywords2NoSubIdentifiers = false; |
243 | fold = false; |
244 | foldQuotes = false; |
245 | foldCompact = false; |
246 | unicodeIdentifiers = true; |
247 | } |
248 | |
249 | literalsAllowed AllowedLiterals() const noexcept { |
250 | literalsAllowed allowedLiterals = stringsU ? litU : litNone; |
251 | if (stringsB) |
252 | allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB); |
253 | if (stringsF) |
254 | allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litF); |
255 | return allowedLiterals; |
256 | } |
257 | }; |
258 | |
// Descriptions of the word lists, terminated by nullptr.
// Entry 0 corresponds to keywords and entry 1 to keywords2 (see LexerPython::WordListSet).
const char *const pythonWordListDesc[] = {
	"Keywords" ,
	"Highlighted identifiers" ,
	nullptr
};
264 | |
// Binds each property name to the OptionsPython member it controls, with a
// description where one is provided, and registers the word list descriptions.
struct OptionSetPython : public OptionSet<OptionsPython> {
	OptionSetPython() {
		// Indentation consistency checking
		DefineProperty("tab.timmy.whinge.level" , &OptionsPython::whingeLevel,
			"For Python code, checks whether indenting is consistent. "
			"The default, 0 turns off indentation checking, "
			"1 checks whether each line is potentially inconsistent with the previous line, "
			"2 checks whether any space characters occur before a tab character in the indentation, "
			"3 checks whether any spaces are in the indentation, and "
			"4 checks for any tab characters in the indentation. "
			"1 is a good level to use." );

		// Literal recognition switches
		DefineProperty("lexer.python.literals.binary" , &OptionsPython::base2or8Literals,
			"Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712." );

		DefineProperty("lexer.python.strings.u" , &OptionsPython::stringsU,
			"Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3." );

		DefineProperty("lexer.python.strings.b" , &OptionsPython::stringsB,
			"Set to 0 to not recognise Python 3 bytes literals b\"x\"." );

		DefineProperty("lexer.python.strings.f" , &OptionsPython::stringsF,
			"Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\"." );

		DefineProperty("lexer.python.strings.over.newline" , &OptionsPython::stringsOverNewline,
			"Set to 1 to allow strings to span newline characters." );

		DefineProperty("lexer.python.keywords2.no.sub.identifiers" , &OptionsPython::keywords2NoSubIdentifiers,
			"When enabled, it will not style keywords2 items that are used as a sub-identifier. "
			"Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item." );

		// Folding switches ("fold" and "fold.compact" are registered without a description)
		DefineProperty("fold" , &OptionsPython::fold);

		DefineProperty("fold.quotes.python" , &OptionsPython::foldQuotes,
			"This option enables folding multi-line quoted strings when using the Python lexer." );

		DefineProperty("fold.compact" , &OptionsPython::foldCompact);

		DefineProperty("lexer.python.unicode.identifiers" , &OptionsPython::unicodeIdentifiers,
			"Set to 0 to not recognise Python 3 Unicode identifiers." );

		DefineWordListSets(pythonWordListDesc);
	}
};
308 | |
// Zero-terminated list of base styles that can have sub-styles: only SCE_P_IDENTIFIER.
const char styleSubable[] = { SCE_P_IDENTIFIER, 0 };
310 | |
// Table describing each style; handed to the DefaultLexer constructor by LexerPython.
// Each row is: style number, style name, tags, description.
LexicalClass lexicalClasses[] = {
	// Lexer Python SCLEX_PYTHON SCE_P_:
	0, "SCE_P_DEFAULT" , "default" , "White space" ,
	1, "SCE_P_COMMENTLINE" , "comment line" , "Comment" ,
	2, "SCE_P_NUMBER" , "literal numeric" , "Number" ,
	3, "SCE_P_STRING" , "literal string" , "String" ,
	4, "SCE_P_CHARACTER" , "literal string" , "Single quoted string" ,
	5, "SCE_P_WORD" , "keyword" , "Keyword" ,
	6, "SCE_P_TRIPLE" , "literal string" , "Triple quotes" ,
	7, "SCE_P_TRIPLEDOUBLE" , "literal string" , "Triple double quotes" ,
	8, "SCE_P_CLASSNAME" , "identifier" , "Class name definition" ,
	9, "SCE_P_DEFNAME" , "identifier" , "Function or method name definition" ,
	10, "SCE_P_OPERATOR" , "operator" , "Operators" ,
	11, "SCE_P_IDENTIFIER" , "identifier" , "Identifiers" ,
	12, "SCE_P_COMMENTBLOCK" , "comment" , "Comment-blocks" ,
	13, "SCE_P_STRINGEOL" , "error literal string" , "End of line where string is not closed" ,
	14, "SCE_P_WORD2" , "identifier" , "Highlighted identifiers" ,
	15, "SCE_P_DECORATOR" , "preprocessor" , "Decorators" ,
	16, "SCE_P_FSTRING" , "literal string interpolated" , "F-String" ,
	17, "SCE_P_FCHARACTER" , "literal string interpolated" , "Single quoted f-string" ,
	18, "SCE_P_FTRIPLE" , "literal string interpolated" , "Triple quoted f-string" ,
	19, "SCE_P_FTRIPLEDOUBLE" , "literal string interpolated" , "Triple double quoted f-string" ,
};
334 | |
335 | } |
336 | |
337 | class LexerPython : public DefaultLexer { |
338 | WordList keywords; |
339 | WordList keywords2; |
340 | OptionsPython options; |
341 | OptionSetPython osPython; |
342 | enum { ssIdentifier }; |
343 | SubStyles subStyles; |
344 | std::map<Sci_Position, std::vector<SingleFStringExpState> > ftripleStateAtEol; |
345 | public: |
346 | explicit LexerPython() : |
347 | DefaultLexer("python" , SCLEX_PYTHON, lexicalClasses, ELEMENTS(lexicalClasses)), |
348 | subStyles(styleSubable, 0x80, 0x40, 0) { |
349 | } |
350 | ~LexerPython() override { |
351 | } |
352 | void SCI_METHOD Release() override { |
353 | delete this; |
354 | } |
355 | int SCI_METHOD Version() const override { |
356 | return lvRelease5; |
357 | } |
358 | const char *SCI_METHOD PropertyNames() override { |
359 | return osPython.PropertyNames(); |
360 | } |
361 | int SCI_METHOD PropertyType(const char *name) override { |
362 | return osPython.PropertyType(name); |
363 | } |
364 | const char *SCI_METHOD DescribeProperty(const char *name) override { |
365 | return osPython.DescribeProperty(name); |
366 | } |
367 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
368 | const char * SCI_METHOD PropertyGet(const char *key) override { |
369 | return osPython.PropertyGet(key); |
370 | } |
371 | const char *SCI_METHOD DescribeWordListSets() override { |
372 | return osPython.DescribeWordListSets(); |
373 | } |
374 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
375 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
376 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
377 | |
378 | void *SCI_METHOD PrivateCall(int, void *) override { |
379 | return nullptr; |
380 | } |
381 | |
382 | int SCI_METHOD LineEndTypesSupported() override { |
383 | return SC_LINE_END_TYPE_UNICODE; |
384 | } |
385 | |
386 | int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override { |
387 | return subStyles.Allocate(styleBase, numberStyles); |
388 | } |
389 | int SCI_METHOD SubStylesStart(int styleBase) override { |
390 | return subStyles.Start(styleBase); |
391 | } |
392 | int SCI_METHOD SubStylesLength(int styleBase) override { |
393 | return subStyles.Length(styleBase); |
394 | } |
395 | int SCI_METHOD StyleFromSubStyle(int subStyle) override { |
396 | const int styleBase = subStyles.BaseStyle(subStyle); |
397 | return styleBase; |
398 | } |
399 | int SCI_METHOD PrimaryStyleFromStyle(int style) override { |
400 | return style; |
401 | } |
402 | void SCI_METHOD FreeSubStyles() override { |
403 | subStyles.Free(); |
404 | } |
405 | void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override { |
406 | subStyles.SetIdentifiers(style, identifiers); |
407 | } |
408 | int SCI_METHOD DistanceToSecondaryStyles() override { |
409 | return 0; |
410 | } |
411 | const char *SCI_METHOD GetSubStyleBases() override { |
412 | return styleSubable; |
413 | } |
414 | |
415 | static ILexer5 *LexerFactoryPython() { |
416 | return new LexerPython(); |
417 | } |
418 | |
419 | private: |
420 | void ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *¤tFStringExp, bool &inContinuedString); |
421 | }; |
422 | |
423 | Sci_Position SCI_METHOD LexerPython::PropertySet(const char *key, const char *val) { |
424 | if (osPython.PropertySet(&options, key, val)) { |
425 | return 0; |
426 | } |
427 | return -1; |
428 | } |
429 | |
430 | Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) { |
431 | WordList *wordListN = nullptr; |
432 | switch (n) { |
433 | case 0: |
434 | wordListN = &keywords; |
435 | break; |
436 | case 1: |
437 | wordListN = &keywords2; |
438 | break; |
439 | default: |
440 | break; |
441 | } |
442 | Sci_Position firstModification = -1; |
443 | if (wordListN) { |
444 | WordList wlNew; |
445 | wlNew.Set(wl); |
446 | if (*wordListN != wlNew) { |
447 | wordListN->Set(wl); |
448 | firstModification = 0; |
449 | } |
450 | } |
451 | return firstModification; |
452 | } |
453 | |
454 | void LexerPython::ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *¤tFStringExp, bool &inContinuedString) { |
455 | long deepestSingleStateIndex = -1; |
456 | unsigned long i; |
457 | |
458 | // Find the deepest single quote state because that string will end; no \ continuation in f-string |
459 | for (i = 0; i < fstringStateStack.size(); i++) { |
460 | if (IsPySingleQuoteStringState(fstringStateStack[i].state)) { |
461 | deepestSingleStateIndex = i; |
462 | break; |
463 | } |
464 | } |
465 | |
466 | if (deepestSingleStateIndex != -1) { |
467 | sc.SetState(fstringStateStack[deepestSingleStateIndex].state); |
468 | while (fstringStateStack.size() > static_cast<unsigned long>(deepestSingleStateIndex)) { |
469 | PopFromStateStack(fstringStateStack, currentFStringExp); |
470 | } |
471 | } |
472 | if (!fstringStateStack.empty()) { |
473 | std::pair<Sci_Position, std::vector<SingleFStringExpState> > val; |
474 | val.first = sc.currentLine; |
475 | val.second = fstringStateStack; |
476 | |
477 | ftripleStateAtEol.insert(val); |
478 | } |
479 | |
480 | if ((sc.state == SCE_P_DEFAULT) |
481 | || IsPyTripleQuoteStringState(sc.state)) { |
482 | // Perform colourisation of white space and triple quoted strings at end of each line to allow |
483 | // tab marking to work inside white space and triple quoted strings |
484 | sc.SetState(sc.state); |
485 | } |
486 | if (IsPySingleQuoteStringState(sc.state)) { |
487 | if (inContinuedString || options.stringsOverNewline) { |
488 | inContinuedString = false; |
489 | } else { |
490 | sc.ChangeState(SCE_P_STRINGEOL); |
491 | sc.ForwardSetState(SCE_P_DEFAULT); |
492 | } |
493 | } |
494 | } |
495 | |
// Style the document range that starts at startPos and spans length characters.
// The start is moved back by at least one line (further across lines that end
// in a single quoted string style) before styling begins; the end of the range
// is not moved. Each loop iteration first checks whether the current state
// ends at the current character, then whether a new state starts there.
void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	Accessor styler(pAccess, nullptr);

	// Track whether in f-string expression; vector is used for a stack to
	// handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
	std::vector<SingleFStringExpState> fstringStateStack;
	SingleFStringExpState *currentFStringExp = nullptr;

	// Computed before startPos is moved back so the end of the range stays fixed
	const Sci_Position endPos = startPos + length;

	// Backtrack to previous line in case need to fix its tab whinging
	Sci_Position lineCurrent = styler.GetLine(startPos);
	if (startPos > 0) {
		if (lineCurrent > 0) {
			lineCurrent--;
			// Look for backslash-continued lines
			while (lineCurrent > 0) {
				// Style of the last character of the previous line
				const Sci_Position eolPos = styler.LineStart(lineCurrent) - 1;
				const int eolStyle = styler.StyleAt(eolPos);
				if (eolStyle == SCE_P_STRING
						|| eolStyle == SCE_P_CHARACTER
						|| eolStyle == SCE_P_STRINGEOL) {
					lineCurrent -= 1;
				} else {
					break;
				}
			}
			startPos = styler.LineStart(lineCurrent);
		}
		// Start from the style of the character just before the (possibly moved) start
		initStyle = startPos == 0 ? SCE_P_DEFAULT : styler.StyleAt(startPos - 1);
	}

	const literalsAllowed allowedLiterals = options.AllowedLiterals();

	// Keep only the low five bits of the style
	initStyle = initStyle & 31;
	if (initStyle == SCE_P_STRINGEOL) {
		initStyle = SCE_P_DEFAULT;
	}

	// Set up fstate stack from last line and remove any subsequent ftriple at eol states
	std::map<Sci_Position, std::vector<SingleFStringExpState> >::iterator it;
	it = ftripleStateAtEol.find(lineCurrent - 1);
	if (it != ftripleStateAtEol.end() && !it->second.empty()) {
		fstringStateStack = it->second;
		currentFStringExp = &fstringStateStack.back();
	}
	// Entries for this line and later lines are discarded; ProcessLineEnd adds them again as needed
	it = ftripleStateAtEol.lower_bound(lineCurrent);
	if (it != ftripleStateAtEol.end()) {
		ftripleStateAtEol.erase(it, ftripleStateAtEol.end());
	}

	kwType kwLast = kwOther;
	int spaceFlags = 0;
	// Called for its effect on spaceFlags; the returned indent amount is not used
	styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
	// True while styling a 0x / 0o / 0b number, where a sign after 'e' does not continue the number
	bool base_n_number = false;

	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_P_IDENTIFIER);

	StyleContext sc(startPos, endPos - startPos, initStyle, styler);

	bool indentGood = true;
	// Start of the range that the next IndicatorFill call will cover
	Sci_Position startIndicator = sc.currentPos;
	// Set when a single quoted string ends its line with a backslash
	bool inContinuedString = false;

	for (; sc.More(); sc.Forward()) {

		if (sc.atLineStart) {
			styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
			indentGood = true;
			// Each whinge level tests a different whitespace flag
			if (options.whingeLevel == 1) {
				indentGood = (spaceFlags & wsInconsistent) == 0;
			} else if (options.whingeLevel == 2) {
				indentGood = (spaceFlags & wsSpaceTab) == 0;
			} else if (options.whingeLevel == 3) {
				indentGood = (spaceFlags & wsSpace) == 0;
			} else if (options.whingeLevel == 4) {
				indentGood = (spaceFlags & wsTab) == 0;
			}
			if (!indentGood) {
				// Fill the text before this line with indicator value 0; the bad indentation itself
				// is filled with value 1 once the first non-blank character is reached
				styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
				startIndicator = sc.currentPos;
			}
		}

		if (sc.atLineEnd) {
			ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
			lineCurrent++;
			if (!sc.More())
				break;
		}

		// Set when the code below moves forward, possibly onto a line end that still needs processing
		bool needEOLCheck = false;


		// Check whether the current state ends at this character
		if (sc.state == SCE_P_OPERATOR) {
			// Operators are a single character long
			kwLast = kwOther;
			sc.SetState(SCE_P_DEFAULT);
		} else if (sc.state == SCE_P_NUMBER) {
			// A number continues over word characters and, unless it is a base-n number,
			// over a sign that directly follows 'e' or 'E'
			if (!IsAWordChar(sc.ch, false) &&
					!(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
				sc.SetState(SCE_P_DEFAULT);
			}
		} else if (sc.state == SCE_P_IDENTIFIER) {
			// A '.' also ends the identifier, so each dotted component is classified on its own
			if ((sc.ch == '.') || (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
				char s[100];
				sc.GetCurrent(s, sizeof(s));
				int style = SCE_P_IDENTIFIER;
				if ((kwLast == kwImport) && (strcmp(s, "as" ) == 0)) {
					style = SCE_P_WORD;
				} else if (keywords.InList(s)) {
					style = SCE_P_WORD;
				} else if (kwLast == kwClass) {
					style = SCE_P_CLASSNAME;
				} else if (kwLast == kwDef) {
					style = SCE_P_DEFNAME;
				} else if (kwLast == kwCDef || kwLast == kwCPDef) {
					// Skip whitespace after the identifier: '(' makes it a function name,
					// ':' a class name, anything else leaves it a plain identifier
					Sci_Position pos = sc.currentPos;
					unsigned char ch = styler.SafeGetCharAt(pos, '\0');
					while (ch != '\0') {
						if (ch == '(') {
							style = SCE_P_DEFNAME;
							break;
						} else if (ch == ':') {
							style = SCE_P_CLASSNAME;
							break;
						} else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
							pos++;
							ch = styler.SafeGetCharAt(pos, '\0');
						} else {
							break;
						}
					}
				} else if (keywords2.InList(s)) {
					if (options.keywords2NoSubIdentifiers) {
						// We don't want to highlight keywords2
						// that are used as a sub-identifier,
						// i.e. not open in "foo.open".
						const Sci_Position pos = styler.GetStartSegment() - 1;
						if (pos < 0 || (styler.SafeGetCharAt(pos, '\0') != '.'))
							style = SCE_P_WORD2;
					} else {
						style = SCE_P_WORD2;
					}
				} else {
					// Not in either word list: use a sub-style if the classifier has one for this word
					const int subStyle = classifierIdentifiers.ValueFor(s);
					if (subStyle >= 0) {
						style = subStyle;
					}
				}
				sc.ChangeState(style);
				sc.SetState(SCE_P_DEFAULT);
				// Remember keywords that affect how the next identifier is styled;
				// kwCDef / kwCPDef persist until the end of the line
				if (style == SCE_P_WORD) {
					if (0 == strcmp(s, "class" ))
						kwLast = kwClass;
					else if (0 == strcmp(s, "def" ))
						kwLast = kwDef;
					else if (0 == strcmp(s, "import" ))
						kwLast = kwImport;
					else if (0 == strcmp(s, "cdef" ))
						kwLast = kwCDef;
					else if (0 == strcmp(s, "cpdef" ))
						kwLast = kwCPDef;
					else if (0 == strcmp(s, "cimport" ))
						kwLast = kwImport;
					else if (kwLast != kwCDef && kwLast != kwCPDef)
						kwLast = kwOther;
				} else if (kwLast != kwCDef && kwLast != kwCPDef) {
					kwLast = kwOther;
				}
			}
		} else if ((sc.state == SCE_P_COMMENTLINE) || (sc.state == SCE_P_COMMENTBLOCK)) {
			if (sc.ch == '\r' || sc.ch == '\n') {
				sc.SetState(SCE_P_DEFAULT);
			}
		} else if (sc.state == SCE_P_DECORATOR) {
			if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
				sc.SetState(SCE_P_DEFAULT);
			}
		} else if (IsPySingleQuoteStringState(sc.state)) {
			if (sc.ch == '\\') {
				// For backslash + CR LF, step onto the CR so the LF is the next character
				if ((sc.chNext == '\r') && (sc.GetRelative(2) == '\n')) {
					sc.Forward();
				}
				if (sc.chNext == '\n' || sc.chNext == '\r') {
					inContinuedString = true;
				} else {
					// Don't roll over the newline.
					sc.Forward();
				}
			} else if (sc.ch == GetPyStringQuoteChar(sc.state)) {
				sc.ForwardSetState(SCE_P_DEFAULT);
				needEOLCheck = true;
			}
		} else if ((sc.state == SCE_P_TRIPLE) || (sc.state == SCE_P_FTRIPLE)) {
			if (sc.ch == '\\') {
				// Skip the escaped character
				sc.Forward();
			} else if (sc.Match(R"(''')" )) {
				// Consume all three closing quotes
				sc.Forward();
				sc.Forward();
				sc.ForwardSetState(SCE_P_DEFAULT);
				needEOLCheck = true;
			}
		} else if ((sc.state == SCE_P_TRIPLEDOUBLE) || (sc.state == SCE_P_FTRIPLEDOUBLE)) {
			if (sc.ch == '\\') {
				// Skip the escaped character
				sc.Forward();
			} else if (sc.Match(R"(""")" )) {
				// Consume all three closing quotes
				sc.Forward();
				sc.Forward();
				sc.ForwardSetState(SCE_P_DEFAULT);
				needEOLCheck = true;
			}
		}

		// Note if used and not if else because string states also match
		// some of the above clauses
		if (IsPyFStringState(sc.state) && sc.ch == '{') {
			if (sc.chNext == '{') {
				// Doubled brace: stays part of the string, skip the first one
				sc.Forward();
			} else {
				// Start of an expression: remember the string state and lex the expression as code
				PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
				sc.ForwardSetState(SCE_P_DEFAULT);
			}
			needEOLCheck = true;
		}

		// If in an f-string expression, check for the ending quote(s)
		// and end f-string to handle syntactically incorrect cases like
		// f'{' and f"""{"""
		if (!fstringStateStack.empty() && (sc.ch == '\'' || sc.ch == '"')) {
			// Index of the first stack entry (from the bottom) whose string is closed by this quote
			long matching_stack_i = -1;
			for (unsigned long stack_i = 0; stack_i < fstringStateStack.size() && matching_stack_i == -1; stack_i++) {
				const int stack_state = fstringStateStack[stack_i].state;
				const char quote = GetPyStringQuoteChar(stack_state);
				if (sc.ch == quote) {
					if (IsPySingleQuoteStringState(stack_state)) {
						matching_stack_i = stack_i;
					} else if (quote == '"' ? sc.Match(R"(""")" ) : sc.Match("'''" )) {
						matching_stack_i = stack_i;
					}
				}
			}

			if (matching_stack_i != -1) {
				// Style the closing quote(s) as the matched f-string, then return to default
				sc.SetState(fstringStateStack[matching_stack_i].state);
				if (IsPyTripleQuoteStringState(fstringStateStack[matching_stack_i].state)) {
					sc.Forward();
					sc.Forward();
				}
				sc.ForwardSetState(SCE_P_DEFAULT);
				needEOLCheck = true;

				// Drop the matched entry and everything above it
				while (fstringStateStack.size() > static_cast<unsigned long>(matching_stack_i)) {
					PopFromStateStack(fstringStateStack, currentFStringExp);
				}
			}
		}
		// End of code to find the end of a state

		if (!indentGood && !IsASpaceOrTab(sc.ch)) {
			// First non-blank character of a line with bad indentation: fill the indentation with value 1
			styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 1);
			startIndicator = sc.currentPos;
			indentGood = true;
		}

		// One cdef or cpdef line, clear kwLast only at end of line
		if ((kwLast == kwCDef || kwLast == kwCPDef) && sc.atLineEnd) {
			kwLast = kwOther;
		}

		// State exit code may have moved on to end of line
		if (needEOLCheck && sc.atLineEnd) {
			ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
			lineCurrent++;
			styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
			if (!sc.More())
				break;
		}

		// If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
		if (currentFStringExp && !IsPySingleQuoteStringState(sc.state) && !IsPyTripleQuoteStringState(sc.state)) {
			// "!=" is an operator rather than the start of a conversion, so it does not end the expression
			if (currentFStringExp->nestingCount == 0 && (sc.ch == '}' || sc.ch == ':' || (sc.ch == '!' && sc.chNext != '='))) {
				sc.SetState(PopFromStateStack(fstringStateStack, currentFStringExp));
			} else {
				if (sc.ch == '{' || sc.ch == '[' || sc.ch == '(') {
					currentFStringExp->nestingCount++;
				} else if (sc.ch == '}' || sc.ch == ']' || sc.ch == ')') {
					currentFStringExp->nestingCount--;
				}
			}
		}

		// Check for a new state starting character
		if (sc.state == SCE_P_DEFAULT) {
			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
				if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) {
					base_n_number = true;
					sc.SetState(SCE_P_NUMBER);
				} else if (sc.ch == '0' &&
						(sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) {
					if (options.base2or8Literals) {
						base_n_number = true;
						sc.SetState(SCE_P_NUMBER);
					} else {
						// Binary/octal literals disabled: the 0 is a number and what follows is an identifier
						sc.SetState(SCE_P_NUMBER);
						sc.ForwardSetState(SCE_P_IDENTIFIER);
					}
				} else {
					base_n_number = false;
					sc.SetState(SCE_P_NUMBER);
				}
			} else if (isoperator(sc.ch) || sc.ch == '`') {
				sc.SetState(SCE_P_OPERATOR);
			} else if (sc.ch == '#') {
				// "##" starts a comment block, a single '#' a comment line
				sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);
			} else if (sc.ch == '@') {
				// '@' is a decorator only when it is the first non-blank character on the line
				if (IsFirstNonWhitespace(sc.currentPos, styler))
					sc.SetState(SCE_P_DECORATOR);
				else
					sc.SetState(SCE_P_OPERATOR);
			} else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) {
				Sci_PositionU nextIndex = 0;
				sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));
				// Advance to the last character of the prefix and opening quote(s);
				// the loop's own Forward then moves past it
				while (nextIndex > (sc.currentPos + 1) && sc.More()) {
					sc.Forward();
				}
			} else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
				sc.SetState(SCE_P_IDENTIFIER);
			}
		}
	}
	// Fill whatever remains since the last indicator boundary with value 0
	styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
	sc.Complete();
}
829 | |
830 | static bool (Sci_Position line, Accessor &styler) { |
831 | const Sci_Position pos = styler.LineStart(line); |
832 | const Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
833 | for (Sci_Position i = pos; i < eol_pos; i++) { |
834 | const char ch = styler[i]; |
835 | if (ch == '#') |
836 | return true; |
837 | else if (ch != ' ' && ch != '\t') |
838 | return false; |
839 | } |
840 | return false; |
841 | } |
842 | |
843 | static bool IsQuoteLine(Sci_Position line, const Accessor &styler) { |
844 | const int style = styler.StyleAt(styler.LineStart(line)) & 31; |
845 | return IsPyTripleQuoteStringState(style); |
846 | } |
847 | |
848 | |
849 | void SCI_METHOD LexerPython::Fold(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, IDocument *pAccess) { |
850 | if (!options.fold) |
851 | return; |
852 | |
853 | Accessor styler(pAccess, nullptr); |
854 | |
855 | const Sci_Position maxPos = startPos + length; |
856 | const Sci_Position maxLines = (maxPos == styler.Length()) ? styler.GetLine(maxPos) : styler.GetLine(maxPos - 1); // Requested last line |
857 | const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line |
858 | |
859 | // Backtrack to previous non-blank line so we can determine indent level |
860 | // for any white space lines (needed esp. within triple quoted strings) |
861 | // and so we can fix any preceding fold level (which is why we go back |
862 | // at least one line in all cases) |
863 | int spaceFlags = 0; |
864 | Sci_Position lineCurrent = styler.GetLine(startPos); |
865 | int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, nullptr); |
866 | while (lineCurrent > 0) { |
867 | lineCurrent--; |
868 | indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, nullptr); |
869 | if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) && |
870 | (!IsCommentLine(lineCurrent, styler)) && |
871 | (!IsQuoteLine(lineCurrent, styler))) |
872 | break; |
873 | } |
874 | int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK; |
875 | |
876 | // Set up initial loop state |
877 | startPos = styler.LineStart(lineCurrent); |
878 | int prev_state = SCE_P_DEFAULT & 31; |
879 | if (lineCurrent >= 1) |
880 | prev_state = styler.StyleAt(startPos - 1) & 31; |
881 | int prevQuote = options.foldQuotes && IsPyTripleQuoteStringState(prev_state); |
882 | |
883 | // Process all characters to end of requested range or end of any triple quote |
884 | //that hangs over the end of the range. Cap processing in all cases |
885 | // to end of document (in case of unclosed quote at end). |
886 | while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevQuote)) { |
887 | |
888 | // Gather info |
889 | int lev = indentCurrent; |
890 | Sci_Position lineNext = lineCurrent + 1; |
891 | int indentNext = indentCurrent; |
892 | int quote = false; |
893 | if (lineNext <= docLines) { |
894 | // Information about next line is only available if not at end of document |
895 | indentNext = styler.IndentAmount(lineNext, &spaceFlags, nullptr); |
896 | const Sci_Position lookAtPos = (styler.LineStart(lineNext) == styler.Length()) ? styler.Length() - 1 : styler.LineStart(lineNext); |
897 | const int style = styler.StyleAt(lookAtPos) & 31; |
898 | quote = options.foldQuotes && IsPyTripleQuoteStringState(style); |
899 | } |
900 | const bool quote_start = (quote && !prevQuote); |
901 | const bool quote_continue = (quote && prevQuote); |
902 | if (!quote || !prevQuote) |
903 | indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK; |
904 | if (quote) |
905 | indentNext = indentCurrentLevel; |
906 | if (indentNext & SC_FOLDLEVELWHITEFLAG) |
907 | indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel; |
908 | |
909 | if (quote_start) { |
910 | // Place fold point at start of triple quoted string |
911 | lev |= SC_FOLDLEVELHEADERFLAG; |
912 | } else if (quote_continue || prevQuote) { |
913 | // Add level to rest of lines in the string |
914 | lev = lev + 1; |
915 | } |
916 | |
917 | // Skip past any blank lines for next indent level info; we skip also |
918 | // comments (all comments, not just those starting in column 0) |
919 | // which effectively folds them into surrounding code rather |
920 | // than screwing up folding. If comments end file, use the min |
921 | // comment indent as the level after |
922 | |
923 | int = indentCurrentLevel; |
924 | while (!quote && |
925 | (lineNext < docLines) && |
926 | ((indentNext & SC_FOLDLEVELWHITEFLAG) || (IsCommentLine(lineNext, styler)))) { |
927 | |
928 | if (IsCommentLine(lineNext, styler) && indentNext < minCommentLevel) { |
929 | minCommentLevel = indentNext; |
930 | } |
931 | |
932 | lineNext++; |
933 | indentNext = styler.IndentAmount(lineNext, &spaceFlags, nullptr); |
934 | } |
935 | |
936 | const int = ((lineNext < docLines) ? indentNext & SC_FOLDLEVELNUMBERMASK : minCommentLevel); |
937 | const int = std::max(indentCurrentLevel, levelAfterComments); |
938 | |
939 | // Now set all the indent levels on the lines we skipped |
940 | // Do this from end to start. Once we encounter one line |
941 | // which is indented more than the line after the end of |
942 | // the comment-block, use the level of the block before |
943 | |
944 | Sci_Position skipLine = lineNext; |
945 | int skipLevel = levelAfterComments; |
946 | |
947 | while (--skipLine > lineCurrent) { |
948 | const int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, nullptr); |
949 | |
950 | if (options.foldCompact) { |
951 | if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments) |
952 | skipLevel = levelBeforeComments; |
953 | |
954 | const int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG; |
955 | |
956 | styler.SetLevel(skipLine, skipLevel | whiteFlag); |
957 | } else { |
958 | if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments && |
959 | !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) && |
960 | !IsCommentLine(skipLine, styler)) |
961 | skipLevel = levelBeforeComments; |
962 | |
963 | styler.SetLevel(skipLine, skipLevel); |
964 | } |
965 | } |
966 | |
967 | // Set fold header on non-quote line |
968 | if (!quote && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) { |
969 | if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK)) |
970 | lev |= SC_FOLDLEVELHEADERFLAG; |
971 | } |
972 | |
973 | // Keep track of triple quote state of previous line |
974 | prevQuote = quote; |
975 | |
976 | // Set fold level for this line and move to next line |
977 | styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG); |
978 | indentCurrent = indentNext; |
979 | lineCurrent = lineNext; |
980 | } |
981 | |
982 | // NOTE: Cannot set level of last line here because indentCurrent doesn't have |
983 | // header flag set; the loop above is crafted to take care of this case! |
984 | //styler.SetLevel(lineCurrent, indentCurrent); |
985 | } |
986 | |
987 | LexerModule lmPython(SCLEX_PYTHON, LexerPython::LexerFactoryPython, "python" , |
988 | pythonWordListDesc); |
989 | |