LexRuby.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexRuby.cxx]

1	// Scintilla source code edit control
2	/** @file LexRuby.cxx
3	** Lexer for Ruby.
4	**/
5	// Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6	// The License.txt file describes the conditions under which this software may be distributed.
7
8	#include <stdlib.h>
9	#include <string.h>
10	#include <stdio.h>
11	#include <stdarg.h>
12	#include <assert.h>
13	#include <ctype.h>
14
15	#include <string>
16	#include <string_view>
17
18	#include "ILexer.h"
19	#include "Scintilla.h"
20	#include "SciLexer.h"
21
22	#include "WordList.h"
23	#include "LexAccessor.h"
24	#include "Accessor.h"
25	#include "StyleContext.h"
26	#include "CharacterSet.h"
27	#include "LexerModule.h"
28
29	using namespace Lexilla;
30
31	//XXX Identical to Perl, put in common area
// Returns true when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
35
// True when the character code, converted to unsigned int, is at most 127.
#define isSafeASCII(ch) (static_cast<unsigned int>(ch) <= 127)
// Exact complement of isSafeASCII: redundant, but reads better at call sites.
#define isHighBitChar(ch) (static_cast<unsigned int>(ch) > 127)
39
40	static inline bool isSafeAlpha(char ch) {
41	return (isSafeASCII(ch) && isalpha(ch)) \|\| ch == `'_'`;
42	}
43
44	static inline bool isSafeAlnum(char ch) {
45	return (isSafeASCII(ch) && isalnum(ch)) \|\| ch == `'_'`;
46	}
47
48	static inline bool isSafeAlnumOrHigh(char ch) {
49	return isHighBitChar(ch) \|\| isalnum(ch) \|\| ch == `'_'`;
50	}
51
52	static inline bool isSafeDigit(char ch) {
53	return isSafeASCII(ch) && isdigit(ch);
54	}
55
56	static inline bool isSafeWordcharOrHigh(char ch) {
57	// Error: scintilla's KeyWords.h includes '.' as a word-char
58	// we want to separate things that can take methods from the
59	// methods.
60	return isHighBitChar(ch) \|\| isalnum(ch) \|\| ch == `'_'`;
61	}
62
// Returns true for a space or a horizontal tab; line ends are not included.
static inline bool iswhitespace(char ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
66
67	#define MAX_KEYWORD_LENGTH 200
68
// Bit mask applied to a raw style value before it is compared to SCE_RB_*.
#define STYLE_MASK 63
// Masks a style value with STYLE_MASK. The argument is parenthesized so that
// an expression such as (a | b) is masked as a whole rather than being
// re-associated by operator precedence.
#define actual_style(style) ((style) & STYLE_MASK)
71
72	static bool followsDot(Sci_PositionU pos, Accessor &styler) {
73	styler.Flush();
74	for (; pos >= `1`; --pos) {
75	int style = actual_style(styler.StyleAt(pos));
76	char ch;
77	switch (style) {
78	case SCE_RB_DEFAULT:
79	ch = styler [pos];
80	if (ch == `' '` \|\| ch == `'\t'`) {
81	//continue
82	} else {
83	return false;
84	}
85	break;
86
87	case SCE_RB_OPERATOR:
88	return styler [pos] == `'.'`;
89
90	default:
91	return false;
92	}
93	}
94	return false;
95	}
96
97	// Forward declarations
98	static bool keywordIsAmbiguous(const char *prevWord);
99	static bool keywordDoStartsLoop(Sci_Position pos,
100	Accessor &styler);
101	static bool keywordIsModifier(const char *word,
102	Sci_Position pos,
103	Accessor &styler);
104
105	static int ClassifyWordRb(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, char *prevWord) {
106	char s[MAX_KEYWORD_LENGTH];
107	Sci_PositionU i, j;
108	Sci_PositionU lim = end - start + `1`; // num chars to copy
109	if (lim >= MAX_KEYWORD_LENGTH) {
110	lim = MAX_KEYWORD_LENGTH - `1`;
111	}
112	for (i = start, j = `0`; j < lim; i++, j++) {
113	s[j] = styler [i];
114	}
115	s[j] = `'\0'`;
116	int chAttr;
117	if (`0` == strcmp(prevWord, "class"))
118	chAttr = SCE_RB_CLASSNAME;
119	else if (`0` == strcmp(prevWord, "module"))
120	chAttr = SCE_RB_MODULE_NAME;
121	else if (`0` == strcmp(prevWord, "def"))
122	chAttr = SCE_RB_DEFNAME;
123	else if (keywords.InList(s) && ((start == `0`) \|\| !followsDot(start - `1`, styler))) {
124	if (keywordIsAmbiguous(s)
125	&& keywordIsModifier(s, start, styler)) {
126
127	// Demoted keywords are colored as keywords,
128	// but do not affect changes in indentation.
129	//
130	// Consider the word 'if':
131	// 1. <<if test ...>> : normal
132	// 2. <<stmt if test>> : demoted
133	// 3. <<lhs = if ...>> : normal: start a new indent level
134	// 4. <<obj.if = 10>> : color as identifer, since it follows '.'
135
136	chAttr = SCE_RB_WORD_DEMOTED;
137	} else {
138	chAttr = SCE_RB_WORD;
139	}
140	} else
141	chAttr = SCE_RB_IDENTIFIER;
142	styler.ColourTo(end, chAttr);
143	if (chAttr == SCE_RB_WORD) {
144	strcpy(prevWord, s);
145	} else {
146	prevWord[`0`] = `0`;
147	}
148	return chAttr;
149	}
150
151
152	//XXX Identical to Perl, put in common area
153	static bool isMatch(Accessor &styler, Sci_Position lengthDoc, Sci_Position pos, const char *val) {
154	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
155	return false;
156	}
157	while (*val) {
158	if (*val != styler [pos++]) {
159	return false;
160	}
161	val++;
162	}
163	return true;
164	}
165
166	// Do Ruby better -- find the end of the line, work back,
167	// and then check for leading white space
168
169	// Precondition: the here-doc target can be indented
170	static bool lookingAtHereDocDelim(Accessor &styler,
171	Sci_Position pos,
172	Sci_Position lengthDoc,
173	const char *HereDocDelim)
174	{
175	if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
176	return false;
177	}
178	while (--pos > `0`) {
179	char ch = styler [pos];
180	if (isEOLChar(ch)) {
181	return true;
182	} else if (ch != `' '` && ch != `'\t'`) {
183	return false;
184	}
185	}
186	return false;
187	}
188
189	//XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner; any other character is
// returned unchanged and so acts as its own closing delimiter.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
201
202	// Null transitions when we see we've reached the end
203	// and need to relex the curr char.
204
205	static void redo_char(Sci_Position &i, char &ch, char &chNext, char &chNext2,
206	int &state) {
207	i--;
208	chNext2 = chNext;
209	chNext = ch;
210	state = SCE_RB_DEFAULT;
211	}
212
213	static void advance_char(Sci_Position &i, char &ch, char &chNext, char &chNext2) {
214	i++;
215	ch = chNext;
216	chNext = chNext2;
217	}
218
219	// precondition: startPos points to one after the EOL char
220	static bool currLineContainsHereDelims(Sci_Position &startPos,
221	Accessor &styler) {
222	if (startPos <= `1`)
223	return false;
224
225	Sci_Position pos;
226	for (pos = startPos - `1`; pos > `0`; pos--) {
227	char ch = styler.SafeGetCharAt(pos);
228	if (isEOLChar(ch)) {
229	// Leave the pointers where they are -- there are no
230	// here doc delims on the current line, even if
231	// the EOL isn't default style
232
233	return false;
234	} else {
235	styler.Flush();
236	if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
237	break;
238	}
239	}
240	}
241	if (pos == `0`) {
242	return false;
243	}
244	// Update the pointers so we don't have to re-analyze the string
245	startPos = pos;
246	return true;
247	}
248
249	// This class is used by the enter and exit methods, so it needs
250	// to be hoisted out of the function.
251
252	class QuoteCls {
253	public:
254	int Count;
255	char Up;
256	char Down;
257	QuoteCls() noexcept {
258	New();
259	}
260	void New() noexcept {
261	Count = `0`;
262	Up = `'\0'`;
263	Down = `'\0'`;
264	}
265	void Open(char u) {
266	Count++;
267	Up = u;
268	Down = opposite(Up);
269	}
270	};
271
272
273	static void enterInnerExpression(int *p_inner_string_types,
274	int *p_inner_expn_brace_counts,
275	QuoteCls *p_inner_quotes,
276	int &inner_string_count,
277	int &state,
278	int &brace_counts,
279	QuoteCls curr_quote
280	) {
281	p_inner_string_types[inner_string_count] = state;
282	state = SCE_RB_DEFAULT;
283	p_inner_expn_brace_counts[inner_string_count] = brace_counts;
284	brace_counts = `0`;
285	p_inner_quotes[inner_string_count] = curr_quote;
286	++inner_string_count;
287	}
288
289	static void exitInnerExpression(int *p_inner_string_types,
290	int *p_inner_expn_brace_counts,
291	QuoteCls *p_inner_quotes,
292	int &inner_string_count,
293	int &state,
294	int &brace_counts,
295	QuoteCls &curr_quote
296	) {
297	--inner_string_count;
298	state = p_inner_string_types[inner_string_count];
299	brace_counts = p_inner_expn_brace_counts[inner_string_count];
300	curr_quote = p_inner_quotes[inner_string_count];
301	}
302
303	static bool isEmptyLine(Sci_Position pos,
304	Accessor &styler) {
305	int spaceFlags = `0`;
306	Sci_Position lineCurrent = styler.GetLine(pos);
307	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
308	return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != `0`;
309	}
310
// Returns true when keyword is one of the listed Ruby keywords.
// Going by the function name, these are the keywords after which a regular
// expression may start. The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
	static const char *const regexPrecedingKeywords[] = {
		"and", "begin", "break", "case", "do",
		"else", "elsif", "if", "next", "return",
		"when", "unless", "until", "not", "or"
	};
	for (const char *candidate : regexPrecedingKeywords) {
		if (strcmp(keyword, candidate) == 0) {
			return true;
		}
	}
	return false;
}
331
332	// Look at chars up to but not including endPos
333	// Don't look at styles in case we're looking forward
334
335	static Sci_Position skipWhitespace(Sci_Position startPos,
336	Sci_Position endPos,
337	Accessor &styler) {
338	for (Sci_Position i = startPos; i < endPos; i++) {
339	if (!iswhitespace(styler [i])) {
340	return i;
341	}
342	}
343	return endPos;
344	}
345
346	// This routine looks for false positives like
347	// undef foo, <<
348	// There aren't too many.
349	//
350	// iPrev points to the start of <<
351
352	static bool sureThisIsHeredoc(Sci_Position iPrev,
353	Accessor &styler,
354	char *prevWord) {
355
356	// Not so fast, since Ruby's so dynamic. Check the context
357	// to make sure we're OK.
358	int prevStyle;
359	Sci_Position lineStart = styler.GetLine(iPrev);
360	Sci_Position lineStartPosn = styler.LineStart(lineStart);
361	styler.Flush();
362
363	// Find the first word after some whitespace
364	Sci_Position firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
365	if (firstWordPosn >= iPrev) {
366	// Have something like {^ <<}
367	//XXX Look at the first previous non-comment non-white line
368	// to establish the context. Not too likely though.
369	return true;
370	} else {
371	switch (prevStyle = styler.StyleAt(firstWordPosn)) {
372	case SCE_RB_WORD:
373	case SCE_RB_WORD_DEMOTED:
374	case SCE_RB_IDENTIFIER:
375	break;
376	default:
377	return true;
378	}
379	}
380	Sci_Position firstWordEndPosn = firstWordPosn;
381	char *dst = prevWord;
382	for (;;) {
383	if (firstWordEndPosn >= iPrev \|\|
384	styler.StyleAt(firstWordEndPosn) != prevStyle) {
385	*dst = `0`;
386	break;
387	}
388	*dst++ = styler [firstWordEndPosn];
389	firstWordEndPosn += `1`;
390	}
391	//XXX Write a style-aware thing to regex scintilla buffer objects
392	if (!strcmp(prevWord, "undef")
393	\|\| !strcmp(prevWord, "def")
394	\|\| !strcmp(prevWord, "alias")) {
395	// These keywords are what we were looking for
396	return false;
397	}
398	return true;
399	}
400
401	// Routine that saves us from allocating a buffer for the here-doc target
402	// targetEndPos points one past the end of the current target
403	static bool haveTargetMatch(Sci_Position currPos,
404	Sci_Position lengthDoc,
405	Sci_Position targetStartPos,
406	Sci_Position targetEndPos,
407	Accessor &styler) {
408	if (lengthDoc - currPos < targetEndPos - targetStartPos) {
409	return false;
410	}
411	Sci_Position i, j;
412	for (i = targetStartPos, j = currPos;
413	i < targetEndPos && j < lengthDoc;
414	i++, j++) {
415	if (styler [i] != styler [j]) {
416	return false;
417	}
418	}
419	return true;
420	}
421
422	// Finds the start position of the expression containing @p pos
423	// @p min_pos should be a known expression start, e.g. the start of the line
424	static Sci_Position findExpressionStart(Sci_Position pos,
425	Sci_Position min_pos,
426	Accessor &styler) {
427	int depth = `0`;
428	for (; pos > min_pos; pos -= `1`) {
429	int style = styler.StyleAt(pos - `1`);
430	if (style == SCE_RB_OPERATOR) {
431	int ch = styler [pos - `1`];
432	if (ch == `'}'` \|\| ch == `')'` \|\| ch == `']'`) {
433	depth += `1`;
434	} else if (ch == `'{'` \|\| ch == `'('` \|\| ch == `'['`) {
435	if (depth == `0`) {
436	break;
437	} else {
438	depth -= `1`;
439	}
440	} else if (ch == `';'` && depth == `0`) {
441	break;
442	}
443	}
444	}
445	return pos;
446	}
447
448	// We need a check because the form
449	// [identifier] <<[target]
450	// is ambiguous. The Ruby lexer/parser resolves it by
451	// looking to see if [identifier] names a variable or a
452	// function. If it's a function, it's the start of a here-doc.
453	// If it's a var, it's an operator. This lexer doesn't
454	// maintain a symbol table, so it looks ahead to see what's
455	// going on, in cases where we have
456	// ^[white-space][identifier([.\|::]identifier)][white-space]<<[target]*
457	//
458	// If there's no occurrence of [target] on a line, assume we don't.
459
460	// return true == yes, we have no heredocs
461
462	static bool sureThisIsNotHeredoc(Sci_Position lt2StartPos,
463	Accessor &styler) {
464	int prevStyle;
465	// Use full document, not just part we're styling
466	Sci_Position lengthDoc = styler.Length();
467	Sci_Position lineStart = styler.GetLine(lt2StartPos);
468	Sci_Position lineStartPosn = styler.LineStart(lineStart);
469	styler.Flush();
470	const bool definitely_not_a_here_doc = true;
471	const bool looks_like_a_here_doc = false;
472
473	// find the expression start rather than the line start
474	Sci_Position exprStartPosn = findExpressionStart(lt2StartPos, lineStartPosn, styler);
475
476	// Find the first word after some whitespace
477	Sci_Position firstWordPosn = skipWhitespace(exprStartPosn, lt2StartPos, styler);
478	if (firstWordPosn >= lt2StartPos) {
479	return definitely_not_a_here_doc;
480	}
481	prevStyle = styler.StyleAt(firstWordPosn);
482	// If we have '<<' following a keyword, it's not a heredoc
483	if (prevStyle != SCE_RB_IDENTIFIER
484	&& prevStyle != SCE_RB_SYMBOL
485	&& prevStyle != SCE_RB_INSTANCE_VAR
486	&& prevStyle != SCE_RB_CLASS_VAR) {
487	return definitely_not_a_here_doc;
488	}
489	int newStyle = prevStyle;
490	// Some compilers incorrectly warn about uninit newStyle
491	for (firstWordPosn += `1`; firstWordPosn <= lt2StartPos; firstWordPosn += `1`) {
492	// Inner loop looks at the name
493	for (; firstWordPosn <= lt2StartPos; firstWordPosn += `1`) {
494	newStyle = styler.StyleAt(firstWordPosn);
495	if (newStyle != prevStyle) {
496	break;
497	}
498	}
499	// Do we have '::' or '.'?
500	if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
501	char ch = styler [firstWordPosn];
502	if (ch == `'.'`) {
503	// yes
504	} else if (ch == `':'`) {
505	if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
506	return definitely_not_a_here_doc;
507	} else if (styler [firstWordPosn] != `':'`) {
508	return definitely_not_a_here_doc;
509	}
510	} else {
511	break;
512	}
513	} else {
514	break;
515	}
516	// on second and next passes, only identifiers may appear since
517	// class and instance variable are private
518	prevStyle = SCE_RB_IDENTIFIER;
519	}
520	// Skip next batch of white-space
521	firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
522	// possible symbol for an implicit hash argument
523	if (firstWordPosn < lt2StartPos && styler.StyleAt(firstWordPosn) == SCE_RB_SYMBOL) {
524	for (; firstWordPosn <= lt2StartPos; firstWordPosn += `1`) {
525	if (styler.StyleAt(firstWordPosn) != SCE_RB_SYMBOL) {
526	break;
527	}
528	}
529	// Skip next batch of white-space
530	firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
531	}
532	if (firstWordPosn != lt2StartPos) {
533	// Have [[^ws[identifier]ws[something_else]ws<<
534	return definitely_not_a_here_doc;
535	}
536	// OK, now 'j' will point to the current spot moving ahead
537	Sci_Position j = firstWordPosn + `1`;
538	if (styler.StyleAt(j) != SCE_RB_OPERATOR \|\| styler [j] != `'<'`) {
539	// This shouldn't happen
540	return definitely_not_a_here_doc;
541	}
542	Sci_Position nextLineStartPosn = styler.LineStart(lineStart + `1`);
543	if (nextLineStartPosn >= lengthDoc) {
544	return definitely_not_a_here_doc;
545	}
546	j = skipWhitespace(j + `1`, nextLineStartPosn, styler);
547	if (j >= lengthDoc) {
548	return definitely_not_a_here_doc;
549	}
550	bool allow_indent;
551	Sci_Position target_start, target_end;
552	// From this point on no more styling, since we're looking ahead
553	if (styler [j] == `'-'` \|\| styler [j] == `'~'`) {
554	allow_indent = true;
555	j++;
556	} else {
557	allow_indent = false;
558	}
559
560	// Allow for quoted targets.
561	char target_quote = `0`;
562	switch (styler [j]) {
563	case `'\''`:
564	case `'"'`:
565	case '`':
566	target_quote = styler [j];
567	j += `1`;
568	}
569
570	if (isSafeAlnum(styler [j])) {
571	// Init target_end because some compilers think it won't
572	// be initialized by the time it's used
573	target_start = target_end = j;
574	j++;
575	} else {
576	return definitely_not_a_here_doc;
577	}
578	for (; j < lengthDoc; j++) {
579	if (!isSafeAlnum(styler [j])) {
580	if (target_quote && styler [j] != target_quote) {
581	// unquoted end
582	return definitely_not_a_here_doc;
583	}
584
585	// And for now make sure that it's a newline
586	// don't handle arbitrary expressions yet
587
588	target_end = j;
589	if (target_quote) {
590	// Now we can move to the character after the string delimiter.
591	j += `1`;
592	}
593	j = skipWhitespace(j, lengthDoc, styler);
594	if (j >= lengthDoc) {
595	return definitely_not_a_here_doc;
596	} else {
597	char ch = styler [j];
598	if (ch == `'#'` \|\| isEOLChar(ch)) {
599	// This is OK, so break and continue;
600	break;
601	} else {
602	return definitely_not_a_here_doc;
603	}
604	}
605	}
606	}
607
608	// Just look at the start of each line
609	Sci_Position last_line = styler.GetLine(lengthDoc - `1`);
610	// But don't go too far
611	if (last_line > lineStart + `50`) {
612	last_line = lineStart + `50`;
613	}
614	for (Sci_Position line_num = lineStart + `1`; line_num <= last_line; line_num++) {
615	if (allow_indent) {
616	j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
617	} else {
618	j = styler.LineStart(line_num);
619	}
620	// target_end is one past the end
621	if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
622	// We got it
623	return looks_like_a_here_doc;
624	}
625	}
626	return definitely_not_a_here_doc;
627	}
628
629	//todo: if we aren't looking at a stdio character,
630	// move to the start of the first line that is not in a
631	// multi-line construct
632
633	static void synchronizeDocStart(Sci_PositionU &startPos,
634	Sci_Position &length,
635	int &initStyle,
636	Accessor &styler,
637	bool skipWhiteSpace=false) {
638
639	styler.Flush();
640	int style = actual_style(styler.StyleAt(startPos));
641	switch (style) {
642	case SCE_RB_STDIN:
643	case SCE_RB_STDOUT:
644	case SCE_RB_STDERR:
645	// Don't do anything else with these.
646	return;
647	}
648
649	Sci_Position pos = startPos;
650	// Quick way to characterize each line
651	Sci_Position lineStart;
652	for (lineStart = styler.GetLine(pos); lineStart > `0`; lineStart--) {
653	// Now look at the style before the previous line's EOL
654	pos = styler.LineStart(lineStart) - `1`;
655	if (pos <= `10`) {
656	lineStart = `0`;
657	break;
658	}
659	char ch = styler.SafeGetCharAt(pos);
660	char chPrev = styler.SafeGetCharAt(pos - `1`);
661	if (ch == `'\n'` && chPrev == `'\r'`) {
662	pos--;
663	}
664	if (styler.SafeGetCharAt(pos - `1`) == `'\\'`) {
665	// Continuation line -- keep going
666	} else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
667	// Part of multi-line construct -- keep going
668	} else if (currLineContainsHereDelims(pos, styler)) {
669	// Keep going, with pos and length now pointing
670	// at the end of the here-doc delimiter
671	} else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
672	// Keep going
673	} else {
674	break;
675	}
676	}
677	pos = styler.LineStart(lineStart);
678	length += (startPos - pos);
679	startPos = pos;
680	initStyle = SCE_RB_DEFAULT;
681	}
682
683	static void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
684	WordList *keywordlists[], Accessor &styler) {
685
686	// Lexer for Ruby often has to backtrack to start of current style to determine
687	// which characters are being used as quotes, how deeply nested is the
688	// start position and what the termination string is for here documents
689
690	WordList &keywords = *keywordlists[`0`];
691
692	class HereDocCls {
693	public:
694	int State = `0`;
695	// States
696	// 0: '<<' encountered
697	// 1: collect the delimiter
698	// 1b: text between the end of the delimiter and the EOL
699	// 2: here doc text (lines after the delimiter)
700	char Quote = `0`; // the char after '<<'
701	bool Quoted = false; // true if Quote in ('\'','"','`')
702	int DelimiterLength = `0`; // strlen(Delimiter)
703	char Delimiter[`256`] {}; // the Delimiter, limit of 256: from Perl
704	bool CanBeIndented = false;
705	};
706	HereDocCls HereDoc;
707
708	QuoteCls Quote;
709
710	int numDots = `0`; // For numbers --
711	// Don't start lexing in the middle of a num
712
713	synchronizeDocStart(startPos, length, initStyle, styler, // ref args
714	false);
715
716	bool preferRE = true;
717	int state = initStyle;
718	Sci_Position lengthDoc = startPos + length;
719
720	char prevWord[MAX_KEYWORD_LENGTH + `1`] = ""; // 1 byte for zero
721	if (length == `0`)
722	return;
723
724	char chPrev = styler.SafeGetCharAt(startPos - `1`);
725	char chNext = styler.SafeGetCharAt(startPos);
726	bool is_real_number = true; // Differentiate between constants and ?-sequences.
727	styler.StartAt(startPos);
728	styler.StartSegment(startPos);
729
730	static int q_states[] = {SCE_RB_STRING_Q,
731	SCE_RB_STRING_QQ,
732	SCE_RB_STRING_QR,
733	SCE_RB_STRING_QW,
734	SCE_RB_STRING_QW,
735	SCE_RB_STRING_QX
736	};
737	static const char *q_chars = "qQrwWx";
738
739	// In most cases a value of 2 should be ample for the code in the
740	// Ruby library, and the code the user is likely to enter.
741	// For example,
742	// fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
743	// if options[:verbose]
744	// from fileutils.rb nests to a level of 2
745	// If the user actually hits a 6th occurrence of '#{' in a double-quoted
746	// string (including regex'es, %Q, %<sym>, %w, and other strings
747	// that interpolate), it will stay as a string. The problem with this
748	// is that quotes might flip, a 7th '#{' will look like a comment,
749	// and code-folding might be wrong.
750
751	// If anyone runs into this problem, I recommend raising this
752	// value slightly higher or replacing the fixed array with a linked
753	// list. Keep in mind this code will be called every time the lexer
754	// is invoked.
755
756	#define INNER_STRINGS_MAX_COUNT 5
757	// These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
758	int inner_string_types[INNER_STRINGS_MAX_COUNT] {};
759	// Track # braces when we push a new #{ thing
760	int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT] {};
761	QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
762	int inner_string_count = `0`;
763	int brace_counts = `0`; // Number of #{ ... } things within an expression
764
765	Sci_Position i;
766	for (i = `0`; i < INNER_STRINGS_MAX_COUNT; i++) {
767	inner_string_types[i] = `0`;
768	inner_expn_brace_counts[i] = `0`;
769	}
770	for (i = startPos; i < lengthDoc; i++) {
771	char ch = chNext;
772	chNext = styler.SafeGetCharAt(i + `1`);
773	char chNext2 = styler.SafeGetCharAt(i + `2`);
774
775	if (styler.IsLeadByte(ch)) {
776	chNext = chNext2;
777	chPrev = `' '`;
778	i += `1`;
779	continue;
780	}
781
782	// skip on DOS/Windows
783	//No, don't, because some things will get tagged on,
784	// so we won't recognize keywords, for example
785	#if 0
786	if (ch == `'\r'` && chNext == `'\n'`) {
787	continue;
788	}
789	#endif
790
791	if (HereDoc.State == `1` && isEOLChar(ch)) {
792	// Begin of here-doc (the line after the here-doc delimiter):
793	HereDoc.State = `2`;
794	styler.ColourTo(i-`1`, state);
795	// Don't check for a missing quote, just jump into
796	// the here-doc state
797	state = SCE_RB_HERE_Q;
798	}
799
800	// Regular transitions
801	if (state == SCE_RB_DEFAULT) {
802	if (isSafeDigit(ch)) {
803	styler.ColourTo(i - `1`, state);
804	state = SCE_RB_NUMBER;
805	is_real_number = true;
806	numDots = `0`;
807	} else if (isHighBitChar(ch) \|\| iswordstart(ch)) {
808	styler.ColourTo(i - `1`, state);
809	state = SCE_RB_WORD;
810	} else if (ch == `'#'`) {
811	styler.ColourTo(i - `1`, state);
812	state = SCE_RB_COMMENTLINE;
813	} else if (ch == `'='`) {
814	// =begin indicates the start of a comment (doc) block
815	if ((i == `0` \|\| isEOLChar(chPrev))
816	&& chNext == `'b'`
817	&& styler.SafeGetCharAt(i + `2`) == `'e'`
818	&& styler.SafeGetCharAt(i + `3`) == `'g'`
819	&& styler.SafeGetCharAt(i + `4`) == `'i'`
820	&& styler.SafeGetCharAt(i + `5`) == `'n'`
821	&& !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + `6`))) {
822	styler.ColourTo(i - `1`, state);
823	state = SCE_RB_POD;
824	} else {
825	styler.ColourTo(i - `1`, state);
826	styler.ColourTo(i, SCE_RB_OPERATOR);
827	preferRE = true;
828	}
829	} else if (ch == `'"'`) {
830	styler.ColourTo(i - `1`, state);
831	state = SCE_RB_STRING;
832	Quote.New();
833	Quote.Open(ch);
834	} else if (ch == `'\''`) {
835	styler.ColourTo(i - `1`, state);
836	state = SCE_RB_CHARACTER;
837	Quote.New();
838	Quote.Open(ch);
839	} else if (ch == '`') {
840	styler.ColourTo(i - `1`, state);
841	state = SCE_RB_BACKTICKS;
842	Quote.New();
843	Quote.Open(ch);
844	} else if (ch == `'@'`) {
845	// Instance or class var
846	styler.ColourTo(i - `1`, state);
847	if (chNext == `'@'`) {
848	state = SCE_RB_CLASS_VAR;
849	advance_char(i, ch, chNext, chNext2); // pass by ref
850	} else {
851	state = SCE_RB_INSTANCE_VAR;
852	}
853	} else if (ch == `'$'`) {
854	// Check for a builtin global
855	styler.ColourTo(i - `1`, state);
856	// Recognize it bit by bit
857	state = SCE_RB_GLOBAL;
858	} else if (ch == `'/'` && preferRE) {
859	// Ambiguous operator
860	styler.ColourTo(i - `1`, state);
861	state = SCE_RB_REGEX;
862	Quote.New();
863	Quote.Open(ch);
864	} else if (ch == `'<'` && chNext == `'<'` && chNext2 != `'='`) {
865
866	// Recognise the '<<' symbol - either a here document or a binary op
867	styler.ColourTo(i - `1`, state);
868	i++;
869	chNext = chNext2;
870	styler.ColourTo(i, SCE_RB_OPERATOR);
871
872	if (!(strchr("\"\'`_-~", chNext2) \|\| isSafeAlpha(chNext2))) {
873	// It's definitely not a here-doc,
874	// based on Ruby's lexer/parser in the
875	// heredoc_identifier routine.
876	// Nothing else to do.
877	} else if (preferRE) {
878	if (sureThisIsHeredoc(i - `1`, styler, prevWord)) {
879	state = SCE_RB_HERE_DELIM;
880	HereDoc.State = `0`;
881	}
882	// else leave it in default state
883	} else {
884	if (sureThisIsNotHeredoc(i - `1`, styler)) {
885	// leave state as default
886	// We don't have all the heuristics Perl has for indications
887	// of a here-doc, because '<<' is overloadable and used
888	// for so many other classes.
889	} else {
890	state = SCE_RB_HERE_DELIM;
891	HereDoc.State = `0`;
892	}
893	}
894	preferRE = (state != SCE_RB_HERE_DELIM);
895	} else if (ch == `':'`) {
896	styler.ColourTo(i - `1`, state);
897	if (chNext == `':'`) {
898	// Mark "::" as an operator, not symbol start
899	styler.ColourTo(i + `1`, SCE_RB_OPERATOR);
900	advance_char(i, ch, chNext, chNext2); // pass by ref
901	state = SCE_RB_DEFAULT;
902	preferRE = false;
903	} else if (isSafeWordcharOrHigh(chNext)) {
904	state = SCE_RB_SYMBOL;
905	} else if ((chNext == `'@'` \|\| chNext == `'$'`) &&
906	isSafeWordcharOrHigh(chNext2)) {
907	// instance and global variable followed by an identifier
908	advance_char(i, ch, chNext, chNext2);
909	state = SCE_RB_SYMBOL;
910	} else if (((chNext == `'@'` && chNext2 == `'@'`) \|\|
911	(chNext == `'$'` && chNext2 == `'-'`)) &&
912	isSafeWordcharOrHigh(styler.SafeGetCharAt(i+`3`))) {
913	// class variables and special global variable "$-IDENTCHAR"
914	state = SCE_RB_SYMBOL;
915	// $-IDENTCHAR doesn't continue past the IDENTCHAR
916	if (chNext == `'$'`) {
917	styler.ColourTo(i+`3`, SCE_RB_SYMBOL);
918	state = SCE_RB_DEFAULT;
919	}
920	i += `3`;
921	ch = styler.SafeGetCharAt(i);
922	chNext = styler.SafeGetCharAt(i+`1`);
923	} else if (chNext == `'$'` && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2)) {
924	// single-character special global variables
925	i += `2`;
926	ch = chNext2;
927	chNext = styler.SafeGetCharAt(i+`1`);
928	styler.ColourTo(i, SCE_RB_SYMBOL);
929	state = SCE_RB_DEFAULT;
930	} else if (strchr("[!~+-/%=<>&^\|", chNext)) {
931	// Do the operator analysis in-line, looking ahead
932	// Based on the table in pickaxe 2nd ed., page 339
933	bool doColoring = true;
934	switch (chNext) {
935	case `'['`:
936	if (chNext2 == `']'`) {
937	char ch_tmp = styler.SafeGetCharAt(i + `3`);
938	if (ch_tmp == `'='`) {
939	i += `3`;
940	ch = ch_tmp;
941	chNext = styler.SafeGetCharAt(i + `1`);
942	} else {
943	i += `2`;
944	ch = chNext2;
945	chNext = ch_tmp;
946	}
947	} else {
948	doColoring = false;
949	}
950	break;
951
952	case `'*'`:
953	if (chNext2 == `'*'`) {
954	i += `2`;
955	ch = chNext2;
956	chNext = styler.SafeGetCharAt(i + `1`);
957	} else {
958	advance_char(i, ch, chNext, chNext2);
959	}
960	break;
961
962	case `'!'`:
963	if (chNext2 == `'='` \|\| chNext2 == `'~'`) {
964	i += `2`;
965	ch = chNext2;
966	chNext = styler.SafeGetCharAt(i + `1`);
967	} else {
968	advance_char(i, ch, chNext, chNext2);
969	}
970	break;
971
972	case `'<'`:
973	if (chNext2 == `'<'`) {
974	i += `2`;
975	ch = chNext2;
976	chNext = styler.SafeGetCharAt(i + `1`);
977	} else if (chNext2 == `'='`) {
978	char ch_tmp = styler.SafeGetCharAt(i + `3`);
979	if (ch_tmp == `'>'`) { // <=> operator
980	i += `3`;
981	ch = ch_tmp;
982	chNext = styler.SafeGetCharAt(i + `1`);
983	} else {
984	i += `2`;
985	ch = chNext2;
986	chNext = ch_tmp;
987	}
988	} else {
989	advance_char(i, ch, chNext, chNext2);
990	}
991	break;
992
993	default:
994	// Simple one-character operators
995	advance_char(i, ch, chNext, chNext2);
996	break;
997	}
998	if (doColoring) {
999	styler.ColourTo(i, SCE_RB_SYMBOL);
1000	state = SCE_RB_DEFAULT;
1001	}
1002	} else if (!preferRE) {
1003	// Don't color symbol strings (yet)
1004	// Just color the ":" and color rest as string
1005	styler.ColourTo(i, SCE_RB_SYMBOL);
1006	state = SCE_RB_DEFAULT;
1007	} else {
1008	styler.ColourTo(i, SCE_RB_OPERATOR);
1009	state = SCE_RB_DEFAULT;
1010	preferRE = true;
1011	}
1012	} else if (ch == `'%'`) {
1013	styler.ColourTo(i - `1`, state);
1014	bool have_string = false;
1015	if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
1016	Quote.New();
1017	const char *hit = strchr(q_chars, chNext);
1018	if (hit != NULL) {
1019	state = q_states[hit - q_chars];
1020	Quote.Open(chNext2);
1021	i += `2`;
1022	ch = chNext2;
1023	chNext = styler.SafeGetCharAt(i + `1`);
1024	have_string = true;
1025	}
1026	} else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1027	// Ruby doesn't allow high bit chars here,
1028	// but the editor host might
1029	Quote.New();
1030	state = SCE_RB_STRING_QQ;
1031	Quote.Open(chNext);
1032	advance_char(i, ch, chNext, chNext2); // pass by ref
1033	have_string = true;
1034	} else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
1035	// Ruby doesn't allow high bit chars here,
1036	// but the editor host might
1037	Quote.New();
1038	state = SCE_RB_STRING_QQ;
1039	Quote.Open(chNext);
1040	advance_char(i, ch, chNext, chNext2); // pass by ref
1041	have_string = true;
1042	}
1043	if (!have_string) {
1044	styler.ColourTo(i, SCE_RB_OPERATOR);
1045	// stay in default
1046	preferRE = true;
1047	}
1048	} else if (ch == `'?'`) {
1049	styler.ColourTo(i - `1`, state);
1050	if (iswhitespace(chNext) \|\| chNext == `'\n'` \|\| chNext == `'\r'`) {
1051	styler.ColourTo(i, SCE_RB_OPERATOR);
1052	} else {
1053	// It's the start of a character code escape sequence
1054	// Color it as a number.
1055	state = SCE_RB_NUMBER;
1056	is_real_number = false;
1057	}
1058	} else if (isoperator(ch) \|\| ch == `'.'`) {
1059	styler.ColourTo(i - `1`, state);
1060	styler.ColourTo(i, SCE_RB_OPERATOR);
1061	// If we're ending an expression or block,
1062	// assume it ends an object, and the ambivalent
1063	// constructs are binary operators
1064	//
1065	// So if we don't have one of these chars,
1066	// we aren't ending an object exp'n, and ops
1067	// like : << / are unary operators.
1068
1069	if (ch == `'{'`) {
1070	++brace_counts;
1071	preferRE = true;
1072	} else if (ch == `'}'` && --brace_counts < `0`
1073	&& inner_string_count > `0`) {
1074	styler.ColourTo(i, SCE_RB_OPERATOR);
1075	exitInnerExpression(inner_string_types,
1076	inner_expn_brace_counts,
1077	inner_quotes,
1078	inner_string_count,
1079	state, brace_counts, Quote);
1080	} else {
1081	preferRE = (strchr(")}].", ch) == NULL);
1082	}
1083	// Stay in default state
1084	} else if (isEOLChar(ch)) {
1085	// Make sure it's a true line-end, with no backslash
1086	if ((ch == `'\r'` \|\| (ch == `'\n'` && chPrev != `'\r'`))
1087	&& chPrev != `'\\'`) {
1088	// Assume we've hit the end of the statement.
1089	preferRE = true;
1090	}
1091	}
1092	} else if (state == SCE_RB_WORD) {
1093	if (ch == `'.'` \|\| !isSafeWordcharOrHigh(ch)) {
1094	// Words include x? in all contexts,
1095	// and <letters>= after either 'def' or a dot
1096	// Move along until a complete word is on our left
1097
1098	// Default accessor treats '.' as word-chars,
1099	// but we don't for now.
1100
1101	if (ch == `'='`
1102	&& isSafeWordcharOrHigh(chPrev)
1103	&& (chNext == `'('`
1104	\|\| strchr(" \t\n\r", chNext) != NULL)
1105	&& (!strcmp(prevWord, "def")
1106	\|\| followsDot(styler.GetStartSegment(), styler))) {
1107	// <name>= is a name only when being def'd -- Get it the next time
1108	// This means that <name>=<name> is always lexed as
1109	// <name>, (op, =), <name>
1110	} else if (ch == `':'`
1111	&& isSafeWordcharOrHigh(chPrev)
1112	&& strchr(" \t\n\r", chNext) != NULL) {
1113	state = SCE_RB_SYMBOL;
1114	} else if ((ch == `'?'` \|\| ch == `'!'`)
1115	&& isSafeWordcharOrHigh(chPrev)
1116	&& !isSafeWordcharOrHigh(chNext)) {
1117	// <name>? is a name -- Get it the next time
1118	// But <name>?<name> is always lexed as
1119	// <name>, (op, ?), <name>
1120	// Same with <name>! to indicate a method that
1121	// modifies its target
1122	} else if (isEOLChar(ch)
1123	&& isMatch(styler, lengthDoc, i - `7`, "__END__")) {
1124	styler.ColourTo(i, SCE_RB_DATASECTION);
1125	state = SCE_RB_DATASECTION;
1126	// No need to handle this state -- we'll just move to the end
1127	preferRE = false;
1128	} else {
1129	Sci_Position wordStartPos = styler.GetStartSegment();
1130	int word_style = ClassifyWordRb(wordStartPos, i - `1`, keywords, styler, prevWord);
1131	switch (word_style) {
1132	case SCE_RB_WORD:
1133	preferRE = RE_CanFollowKeyword(prevWord);
1134	break;
1135
1136	case SCE_RB_WORD_DEMOTED:
1137	preferRE = true;
1138	break;
1139
1140	case SCE_RB_IDENTIFIER:
1141	if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1142	preferRE = true;
1143	} else if (isEOLChar(ch)) {
1144	preferRE = true;
1145	} else {
1146	preferRE = false;
1147	}
1148	break;
1149	default:
1150	preferRE = false;
1151	}
1152	if (ch == `'.'`) {
1153	// We might be redefining an operator-method
1154	preferRE = false;
1155	}
1156	// And if it's the first
1157	redo_char(i, ch, chNext, chNext2, state); // pass by ref
1158	}
1159	}
1160	} else if (state == SCE_RB_NUMBER) {
1161	if (!is_real_number) {
1162	if (ch != `'\\'`) {
1163	styler.ColourTo(i, state);
1164	state = SCE_RB_DEFAULT;
1165	preferRE = false;
1166	} else if (strchr("\\ntrfvaebs", chNext)) {
1167	// Terminal escape sequence -- handle it next time
1168	// Nothing more to do this time through the loop
1169	} else if (chNext == `'C'` \|\| chNext == `'M'`) {
1170	if (chNext2 != `'-'`) {
1171	// \C or \M ends the sequence -- handle it next time
1172	} else {
1173	// Move from abc?\C-x
1174	// ^
1175	// to
1176	// ^
1177	i += `2`;
1178	ch = chNext2;
1179	chNext = styler.SafeGetCharAt(i + `1`);
1180	}
1181	} else if (chNext == `'c'`) {
1182	// Stay here, \c is a combining sequence
1183	advance_char(i, ch, chNext, chNext2); // pass by ref
1184	} else {
1185	// ?\x, including ?\\ is final.
1186	styler.ColourTo(i + `1`, state);
1187	state = SCE_RB_DEFAULT;
1188	preferRE = false;
1189	advance_char(i, ch, chNext, chNext2);
1190	}
1191	} else if (isSafeAlnumOrHigh(ch) \|\| ch == `'_'`) {
1192	// Keep going
1193	} else if (ch == `'.'` && chNext == `'.'`) {
1194	++numDots;
1195	styler.ColourTo(i - `1`, state);
1196	redo_char(i, ch, chNext, chNext2, state); // pass by ref
1197	} else if (ch == `'.'` && ++numDots == `1`) {
1198	// Keep going
1199	} else {
1200	styler.ColourTo(i - `1`, state);
1201	redo_char(i, ch, chNext, chNext2, state); // pass by ref
1202	preferRE = false;
1203	}
1204	} else if (state == SCE_RB_COMMENTLINE) {
1205	if (isEOLChar(ch)) {
1206	styler.ColourTo(i - `1`, state);
1207	state = SCE_RB_DEFAULT;
1208	// Use whatever setting we had going into the comment
1209	}
1210	} else if (state == SCE_RB_HERE_DELIM) {
1211	// See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1212	// Slightly different: if we find an immediate '-',
1213	// the target can appear indented.
1214
1215	if (HereDoc.State == `0`) { // '<<' encountered
1216	HereDoc.State = `1`;
1217	HereDoc.DelimiterLength = `0`;
1218	if (ch == `'-'` \|\| ch == `'~'`) {
1219	HereDoc.CanBeIndented = true;
1220	advance_char(i, ch, chNext, chNext2); // pass by ref
1221	} else {
1222	HereDoc.CanBeIndented = false;
1223	}
1224	if (isEOLChar(ch)) {
1225	// Bail out of doing a here doc if there's no target
1226	state = SCE_RB_DEFAULT;
1227	preferRE = false;
1228	} else {
1229	HereDoc.Quote = ch;
1230
1231	if (ch == `'\''` \|\| ch == `'"'` \|\| ch == '`') {
1232	HereDoc.Quoted = true;
1233	HereDoc.Delimiter[`0`] = `'\0'`;
1234	} else {
1235	HereDoc.Quoted = false;
1236	HereDoc.Delimiter[`0`] = ch;
1237	HereDoc.Delimiter[`1`] = `'\0'`;
1238	HereDoc.DelimiterLength = `1`;
1239	}
1240	}
1241	} else if (HereDoc.State == `1`) { // collect the delimiter
1242	if (isEOLChar(ch)) {
1243	// End the quote now, and go back for more
1244	styler.ColourTo(i - `1`, state);
1245	state = SCE_RB_DEFAULT;
1246	i--;
1247	chNext = ch;
1248	preferRE = false;
1249	} else if (HereDoc.Quoted) {
1250	if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1251	styler.ColourTo(i, state);
1252	state = SCE_RB_DEFAULT;
1253	preferRE = false;
1254	} else {
1255	if (ch == `'\\'` && !isEOLChar(chNext)) {
1256	advance_char(i, ch, chNext, chNext2);
1257	}
1258	HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1259	HereDoc.Delimiter[HereDoc.DelimiterLength] = `'\0'`;
1260	}
1261	} else { // an unquoted here-doc delimiter
1262	if (isSafeAlnumOrHigh(ch) \|\| ch == `'_'`) {
1263	HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1264	HereDoc.Delimiter[HereDoc.DelimiterLength] = `'\0'`;
1265	} else {
1266	styler.ColourTo(i - `1`, state);
1267	redo_char(i, ch, chNext, chNext2, state);
1268	preferRE = false;
1269	}
1270	}
1271	if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - `1`) {
1272	styler.ColourTo(i - `1`, state);
1273	state = SCE_RB_ERROR;
1274	preferRE = false;
1275	}
1276	}
1277	} else if (state == SCE_RB_HERE_Q) {
1278	// Not needed: HereDoc.State == 2
1279	// Indentable here docs: look backwards
1280	// Non-indentable: look forwards, like in Perl
1281	//
1282	// Why: so we can quickly resolve things like <<-" abc"
1283
1284	if (!HereDoc.CanBeIndented) {
1285	if (isEOLChar(chPrev)
1286	&& isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1287	styler.ColourTo(i - `1`, state);
1288	i += static_cast<Sci_Position>(HereDoc.DelimiterLength) - `1`;
1289	chNext = styler.SafeGetCharAt(i + `1`);
1290	if (isEOLChar(chNext)) {
1291	styler.ColourTo(i, SCE_RB_HERE_DELIM);
1292	state = SCE_RB_DEFAULT;
1293	HereDoc.State = `0`;
1294	preferRE = false;
1295	}
1296	// Otherwise we skipped through the here doc faster.
1297	}
1298	} else if (isEOLChar(chNext)
1299	&& lookingAtHereDocDelim(styler,
1300	i - HereDoc.DelimiterLength + `1`,
1301	lengthDoc,
1302	HereDoc.Delimiter)) {
1303	styler.ColourTo(i - `1` - HereDoc.DelimiterLength, state);
1304	styler.ColourTo(i, SCE_RB_HERE_DELIM);
1305	state = SCE_RB_DEFAULT;
1306	preferRE = false;
1307	HereDoc.State = `0`;
1308	}
1309	} else if (state == SCE_RB_CLASS_VAR
1310	\|\| state == SCE_RB_INSTANCE_VAR
1311	\|\| state == SCE_RB_SYMBOL) {
1312	if (state == SCE_RB_SYMBOL &&
1313	// FIDs suffices '?' and '!'
1314	(((ch == `'!'` \|\| ch == `'?'`) && chNext != `'='`) \|\|
1315	// identifier suffix '='
1316	(ch == `'='` && (chNext != `'~'` && chNext != `'>'` &&
1317	(chNext != `'='` \|\| chNext2 == `'>'`))))) {
1318	styler.ColourTo(i, state);
1319	state = SCE_RB_DEFAULT;
1320	preferRE = false;
1321	} else if (!isSafeWordcharOrHigh(ch)) {
1322	styler.ColourTo(i - `1`, state);
1323	redo_char(i, ch, chNext, chNext2, state); // pass by ref
1324	preferRE = false;
1325	}
1326	} else if (state == SCE_RB_GLOBAL) {
1327	if (!isSafeWordcharOrHigh(ch)) {
1328	// handle special globals here as well
1329	if (chPrev == `'$'`) {
1330	if (ch == `'-'`) {
1331	// Include the next char, like $-a
1332	advance_char(i, ch, chNext, chNext2);
1333	}
1334	styler.ColourTo(i, state);
1335	state = SCE_RB_DEFAULT;
1336	} else {
1337	styler.ColourTo(i - `1`, state);
1338	redo_char(i, ch, chNext, chNext2, state); // pass by ref
1339	}
1340	preferRE = false;
1341	}
1342	} else if (state == SCE_RB_POD) {
1343	// PODs end with ^=end\s, -- any whitespace can follow =end
1344	if (strchr(" \t\n\r", ch) != NULL
1345	&& i > `5`
1346	&& isEOLChar(styler [i - `5`])
1347	&& isMatch(styler, lengthDoc, i - `4`, "=end")) {
1348	styler.ColourTo(i - `1`, state);
1349	state = SCE_RB_DEFAULT;
1350	preferRE = false;
1351	}
1352	} else if (state == SCE_RB_REGEX \|\| state == SCE_RB_STRING_QR) {
1353	if (ch == `'\\'` && Quote.Up != `'\\'`) {
1354	// Skip one
1355	advance_char(i, ch, chNext, chNext2);
1356	} else if (ch == Quote.Down) {
1357	Quote.Count--;
1358	if (Quote.Count == `0`) {
1359	// Include the options
1360	while (isSafeAlpha(chNext)) {
1361	i++;
1362	ch = chNext;
1363	chNext = styler.SafeGetCharAt(i + `1`);
1364	}
1365	styler.ColourTo(i, state);
1366	state = SCE_RB_DEFAULT;
1367	preferRE = false;
1368	}
1369	} else if (ch == Quote.Up) {
1370	// Only if close quoter != open quoter
1371	Quote.Count++;
1372
1373	} else if (ch == `'#'`) {
1374	if (chNext == `'{'`
1375	&& inner_string_count < INNER_STRINGS_MAX_COUNT) {
1376	// process #{ ... }
1377	styler.ColourTo(i - `1`, state);
1378	styler.ColourTo(i + `1`, SCE_RB_OPERATOR);
1379	enterInnerExpression(inner_string_types,
1380	inner_expn_brace_counts,
1381	inner_quotes,
1382	inner_string_count,
1383	state,
1384	brace_counts,
1385	Quote);
1386	preferRE = true;
1387	// Skip one
1388	advance_char(i, ch, chNext, chNext2);
1389	} else {
1390	//todo: distinguish comments from pound chars
1391	// for now, handle as comment
1392	styler.ColourTo(i - `1`, state);
1393	bool inEscape = false;
1394	while (++i < lengthDoc) {
1395	ch = styler.SafeGetCharAt(i);
1396	if (ch == `'\\'`) {
1397	inEscape = true;
1398	} else if (isEOLChar(ch)) {
1399	// Comment inside a regex
1400	styler.ColourTo(i - `1`, SCE_RB_COMMENTLINE);
1401	break;
1402	} else if (inEscape) {
1403	inEscape = false; // don't look at char
1404	} else if (ch == Quote.Down) {
1405	// Have the regular handler deal with this
1406	// to get trailing modifiers.
1407	i--;
1408	ch = styler [i];
1409	break;
1410	}
1411	}
1412	chNext = styler.SafeGetCharAt(i + `1`);
1413	}
1414	}
1415	// Quotes of all kinds...
1416	} else if (state == SCE_RB_STRING_Q \|\| state == SCE_RB_STRING_QQ \|\|
1417	state == SCE_RB_STRING_QX \|\| state == SCE_RB_STRING_QW \|\|
1418	state == SCE_RB_STRING \|\| state == SCE_RB_CHARACTER \|\|
1419	state == SCE_RB_BACKTICKS) {
1420	if (!Quote.Down && !isspacechar(ch)) {
1421	Quote.Open(ch);
1422	} else if (ch == `'\\'` && Quote.Up != `'\\'`) {
1423	//Riddle me this: Is it safe to skip every* escaped char?*
1424	advance_char(i, ch, chNext, chNext2);
1425	} else if (ch == Quote.Down) {
1426	Quote.Count--;
1427	if (Quote.Count == `0`) {
1428	styler.ColourTo(i, state);
1429	state = SCE_RB_DEFAULT;
1430	preferRE = false;
1431	}
1432	} else if (ch == Quote.Up) {
1433	Quote.Count++;
1434	} else if (ch == `'#'` && chNext == `'{'`
1435	&& inner_string_count < INNER_STRINGS_MAX_COUNT
1436	&& state != SCE_RB_CHARACTER
1437	&& state != SCE_RB_STRING_Q) {
1438	// process #{ ... }
1439	styler.ColourTo(i - `1`, state);
1440	styler.ColourTo(i + `1`, SCE_RB_OPERATOR);
1441	enterInnerExpression(inner_string_types,
1442	inner_expn_brace_counts,
1443	inner_quotes,
1444	inner_string_count,
1445	state,
1446	brace_counts,
1447	Quote);
1448	preferRE = true;
1449	// Skip one
1450	advance_char(i, ch, chNext, chNext2);
1451	}
1452	}
1453
1454	if (state == SCE_RB_ERROR) {
1455	break;
1456	}
1457	chPrev = ch;
1458	}
1459	if (state == SCE_RB_WORD) {
1460	// We've ended on a word, possibly at EOF, and need to
1461	// classify it.
1462	(void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - `1`, keywords, styler, prevWord);
1463	} else {
1464	styler.ColourTo(lengthDoc - `1`, state);
1465	}
1466	}
1467
// Helper functions for folding and for disambiguating keywords.
// These assume that there are no high-bit chars in the words they examine.
1470
1471	static void getPrevWord(Sci_Position pos,
1472	char *prevWord,
1473	Accessor &styler,
1474	int word_state)
1475	{
1476	Sci_Position i;
1477	styler.Flush();
1478	for (i = pos - `1`; i > `0`; i--) {
1479	if (actual_style(styler.StyleAt(i)) != word_state) {
1480	i++;
1481	break;
1482	}
1483	}
1484	if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1485	i = pos - MAX_KEYWORD_LENGTH;
1486	char *dst = prevWord;
1487	for (; i <= pos; i++) {
1488	*dst++ = styler [i];
1489	}
1490	*dst = `0`;
1491	}
1492
// Report whether prevWord is exactly one of the keywords listed below:
// "if", "do", "while", "unless", "until" or "for".
static bool keywordIsAmbiguous(const char *prevWord)
{
	// Ordered from most likely used to least likely.
	// Lots of ways to do a loop in Ruby besides 'while/until'.
	static const char *const ambiguousKeywords[] = {
		"if", "do", "while", "unless", "until", "for"
	};
	for (const char *candidate : ambiguousKeywords) {
		if (strcmp(prevWord, candidate) == 0) {
			return true;
		}
	}
	return false;
}
1508
1509	// Demote keywords in the following conditions:
1510	// if, while, unless, until modify a statement
1511	// do after a while or until, as a noise word (like then after if)
1512
1513	static bool keywordIsModifier(const char *word,
1514	Sci_Position pos,
1515	Accessor &styler)
1516	{
1517	if (word[`0`] == `'d'` && word[`1`] == `'o'` && !word[`2`]) {
1518	return keywordDoStartsLoop(pos, styler);
1519	}
1520	char ch, chPrev, chPrev2;
1521	int style = SCE_RB_DEFAULT;
1522	Sci_Position lineStart = styler.GetLine(pos);
1523	Sci_Position lineStartPosn = styler.LineStart(lineStart);
1524	// We want to step backwards until we don't care about the current
1525	// position. But first move lineStartPosn back behind any
1526	// continuations immediately above word.
1527	while (lineStartPosn > `0`) {
1528	ch = styler [lineStartPosn-`1`];
1529	if (ch == `'\n'` \|\| ch == `'\r'`) {
1530	chPrev = styler.SafeGetCharAt(lineStartPosn-`2`);
1531	chPrev2 = styler.SafeGetCharAt(lineStartPosn-`3`);
1532	lineStart = styler.GetLine(lineStartPosn-`1`);
1533	// If we find a continuation line, include it in our analysis.
1534	if (chPrev == `'\\'`) {
1535	lineStartPosn = styler.LineStart(lineStart);
1536	} else if (ch == `'\n'` && chPrev == `'\r'` && chPrev2 == `'\\'`) {
1537	lineStartPosn = styler.LineStart(lineStart);
1538	} else {
1539	break;
1540	}
1541	} else {
1542	break;
1543	}
1544	}
1545
1546	styler.Flush();
1547	while (--pos >= lineStartPosn) {
1548	style = actual_style(styler.StyleAt(pos));
1549	if (style == SCE_RB_DEFAULT) {
1550	if (iswhitespace(ch = styler [pos])) {
1551	//continue
1552	} else if (ch == `'\r'` \|\| ch == `'\n'`) {
1553	// Scintilla's LineStart() and GetLine() routines aren't
1554	// platform-independent, so if we have text prepared with
1555	// a different system we can't rely on it.
1556
1557	// Also, lineStartPosn may have been moved to more than one
1558	// line above word's line while pushing past continuations.
1559	chPrev = styler.SafeGetCharAt(pos - `1`);
1560	chPrev2 = styler.SafeGetCharAt(pos - `2`);
1561	if (chPrev == `'\\'`) {
1562	pos-=`1`; // gloss over the "\\"
1563	//continue
1564	} else if (ch == `'\n'` && chPrev == `'\r'` && chPrev2 == `'\\'`) {
1565	pos-=`2`; // gloss over the "\\\r"
1566	//continue
1567	} else {
1568	return false;
1569	}
1570	}
1571	} else {
1572	break;
1573	}
1574	}
1575	if (pos < lineStartPosn) {
1576	return false;
1577	}
1578	// First things where the action is unambiguous
1579	switch (style) {
1580	case SCE_RB_DEFAULT:
1581	case SCE_RB_COMMENTLINE:
1582	case SCE_RB_POD:
1583	case SCE_RB_CLASSNAME:
1584	case SCE_RB_DEFNAME:
1585	case SCE_RB_MODULE_NAME:
1586	return false;
1587	case SCE_RB_OPERATOR:
1588	break;
1589	case SCE_RB_WORD:
1590	// Watch out for uses of 'else if'
1591	//XXX: Make a list of other keywords where 'if' isn't a modifier
1592	// and can appear legitimately
1593	// Formulate this to avoid warnings from most compilers
1594	if (strcmp(word, "if") == `0`) {
1595	char prevWord[MAX_KEYWORD_LENGTH + `1`];
1596	getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1597	return strcmp(prevWord, "else") != `0`;
1598	}
1599	return true;
1600	default:
1601	return true;
1602	}
1603	// Assume that if the keyword follows an operator,
1604	// usually it's a block assignment, like
1605	// a << if x then y else z
1606
1607	ch = styler [pos];
1608	switch (ch) {
1609	case `')'`:
1610	case `']'`:
1611	case `'}'`:
1612	return true;
1613	default:
1614	return false;
1615	}
1616	}
1617
1618	#define WHILE_BACKWARDS "elihw"
1619	#define UNTIL_BACKWARDS "litnu"
1620	#define FOR_BACKWARDS "rof"
1621
1622	// Nothing fancy -- look to see if we follow a while/until somewhere
1623	// on the current line
1624
1625	static bool keywordDoStartsLoop(Sci_Position pos,
1626	Accessor &styler)
1627	{
1628	char ch;
1629	Sci_Position lineStart = styler.GetLine(pos);
1630	Sci_Position lineStartPosn = styler.LineStart(lineStart);
1631	styler.Flush();
1632	while (--pos >= lineStartPosn) {
1633	const int style = actual_style(styler.StyleAt(pos));
1634	if (style == SCE_RB_DEFAULT) {
1635	if ((ch = styler [pos]) == `'\r'` \|\| ch == `'\n'`) {
1636	// Scintilla's LineStart() and GetLine() routines aren't
1637	// platform-independent, so if we have text prepared with
1638	// a different system we can't rely on it.
1639	return false;
1640	}
1641	} else if (style == SCE_RB_WORD) {
1642	// Check for while or until, but write the word in backwards
1643	char prevWord[MAX_KEYWORD_LENGTH + `1`]; // 1 byte for zero
1644	char *dst = prevWord;
1645	int wordLen = `0`;
1646	Sci_Position start_word;
1647	for (start_word = pos;
1648	start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1649	start_word--) {
1650	if (++wordLen < MAX_KEYWORD_LENGTH) {
1651	*dst++ = styler [start_word];
1652	}
1653	}
1654	*dst = `0`;
1655	// Did we see our keyword?
1656	if (!strcmp(prevWord, WHILE_BACKWARDS)
1657	\|\| !strcmp(prevWord, UNTIL_BACKWARDS)
1658	\|\| !strcmp(prevWord, FOR_BACKWARDS)) {
1659	return true;
1660	}
1661	// We can move pos to the beginning of the keyword, and then
1662	// accept another decrement, as we can never have two contiguous
1663	// keywords:
1664	// word1 word2
1665	// ^
1666	// <- move to start_word
1667	// ^
1668	// <- loop decrement
1669	// ^ # pointing to end of word1 is fine
1670	pos = start_word;
1671	}
1672	}
1673	return false;
1674	}
1675
1676	static bool IsCommentLine(Sci_Position line, Accessor &styler) {
1677	Sci_Position pos = styler.LineStart(line);
1678	Sci_Position eol_pos = styler.LineStart(line + `1`) - `1`;
1679	for (Sci_Position i = pos; i < eol_pos; i++) {
1680	char ch = styler [i];
1681	if (ch == `'#'`)
1682	return true;
1683	else if (ch != `' '` && ch != `'\t'`)
1684	return false;
1685	}
1686	return false;
1687	}
1688
1689	/*
1690	* Folding Ruby
1691	*
1692	* The language is quite complex to analyze without a full parse.
1693	* For example, this line shouldn't affect fold level:
1694	*
1695	* print "hello" if feeling_friendly?
1696	*
1697	* Neither should this:
1698	*
1699	* print "hello" \
1700	* if feeling_friendly?
1701	*
1702	*
1703	* But this should:
1704	*
1705	* if feeling_friendly? #++
1706	* print "hello" \
1707	* print "goodbye"
1708	* end #--
1709	*
1710	* So we cheat, by actually looking at the existing indentation
1711	* levels for each line, and just echoing it back. Like Python.
1712	* Then if we get better at it, we'll take braces into consideration,
1713	* which always affect folding levels.
1714
1715	* How the keywords should work:
1716	* No effect:
1717	* __FILE__ __LINE__ BEGIN END alias and
1718	* defined? false in nil not or self super then
1719	* true undef
1720
1721	* Always increment:
1722	* begin class def do for module when {
1723	*
1724	* Always decrement:
1725	* end }
1726	*
1727	* Increment if these start a statement
1728	* if unless until while -- do nothing if they're modifiers
1729
1730	* These end a block if there's no modifier, but don't bother
1731	* break next redo retry return yield
1732	*
1733	* These temporarily de-indent, but re-indent
1734	* case else elsif ensure rescue
1735	*
 * This means that the folder reflects indentation rather
 * than setting it. The language service updates indentation
 * when the user types Return or finishes entering a de-denting keyword.
1739	*
1740	* Later offer to fold POD, here-docs, strings, and blocks of comments
1741	*/
1742
1743	static void FoldRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
1744	WordList *[], Accessor &styler) {
1745	const bool foldCompact = styler.GetPropertyInt("fold.compact", `1`) != `0`;
1746	bool foldComment = styler.GetPropertyInt("fold.comment") != `0`;
1747
1748	synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1749	false);
1750	Sci_PositionU endPos = startPos + length;
1751	int visibleChars = `0`;
1752	Sci_Position lineCurrent = styler.GetLine(startPos);
1753	int levelPrev = startPos == `0` ? `0` : (styler.LevelAt(lineCurrent)
1754	& SC_FOLDLEVELNUMBERMASK
1755	& ~SC_FOLDLEVELBASE);
1756	int levelCurrent = levelPrev;
1757	char chNext = styler [startPos];
1758	int styleNext = styler.StyleAt(startPos);
1759	int stylePrev = startPos <= `1` ? SCE_RB_DEFAULT : styler.StyleAt(startPos - `1`);
1760	bool buffer_ends_with_eol = false;
1761	for (Sci_PositionU i = startPos; i < endPos; i++) {
1762	char ch = chNext;
1763	chNext = styler.SafeGetCharAt(i + `1`);
1764	int style = styleNext;
1765	styleNext = styler.StyleAt(i + `1`);
1766	bool atEOL = (ch == `'\r'` && chNext != `'\n'`) \|\| (ch == `'\n'`);
1767
1768	/Mutiline comment patch/
1769	if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1770	if (!IsCommentLine(lineCurrent - `1`, styler)
1771	&& IsCommentLine(lineCurrent + `1`, styler))
1772	levelCurrent++;
1773	else if (IsCommentLine(lineCurrent - `1`, styler)
1774	&& !IsCommentLine(lineCurrent + `1`, styler))
1775	levelCurrent--;
1776	}
1777
1778	if (style == SCE_RB_COMMENTLINE) {
1779	if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1780	if (chNext == `'{'`) {
1781	levelCurrent++;
1782	} else if (chNext == `'}'` && levelCurrent > `0`) {
1783	levelCurrent--;
1784	}
1785	}
1786	} else if (style == SCE_RB_OPERATOR) {
1787	if (strchr("[{(", ch)) {
1788	levelCurrent++;
1789	} else if (strchr(")}]", ch)) {
1790	// Don't decrement below 0
1791	if (levelCurrent > `0`)
1792	levelCurrent--;
1793	}
1794	} else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1795	// Look at the keyword on the left and decide what to do
1796	char prevWord[MAX_KEYWORD_LENGTH + `1`]; // 1 byte for zero
1797	prevWord[`0`] = `0`;
1798	getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1799	if (!strcmp(prevWord, "end")) {
1800	// Don't decrement below 0
1801	if (levelCurrent > `0`)
1802	levelCurrent--;
1803	} else if (!strcmp(prevWord, "if")
1804	\|\| !strcmp(prevWord, "def")
1805	\|\| !strcmp(prevWord, "class")
1806	\|\| !strcmp(prevWord, "module")
1807	\|\| !strcmp(prevWord, "begin")
1808	\|\| !strcmp(prevWord, "case")
1809	\|\| !strcmp(prevWord, "do")
1810	\|\| !strcmp(prevWord, "while")
1811	\|\| !strcmp(prevWord, "unless")
1812	\|\| !strcmp(prevWord, "until")
1813	\|\| !strcmp(prevWord, "for")
1814	) {
1815	levelCurrent++;
1816	}
1817	} else if (style == SCE_RB_HERE_DELIM) {
1818	if (styler.SafeGetCharAt(i-`2`) == `'<'` && styler.SafeGetCharAt(i-`1`) == `'<'`) {
1819	levelCurrent++;
1820	} else if (styleNext == SCE_RB_DEFAULT) {
1821	levelCurrent--;
1822	}
1823	}
1824	if (atEOL) {
1825	int lev = levelPrev;
1826	if (visibleChars == `0` && foldCompact)
1827	lev \|= SC_FOLDLEVELWHITEFLAG;
1828	if ((levelCurrent > levelPrev) && (visibleChars > `0`))
1829	lev \|= SC_FOLDLEVELHEADERFLAG;
1830	styler.SetLevel(lineCurrent, lev\|SC_FOLDLEVELBASE);
1831	lineCurrent++;
1832	levelPrev = levelCurrent;
1833	visibleChars = `0`;
1834	buffer_ends_with_eol = true;
1835	} else if (!isspacechar(ch)) {
1836	visibleChars++;
1837	buffer_ends_with_eol = false;
1838	}
1839	stylePrev = style;
1840	}
1841	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1842	if (!buffer_ends_with_eol) {
1843	int new_lev = levelCurrent;
1844	if (visibleChars == `0` && foldCompact)
1845	new_lev \|= SC_FOLDLEVELWHITEFLAG;
1846	if ((levelCurrent > levelPrev) && (visibleChars > `0`))
1847	new_lev \|= SC_FOLDLEVELHEADERFLAG;
1848	levelCurrent = new_lev;
1849	}
1850	styler.SetLevel(lineCurrent, levelCurrent\|SC_FOLDLEVELBASE);
1851	}
1852
1853	static const char *const rubyWordListDesc[] = {
1854	"Keywords",
1855	`0`
1856	};
1857
1858	LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
1859

// Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexRuby.cxx