| 1 | // Scintilla source code edit control |
| 2 | /** @file LexPO.cxx |
| 3 | ** Lexer for GetText Translation (PO) files. |
| 4 | **/ |
| 5 | // Copyright 2012 by Colomban Wendling <ban@herbesfolles.org> |
| 6 | // The License.txt file describes the conditions under which this software may be distributed. |
| 7 | |
| 8 | // see https://www.gnu.org/software/gettext/manual/gettext.html#PO-Files for the syntax reference |
| 9 | // some details are taken from the GNU msgfmt behavior (e.g. indentation is allowed in front of lines) |
| 10 | |
| 11 | // TODO: |
| 12 | // * add keywords for flags (fuzzy, c-format, ...) |
| 13 | // * highlight formats inside c-format strings (%s, %d, etc.) |
| 14 | // * style for previous untranslated string? ("#|" comment) |
| 15 | |
| 16 | #include <stdlib.h> |
| 17 | #include <string.h> |
| 18 | #include <stdio.h> |
| 19 | #include <stdarg.h> |
| 20 | #include <assert.h> |
| 21 | #include <ctype.h> |
| 22 | |
| 23 | #include <string> |
| 24 | #include <string_view> |
| 25 | |
| 26 | #include "ILexer.h" |
| 27 | #include "Scintilla.h" |
| 28 | #include "SciLexer.h" |
| 29 | |
| 30 | #include "WordList.h" |
| 31 | #include "LexAccessor.h" |
| 32 | #include "Accessor.h" |
| 33 | #include "StyleContext.h" |
| 34 | #include "CharacterSet.h" |
| 35 | #include "LexerModule.h" |
| 36 | |
| 37 | using namespace Lexilla; |
| 38 | |
| 39 | static void ColourisePODoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *[], Accessor &styler) { |
| 40 | StyleContext sc(startPos, length, initStyle, styler); |
| 41 | bool escaped = false; |
| 42 | Sci_Position curLine = styler.GetLine(startPos); |
| 43 | // the line state holds the last state on or before the line that isn't the default style |
| 44 | int curLineState = curLine > 0 ? styler.GetLineState(curLine - 1) : SCE_PO_DEFAULT; |
| 45 | |
| 46 | for (; sc.More(); sc.Forward()) { |
| 47 | // whether we should leave a state |
| 48 | switch (sc.state) { |
| 49 | case SCE_PO_COMMENT: |
| 50 | case SCE_PO_PROGRAMMER_COMMENT: |
| 51 | case SCE_PO_REFERENCE: |
| 52 | case SCE_PO_FLAGS: |
| 53 | case SCE_PO_FUZZY: |
| 54 | if (sc.atLineEnd) |
| 55 | sc.SetState(SCE_PO_DEFAULT); |
| 56 | else if (sc.state == SCE_PO_FLAGS && sc.Match("fuzzy" )) |
| 57 | // here we behave like the previous parser, but this should probably be highlighted |
| 58 | // on its own like a keyword rather than changing the whole flags style |
| 59 | sc.ChangeState(SCE_PO_FUZZY); |
| 60 | break; |
| 61 | |
| 62 | case SCE_PO_MSGCTXT: |
| 63 | case SCE_PO_MSGID: |
| 64 | case SCE_PO_MSGSTR: |
| 65 | if (isspacechar(sc.ch)) |
| 66 | sc.SetState(SCE_PO_DEFAULT); |
| 67 | break; |
| 68 | |
| 69 | case SCE_PO_ERROR: |
| 70 | if (sc.atLineEnd) |
| 71 | sc.SetState(SCE_PO_DEFAULT); |
| 72 | break; |
| 73 | |
| 74 | case SCE_PO_MSGCTXT_TEXT: |
| 75 | case SCE_PO_MSGID_TEXT: |
| 76 | case SCE_PO_MSGSTR_TEXT: |
| 77 | if (sc.atLineEnd) { // invalid inside a string |
| 78 | if (sc.state == SCE_PO_MSGCTXT_TEXT) |
| 79 | sc.ChangeState(SCE_PO_MSGCTXT_TEXT_EOL); |
| 80 | else if (sc.state == SCE_PO_MSGID_TEXT) |
| 81 | sc.ChangeState(SCE_PO_MSGID_TEXT_EOL); |
| 82 | else if (sc.state == SCE_PO_MSGSTR_TEXT) |
| 83 | sc.ChangeState(SCE_PO_MSGSTR_TEXT_EOL); |
| 84 | sc.SetState(SCE_PO_DEFAULT); |
| 85 | escaped = false; |
| 86 | } else { |
| 87 | if (escaped) |
| 88 | escaped = false; |
| 89 | else if (sc.ch == '\\') |
| 90 | escaped = true; |
| 91 | else if (sc.ch == '"') |
| 92 | sc.ForwardSetState(SCE_PO_DEFAULT); |
| 93 | } |
| 94 | break; |
| 95 | } |
| 96 | |
| 97 | // whether we should enter a new state |
| 98 | if (sc.state == SCE_PO_DEFAULT) { |
| 99 | // forward to the first non-white character on the line |
| 100 | bool atLineStart = sc.atLineStart; |
| 101 | if (atLineStart) { |
| 102 | // reset line state if it is set to comment state so empty lines don't get |
| 103 | // comment line state, and the folding code folds comments separately, |
| 104 | // and anyway the styling don't use line state for comments |
| 105 | if (curLineState == SCE_PO_COMMENT) |
| 106 | curLineState = SCE_PO_DEFAULT; |
| 107 | |
| 108 | while (sc.More() && ! sc.atLineEnd && isspacechar(sc.ch)) |
| 109 | sc.Forward(); |
| 110 | } |
| 111 | |
| 112 | if (atLineStart && sc.ch == '#') { |
| 113 | if (sc.chNext == '.') |
| 114 | sc.SetState(SCE_PO_PROGRAMMER_COMMENT); |
| 115 | else if (sc.chNext == ':') |
| 116 | sc.SetState(SCE_PO_REFERENCE); |
| 117 | else if (sc.chNext == ',') |
| 118 | sc.SetState(SCE_PO_FLAGS); |
| 119 | else |
| 120 | sc.SetState(SCE_PO_COMMENT); |
| 121 | } else if (atLineStart && sc.Match("msgid" )) { // includes msgid_plural |
| 122 | sc.SetState(SCE_PO_MSGID); |
| 123 | } else if (atLineStart && sc.Match("msgstr" )) { // includes [] suffixes |
| 124 | sc.SetState(SCE_PO_MSGSTR); |
| 125 | } else if (atLineStart && sc.Match("msgctxt" )) { |
| 126 | sc.SetState(SCE_PO_MSGCTXT); |
| 127 | } else if (sc.ch == '"') { |
| 128 | if (curLineState == SCE_PO_MSGCTXT || curLineState == SCE_PO_MSGCTXT_TEXT) |
| 129 | sc.SetState(SCE_PO_MSGCTXT_TEXT); |
| 130 | else if (curLineState == SCE_PO_MSGID || curLineState == SCE_PO_MSGID_TEXT) |
| 131 | sc.SetState(SCE_PO_MSGID_TEXT); |
| 132 | else if (curLineState == SCE_PO_MSGSTR || curLineState == SCE_PO_MSGSTR_TEXT) |
| 133 | sc.SetState(SCE_PO_MSGSTR_TEXT); |
| 134 | else |
| 135 | sc.SetState(SCE_PO_ERROR); |
| 136 | } else if (! isspacechar(sc.ch)) |
| 137 | sc.SetState(SCE_PO_ERROR); |
| 138 | |
| 139 | if (sc.state != SCE_PO_DEFAULT) |
| 140 | curLineState = sc.state; |
| 141 | } |
| 142 | |
| 143 | if (sc.atLineEnd) { |
| 144 | // Update the line state, so it can be seen by next line |
| 145 | curLine = styler.GetLine(sc.currentPos); |
| 146 | styler.SetLineState(curLine, curLineState); |
| 147 | } |
| 148 | } |
| 149 | sc.Complete(); |
| 150 | } |
| 151 | |
| 152 | static int FindNextNonEmptyLineState(Sci_PositionU startPos, Accessor &styler) { |
| 153 | Sci_PositionU length = styler.Length(); |
| 154 | for (Sci_PositionU i = startPos; i < length; i++) { |
| 155 | if (! isspacechar(styler[i])) { |
| 156 | return styler.GetLineState(styler.GetLine(i)); |
| 157 | } |
| 158 | } |
| 159 | return 0; |
| 160 | } |
| 161 | |
| 162 | static void FoldPODoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) { |
| 163 | if (! styler.GetPropertyInt("fold" )) |
| 164 | return; |
| 165 | bool foldCompact = styler.GetPropertyInt("fold.compact" ) != 0; |
| 166 | bool = styler.GetPropertyInt("fold.comment" ) != 0; |
| 167 | |
| 168 | Sci_PositionU endPos = startPos + length; |
| 169 | Sci_Position curLine = styler.GetLine(startPos); |
| 170 | int lineState = styler.GetLineState(curLine); |
| 171 | int nextLineState; |
| 172 | int level = styler.LevelAt(curLine) & SC_FOLDLEVELNUMBERMASK; |
| 173 | int nextLevel; |
| 174 | int visible = 0; |
| 175 | int chNext = styler[startPos]; |
| 176 | |
| 177 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
| 178 | int ch = chNext; |
| 179 | chNext = styler.SafeGetCharAt(i+1); |
| 180 | |
| 181 | if (! isspacechar(ch)) { |
| 182 | visible++; |
| 183 | } else if ((ch == '\r' && chNext != '\n') || ch == '\n' || i+1 >= endPos) { |
| 184 | int lvl = level; |
| 185 | Sci_Position nextLine = curLine + 1; |
| 186 | |
| 187 | nextLineState = styler.GetLineState(nextLine); |
| 188 | if ((lineState != SCE_PO_COMMENT || foldComment) && |
| 189 | nextLineState == lineState && |
| 190 | FindNextNonEmptyLineState(i, styler) == lineState) |
| 191 | nextLevel = SC_FOLDLEVELBASE + 1; |
| 192 | else |
| 193 | nextLevel = SC_FOLDLEVELBASE; |
| 194 | |
| 195 | if (nextLevel > level) |
| 196 | lvl |= SC_FOLDLEVELHEADERFLAG; |
| 197 | if (visible == 0 && foldCompact) |
| 198 | lvl |= SC_FOLDLEVELWHITEFLAG; |
| 199 | |
| 200 | styler.SetLevel(curLine, lvl); |
| 201 | |
| 202 | lineState = nextLineState; |
| 203 | curLine = nextLine; |
| 204 | level = nextLevel; |
| 205 | visible = 0; |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | static const char *const poWordListDesc[] = { |
| 211 | 0 |
| 212 | }; |
| 213 | |
| 214 | LexerModule lmPO(SCLEX_PO, ColourisePODoc, "po" , FoldPODoc, poWordListDesc); |
| 215 | |