1 | // Scintilla source code edit control |
2 | /** @file Document.cxx |
3 | ** Text document that handles notifications, DBCS, styling, words and end of line. |
4 | **/ |
5 | // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org> |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | |
8 | #include <cstddef> |
9 | #include <cstdlib> |
10 | #include <cassert> |
11 | #include <cstring> |
12 | #include <cstdio> |
13 | #include <cmath> |
14 | |
15 | #include <stdexcept> |
16 | #include <string> |
17 | #include <string_view> |
18 | #include <vector> |
19 | #include <forward_list> |
20 | #include <optional> |
21 | #include <algorithm> |
22 | #include <memory> |
23 | #include <chrono> |
24 | |
25 | #ifndef NO_CXX11_REGEX |
26 | #include <regex> |
27 | #endif |
28 | |
29 | #include "ScintillaTypes.h" |
30 | #include "ILoader.h" |
31 | #include "ILexer.h" |
32 | |
33 | #include "Debugging.h" |
34 | |
35 | #include "CharacterType.h" |
36 | #include "CharacterCategoryMap.h" |
37 | #include "Position.h" |
38 | #include "SplitVector.h" |
39 | #include "Partitioning.h" |
40 | #include "RunStyles.h" |
41 | #include "CellBuffer.h" |
42 | #include "PerLine.h" |
43 | #include "CharClassify.h" |
44 | #include "Decoration.h" |
45 | #include "CaseFolder.h" |
46 | #include "Document.h" |
47 | #include "RESearch.h" |
48 | #include "UniConversion.h" |
49 | #include "ElapsedPeriod.h" |
50 | |
51 | using namespace Scintilla; |
52 | using namespace Scintilla::Internal; |
53 | |
54 | LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) { |
55 | } |
56 | |
// Compiler-generated destruction, defined out of line in this translation unit.
LexInterface::~LexInterface() noexcept = default;
58 | |
59 | void LexInterface::SetInstance(ILexer5 *instance_) { |
60 | instance.reset(instance_); |
61 | pdoc->LexerChanged(); |
62 | } |
63 | |
64 | void LexInterface::Colourise(Sci::Position start, Sci::Position end) { |
65 | if (pdoc && instance && !performingStyle) { |
66 | // Protect against reentrance, which may occur, for example, when |
67 | // fold points are discovered while performing styling and the folding |
68 | // code looks for child lines which may trigger styling. |
69 | performingStyle = true; |
70 | |
71 | const Sci::Position lengthDoc = pdoc->Length(); |
72 | if (end == -1) |
73 | end = lengthDoc; |
74 | const Sci::Position len = end - start; |
75 | |
76 | PLATFORM_ASSERT(len >= 0); |
77 | PLATFORM_ASSERT(start + len <= lengthDoc); |
78 | |
79 | int styleStart = 0; |
80 | if (start > 0) |
81 | styleStart = pdoc->StyleAt(start - 1); |
82 | |
83 | if (len > 0) { |
84 | instance->Lex(start, len, styleStart, pdoc); |
85 | instance->Fold(start, len, styleStart, pdoc); |
86 | } |
87 | |
88 | performingStyle = false; |
89 | } |
90 | } |
91 | |
92 | LineEndType LexInterface::LineEndTypesSupported() { |
93 | if (instance) { |
94 | return static_cast<LineEndType>(instance->LineEndTypesSupported()); |
95 | } |
96 | return LineEndType::Default; |
97 | } |
98 | |
99 | bool LexInterface::UseContainerLexing() const noexcept { |
100 | return !instance; |
101 | } |
102 | |
103 | ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : |
104 | duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { |
105 | } |
106 | |
107 | void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { |
108 | // Only adjust for multiple actions to avoid instability |
109 | if (numberActions < 8) |
110 | return; |
111 | |
112 | // Alpha value for exponential smoothing. |
113 | // Most recent value contributes 25% to smoothed value. |
114 | constexpr double alpha = 0.25; |
115 | |
116 | const double durationOne = durationOfActions / numberActions; |
117 | duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, |
118 | minDuration, maxDuration); |
119 | } |
120 | |
121 | double ActionDuration::Duration() const noexcept { |
122 | return duration; |
123 | } |
124 | |
125 | size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { |
126 | return std::lround(secondsAllowed / Duration()); |
127 | } |
128 | |
// Construct an empty document.
// options: DocumentOption flags; StylesNone turns off the first argument
// passed to the cell buffer and TextLarge turns on the second.
Document::Document(DocumentOption options) :
	cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
	durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
	refCount = 0;
	// Default end of line mode follows the platform convention.
#ifdef _WIN32
	eolMode = EndOfLine::CrLf;
#else
	eolMode = EndOfLine::Lf;
#endif
	dbcsCodePage = CpUtf8;
	lineEndBitSet = LineEndType::Default;
	endStyled = 0;
	styleClock = 0;
	enteredModification = 0;
	enteredStyling = 0;
	enteredReadOnlyCount = 0;
	insertionSet = false;
	tabInChars = 8;
	indentInChars = 0;
	actualIndentInChars = 8;
	useTabs = true;
	tabIndents = true;
	backspaceUnindents = false;

	matchesValid = false;

	// One per-line data store for each kind of line attribute.
	perLineData[ldMarkers] = std::make_unique<LineMarkers>();
	perLineData[ldLevels] = std::make_unique<LineLevels>();
	perLineData[ldState] = std::make_unique<LineState>();
	perLineData[ldMargin] = std::make_unique<LineAnnotation>();
	perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
	perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();

	decorations = DecorationListCreate(IsLarge());

	// Register this document as the cell buffer's per-line object and tell
	// the buffer whether its content is UTF-8.
	cb.SetPerLine(this);
	cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
}
167 | |
168 | Document::~Document() { |
169 | for (const WatcherWithUserData &watcher : watchers) { |
170 | watcher.watcher->NotifyDeleted(this, watcher.userData); |
171 | } |
172 | } |
173 | |
174 | // Increase reference count and return its previous value. |
175 | int Document::AddRef() { |
176 | return refCount++; |
177 | } |
178 | |
179 | // Decrease reference count and return its previous value. |
180 | // Delete the document if reference count reaches zero. |
181 | int SCI_METHOD Document::Release() { |
182 | const int curRefCount = --refCount; |
183 | if (curRefCount == 0) |
184 | delete this; |
185 | return curRefCount; |
186 | } |
187 | |
188 | void Document::Init() { |
189 | for (const std::unique_ptr<PerLine> &pl : perLineData) { |
190 | if (pl) |
191 | pl->Init(); |
192 | } |
193 | } |
194 | |
195 | void Document::InsertLine(Sci::Line line) { |
196 | for (const std::unique_ptr<PerLine> &pl : perLineData) { |
197 | if (pl) |
198 | pl->InsertLine(line); |
199 | } |
200 | } |
201 | |
202 | void Document::InsertLines(Sci::Line line, Sci::Line lines) { |
203 | for (const auto &pl : perLineData) { |
204 | if (pl) |
205 | pl->InsertLines(line, lines); |
206 | } |
207 | } |
208 | |
209 | void Document::RemoveLine(Sci::Line line) { |
210 | for (const std::unique_ptr<PerLine> &pl : perLineData) { |
211 | if (pl) |
212 | pl->RemoveLine(line); |
213 | } |
214 | } |
215 | |
216 | LineMarkers *Document::Markers() const noexcept { |
217 | return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get()); |
218 | } |
219 | |
220 | LineLevels *Document::Levels() const noexcept { |
221 | return dynamic_cast<LineLevels *>(perLineData[ldLevels].get()); |
222 | } |
223 | |
224 | LineState *Document::States() const noexcept { |
225 | return dynamic_cast<LineState *>(perLineData[ldState].get()); |
226 | } |
227 | |
228 | LineAnnotation *Document::Margins() const noexcept { |
229 | return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get()); |
230 | } |
231 | |
232 | LineAnnotation *Document::Annotations() const noexcept { |
233 | return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get()); |
234 | } |
235 | |
236 | LineAnnotation *Document::EOLAnnotations() const noexcept { |
237 | return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get()); |
238 | } |
239 | |
240 | LineEndType Document::LineEndTypesSupported() const { |
241 | if ((CpUtf8 == dbcsCodePage) && pli) |
242 | return pli->LineEndTypesSupported(); |
243 | else |
244 | return LineEndType::Default; |
245 | } |
246 | |
247 | bool Document::SetDBCSCodePage(int dbcsCodePage_) { |
248 | if (dbcsCodePage != dbcsCodePage_) { |
249 | dbcsCodePage = dbcsCodePage_; |
250 | SetCaseFolder(nullptr); |
251 | cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); |
252 | cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); |
253 | ModifiedAt(0); // Need to restyle whole document |
254 | return true; |
255 | } else { |
256 | return false; |
257 | } |
258 | } |
259 | |
260 | bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) { |
261 | if (lineEndBitSet != lineEndBitSet_) { |
262 | lineEndBitSet = lineEndBitSet_; |
263 | const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); |
264 | if (lineEndBitSetActive != cb.GetLineEndTypes()) { |
265 | ModifiedAt(0); |
266 | cb.SetLineEndTypes(lineEndBitSetActive); |
267 | return true; |
268 | } else { |
269 | return false; |
270 | } |
271 | } else { |
272 | return false; |
273 | } |
274 | } |
275 | |
276 | void Document::SetSavePoint() { |
277 | cb.SetSavePoint(); |
278 | NotifySavePoint(true); |
279 | } |
280 | |
// Undo all steps of the active tentative sequence, sending a "before" and an
// "after" notification for each step, then commit the tentative state.
// Does nothing when no tentative sequence is active, when a modification is
// already in progress, or when the buffer is read-only.
void Document::TentativeUndo() {
	if (!TentativeActive())
		return;
	CheckReadOnly();
	// enteredModification guards against nested modification.
	if (enteredModification == 0) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.TentativeSteps();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Notify before the step: undoing a removal re-inserts text,
				// undoing anything else (except a container action) deletes text.
				if (action.at == ActionType::remove) {
					NotifyModified(DocModification(
						ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
				} else if (action.at == ActionType::container) {
					DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
						ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
				}
				cb.PerformUndoStep();
				if (action.at != ActionType::container) {
					ModifiedAt(action.position);
				}

				ModificationFlags modFlags = ModificationFlags::Undo;
				// With undo, an insertion action becomes a deletion notification
				if (action.at == ActionType::remove) {
					modFlags |= ModificationFlags::InsertText;
				} else if (action.at == ActionType::insert) {
					modFlags |= ModificationFlags::DeleteText;
				}
				if (steps > 1)
					modFlags |= ModificationFlags::MultiStepUndoRedo;
				// Line count difference caused by this step.
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step carries the summary flags for the whole sequence.
				if (step == steps - 1) {
					modFlags |= ModificationFlags::LastStepInUndoRedo;
					if (multiLine)
						modFlags |= ModificationFlags::MultilineUndoRedo;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
					linesAdded, action.data.get()));
			}

			// Report a save point transition only when the state changed.
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);

			cb.TentativeCommit();
		}
		enteredModification--;
	}
}
341 | |
342 | int Document::GetMark(Sci::Line line) const noexcept { |
343 | return Markers()->MarkValue(line); |
344 | } |
345 | |
346 | Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { |
347 | return Markers()->MarkerNext(lineStart, mask); |
348 | } |
349 | |
350 | int Document::AddMark(Sci::Line line, int markerNum) { |
351 | if (line >= 0 && line <= LinesTotal()) { |
352 | const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); |
353 | const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); |
354 | NotifyModified(mh); |
355 | return prev; |
356 | } else { |
357 | return -1; |
358 | } |
359 | } |
360 | |
361 | void Document::AddMarkSet(Sci::Line line, int valueSet) { |
362 | if (line < 0 || line > LinesTotal()) { |
363 | return; |
364 | } |
365 | unsigned int m = valueSet; |
366 | for (int i = 0; m; i++, m >>= 1) { |
367 | if (m & 1) |
368 | Markers()->AddMark(line, i, LinesTotal()); |
369 | } |
370 | const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); |
371 | NotifyModified(mh); |
372 | } |
373 | |
374 | void Document::DeleteMark(Sci::Line line, int markerNum) { |
375 | Markers()->DeleteMark(line, markerNum, false); |
376 | const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); |
377 | NotifyModified(mh); |
378 | } |
379 | |
380 | void Document::DeleteMarkFromHandle(int markerHandle) { |
381 | Markers()->DeleteMarkFromHandle(markerHandle); |
382 | DocModification mh(ModificationFlags::ChangeMarker); |
383 | mh.line = -1; |
384 | NotifyModified(mh); |
385 | } |
386 | |
387 | void Document::DeleteAllMarks(int markerNum) { |
388 | bool someChanges = false; |
389 | for (Sci::Line line = 0; line < LinesTotal(); line++) { |
390 | if (Markers()->DeleteMark(line, markerNum, true)) |
391 | someChanges = true; |
392 | } |
393 | if (someChanges) { |
394 | DocModification mh(ModificationFlags::ChangeMarker); |
395 | mh.line = -1; |
396 | NotifyModified(mh); |
397 | } |
398 | } |
399 | |
400 | Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { |
401 | return Markers()->LineFromHandle(markerHandle); |
402 | } |
403 | |
404 | int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { |
405 | return Markers()->NumberFromLine(line, which); |
406 | } |
407 | |
408 | int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { |
409 | return Markers()->HandleFromLine(line, which); |
410 | } |
411 | |
412 | Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { |
413 | return cb.LineStart(line); |
414 | } |
415 | |
416 | bool Document::IsLineStartPosition(Sci::Position position) const { |
417 | return LineStart(LineFromPosition(position)) == position; |
418 | } |
419 | |
420 | Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { |
421 | if (line >= LinesTotal() - 1) { |
422 | return LineStart(line + 1); |
423 | } else { |
424 | Sci::Position position = LineStart(line + 1); |
425 | if (LineEndType::Unicode == cb.GetLineEndTypes()) { |
426 | const unsigned char bytes[] = { |
427 | cb.UCharAt(position-3), |
428 | cb.UCharAt(position-2), |
429 | cb.UCharAt(position-1), |
430 | }; |
431 | if (UTF8IsSeparator(bytes)) { |
432 | return position - UTF8SeparatorLength; |
433 | } |
434 | if (UTF8IsNEL(bytes+1)) { |
435 | return position - UTF8NELLength; |
436 | } |
437 | } |
438 | position--; // Back over CR or LF |
439 | // When line terminator is CR+LF, may need to go back one more |
440 | if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { |
441 | position--; |
442 | } |
443 | return position; |
444 | } |
445 | } |
446 | |
447 | void SCI_METHOD Document::SetErrorStatus(int status) { |
448 | // Tell the watchers an error has occurred. |
449 | for (const WatcherWithUserData &watcher : watchers) { |
450 | watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast<Status>(status)); |
451 | } |
452 | } |
453 | |
454 | Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { |
455 | return cb.LineFromPosition(pos); |
456 | } |
457 | |
458 | Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { |
459 | // Avoids casting in callers for this very common function |
460 | return cb.LineFromPosition(pos); |
461 | } |
462 | |
463 | Sci::Position Document::LineEndPosition(Sci::Position position) const { |
464 | return LineEnd(LineFromPosition(position)); |
465 | } |
466 | |
467 | bool Document::IsLineEndPosition(Sci::Position position) const { |
468 | return LineEnd(LineFromPosition(position)) == position; |
469 | } |
470 | |
471 | bool Document::IsPositionInLineEnd(Sci::Position position) const { |
472 | return position >= LineEnd(LineFromPosition(position)); |
473 | } |
474 | |
475 | Sci::Position Document::VCHomePosition(Sci::Position position) const { |
476 | const Sci::Line line = SciLineFromPosition(position); |
477 | const Sci::Position startPosition = LineStart(line); |
478 | const Sci::Position endLine = LineEnd(line); |
479 | Sci::Position startText = startPosition; |
480 | while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t')) |
481 | startText++; |
482 | if (position == startText) |
483 | return startPosition; |
484 | else |
485 | return startText; |
486 | } |
487 | |
488 | Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept { |
489 | return cb.IndexLineStart(line, lineCharacterIndex); |
490 | } |
491 | |
492 | Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept { |
493 | return cb.LineFromPositionIndex(pos, lineCharacterIndex); |
494 | } |
495 | |
496 | Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { |
497 | const Sci::Position posAfter = cb.LineStart(line) + length; |
498 | if (posAfter >= LengthNoExcept()) { |
499 | return LinesTotal(); |
500 | } |
501 | const Sci::Line lineAfter = SciLineFromPosition(posAfter); |
502 | if (lineAfter > line) { |
503 | return lineAfter; |
504 | } else { |
505 | // Want to make some progress so return next line |
506 | return lineAfter + 1; |
507 | } |
508 | } |
509 | |
510 | int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { |
511 | const int prev = Levels()->SetLevel(line, level, LinesTotal()); |
512 | if (prev != level) { |
513 | DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker, |
514 | LineStart(line), 0, 0, nullptr, line); |
515 | mh.foldLevelNow = static_cast<FoldLevel>(level); |
516 | mh.foldLevelPrev = static_cast<FoldLevel>(prev); |
517 | NotifyModified(mh); |
518 | } |
519 | return prev; |
520 | } |
521 | |
522 | int SCI_METHOD Document::GetLevel(Sci_Position line) const { |
523 | return Levels()->GetLevel(line); |
524 | } |
525 | |
526 | FoldLevel Document::GetFoldLevel(Sci_Position line) const { |
527 | return static_cast<FoldLevel>(Levels()->GetLevel(line)); |
528 | } |
529 | |
530 | void Document::ClearLevels() { |
531 | Levels()->ClearLevels(); |
532 | } |
533 | |
534 | static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept { |
535 | if (LevelIsWhitespace(levelTry)) |
536 | return true; |
537 | else |
538 | return LevelNumber(levelStart) < LevelNumber(levelTry); |
539 | } |
540 | |
// Find the last line subordinate to lineParent in the fold structure.
// level: fold level to use for the parent; when empty the level stored for
//        lineParent is used.
// lastLine: -1 for no limit; otherwise the search stops once it has reached
//        this line (clamped to the last document line) on a non-whitespace line.
// Styling is extended as the search proceeds so that the fold levels read
// are up to date.
Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional<FoldLevel> level, Sci::Line lastLine) {
	const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent));
	const Sci::Line maxLine = LinesTotal();
	const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
	Sci::Line lineMaxSubord = lineParent;
	while (lineMaxSubord < maxLine - 1) {
		// Make sure the next line has been styled before its level is read.
		EnsureStyledTo(LineStart(lineMaxSubord + 2));
		if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1)))
			break;
		if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord)))
			break;
		lineMaxSubord++;
	}
	if (lineMaxSubord > lineParent) {
		if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) {
			// Have chewed up some whitespace that belongs to a parent so seek back
			if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) {
				lineMaxSubord--;
			}
		}
	}
	return lineMaxSubord;
}
564 | |
565 | Sci::Line Document::GetFoldParent(Sci::Line line) const { |
566 | const FoldLevel level = LevelNumberPart(GetFoldLevel(line)); |
567 | Sci::Line lineLook = line - 1; |
568 | while ((lineLook > 0) && ( |
569 | (!LevelIsHeader(GetFoldLevel(lineLook))) || |
570 | (LevelNumberPart(GetFoldLevel(lineLook)) >= level)) |
571 | ) { |
572 | lineLook--; |
573 | } |
574 | if (LevelIsHeader(GetFoldLevel(lineLook)) && |
575 | (LevelNumberPart(GetFoldLevel(lineLook)) < level)) { |
576 | return lineLook; |
577 | } else { |
578 | return -1; |
579 | } |
580 | } |
581 | |
// Compute the fold block surrounding 'line' and store its boundaries in
// highlightDelimiter: beginFoldBlock, endFoldBlock and the first
// "changeable" lines before and after 'line'.
// lastLine limits how far GetLastChild searches for the end of a block.
// highlightDelimiter is cleared when no enclosing fold block is found.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
	const FoldLevel level = GetFoldLevel(line);
	const Sci::Line lookLastLine = std::max(line, lastLine) + 1;

	// Step back over whitespace lines and over headers whose level number is
	// not below that of the following line.
	Sci::Line lookLine = line;
	FoldLevel lookLineLevel = level;
	FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel);
	while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) ||
		(LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) {
		lookLineLevel = GetFoldLevel(--lookLine);
		lookLineLevelNum = LevelNumberPart(lookLineLevel);
	}

	// The block starts at this line if it is a header, otherwise at its fold parent.
	Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine);
	if (beginFoldBlock == -1) {
		highlightDelimiter.Clear();
		return;
	}

	Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine);
	Sci::Line firstChangeableLineBefore = -1;
	if (endFoldBlock < line) {
		// The block found ends before 'line': search earlier headers for one
		// whose last child is exactly 'line'.
		lookLine = beginFoldBlock - 1;
		lookLineLevel = GetFoldLevel(lookLine);
		lookLineLevelNum = LevelNumberPart(lookLineLevel);
		while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) {
			if (LevelIsHeader(lookLineLevel)) {
				if (GetLastChild(lookLine, {}, lookLastLine) == line) {
					beginFoldBlock = lookLine;
					endFoldBlock = line;
					firstChangeableLineBefore = line - 1;
				}
			}
			// Stop at a base-level line that follows a deeper line.
			if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum))
				break;
			lookLineLevel = GetFoldLevel(--lookLine);
			lookLineLevelNum = LevelNumberPart(lookLineLevel);
		}
	}
	if (firstChangeableLineBefore == -1) {
		// Scan back from the line above to the block start for a whitespace
		// line or a line with a greater level number than 'line'.
		for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
			lookLine >= beginFoldBlock;
			lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
			if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) {
				firstChangeableLineBefore = lookLine;
				break;
			}
		}
	}
	// Fall back to the line just before the block.
	if (firstChangeableLineBefore == -1)
		firstChangeableLineBefore = beginFoldBlock - 1;

	// Scan forward to the block end for a header whose level number is below
	// that of the line after it.
	Sci::Line firstChangeableLineAfter = -1;
	for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
		lookLine <= endFoldBlock;
		lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
		if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) {
			firstChangeableLineAfter = lookLine;
			break;
		}
	}
	// Fall back to the line just after the block.
	if (firstChangeableLineAfter == -1)
		firstChangeableLineAfter = endFoldBlock + 1;

	highlightDelimiter.beginFoldBlock = beginFoldBlock;
	highlightDelimiter.endFoldBlock = endFoldBlock;
	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
651 | |
652 | Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { |
653 | return std::clamp<Sci::Position>(pos, 0, LengthNoExcept()); |
654 | } |
655 | |
656 | bool Document::IsCrLf(Sci::Position pos) const noexcept { |
657 | if (pos < 0) |
658 | return false; |
659 | if (pos >= (LengthNoExcept() - 1)) |
660 | return false; |
661 | return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); |
662 | } |
663 | |
664 | int Document::LenChar(Sci::Position pos) const noexcept { |
665 | if (pos < 0 || pos >= LengthNoExcept()) { |
666 | // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. |
667 | return 1; |
668 | } else if (IsCrLf(pos)) { |
669 | return 2; |
670 | } |
671 | |
672 | const unsigned char leadByte = cb.UCharAt(pos); |
673 | if (!dbcsCodePage || UTF8IsAscii(leadByte)) { |
674 | // Common case: ASCII character |
675 | return 1; |
676 | } |
677 | if (CpUtf8 == dbcsCodePage) { |
678 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
679 | unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; |
680 | for (int b = 1; b < widthCharBytes; b++) { |
681 | charBytes[b] = cb.UCharAt(pos + b); |
682 | } |
683 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
684 | if (utf8status & UTF8MaskInvalid) { |
685 | // Treat as invalid and use up just one byte |
686 | return 1; |
687 | } else { |
688 | return utf8status & UTF8MaskWidth; |
689 | } |
690 | } else { |
691 | if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) { |
692 | return 2; |
693 | } else { |
694 | return 1; |
695 | } |
696 | } |
697 | } |
698 | |
699 | bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { |
700 | Sci::Position trail = pos; |
701 | while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) |
702 | trail--; |
703 | start = (trail > 0) ? trail-1 : trail; |
704 | |
705 | const unsigned char leadByte = cb.UCharAt(start); |
706 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
707 | if (widthCharBytes == 1) { |
708 | return false; |
709 | } else { |
710 | const int trailBytes = widthCharBytes - 1; |
711 | const Sci::Position len = pos - start; |
712 | if (len > trailBytes) |
713 | // pos too far from lead |
714 | return false; |
715 | unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; |
716 | for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++) |
717 | charBytes[b] = cb.CharAt(start+b); |
718 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
719 | if (utf8status & UTF8MaskInvalid) |
720 | return false; |
721 | end = start + widthCharBytes; |
722 | return true; |
723 | } |
724 | } |
725 | |
// Normalise a position so that it is not part way through a multi-byte character.
// This can occur in these situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
// When the text is UTF-8 and the position is on a trail byte of a valid character.
// When displaying DBCS text such as Japanese.
// If moving, move the position in the indicated direction.
// pos: position to normalise; values outside the document are clamped to 0 or the length.
// moveDir: > 0 moves to the end of the character, otherwise to its start.
// checkLineEnd: when true, a position between \r and \n is moved off the pair.
Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
	// If out of range, just return minimum/maximum value.
	if (pos <= 0)
		return 0;
	if (pos >= LengthNoExcept())
		return LengthNoExcept();

	// PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
	// pos - 1 being the start of a CR+LF pair means pos is between the two bytes.
	if (checkLineEnd && IsCrLf(pos - 1)) {
		if (moveDir > 0)
			return pos + 1;
		else
			return pos - 1;
	}

	if (dbcsCodePage) {
		if (CpUtf8 == dbcsCodePage) {
			const unsigned char ch = cb.UCharAt(pos);
			// If ch is not a trail byte then pos is valid intercharacter position
			if (UTF8IsTrailByte(ch)) {
				Sci::Position startUTF = pos;
				Sci::Position endUTF = pos;
				if (InGoodUTF8(pos, startUTF, endUTF)) {
					// ch is a trail byte within a UTF-8 character
					if (moveDir > 0)
						pos = endUTF;
					else
						pos = startUTF;
				}
				// Else invalid UTF-8 so return position of isolated trail byte
			}
		} else {
			// Anchor DBCS calculations at start of line because start of line can
			// not be a DBCS trail byte.
			const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
			if (pos == posStartLine)
				return pos;

			// Step back until a non-lead-byte is found.
			Sci::Position posCheck = pos;
			while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
				posCheck--;

			// Check from known start of character.
			while (posCheck < pos) {
				const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;
				if (posCheck + mbsize == pos) {
					// pos is on a character boundary.
					return pos;
				} else if (posCheck + mbsize > pos) {
					// pos is inside the character starting at posCheck.
					if (moveDir > 0) {
						return posCheck + mbsize;
					} else {
						return posCheck;
					}
				}
				posCheck += mbsize;
			}
		}
	}

	return pos;
}
794 | |
// NextPosition moves between valid positions - it can not handle a position in the middle of a
// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
// A \r\n pair is treated as two characters.
// pos: a valid character boundary.
// moveDir: > 0 moves forward by one character, otherwise backward by one character.
// Returns the new position, limited to the range [0, buffer length].
Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
	// If out of range, just return minimum/maximum value.
	const int increment = (moveDir > 0) ? 1 : -1;
	if (pos + increment <= 0)
		return 0;
	if (pos + increment >= cb.Length())
		return cb.Length();

	if (dbcsCodePage) {
		if (CpUtf8 == dbcsCodePage) {
			if (increment == 1) {
				// Simple forward movement case so can avoid some checks
				const unsigned char leadByte = cb.UCharAt(pos);
				if (UTF8IsAscii(leadByte)) {
					// Single byte character or invalid
					pos++;
				} else {
					// Gather the bytes announced by the lead byte and classify them.
					const int widthCharBytes = UTF8BytesOfLead[leadByte];
					unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
					for (int b=1; b<widthCharBytes; b++)
						charBytes[b] = cb.CharAt(pos+b);
					const int utf8status = UTF8Classify(charBytes, widthCharBytes);
					if (utf8status & UTF8MaskInvalid)
						pos++;
					else
						pos += utf8status & UTF8MaskWidth;
				}
			} else {
				// Examine byte before position
				pos--;
				const unsigned char ch = cb.UCharAt(pos);
				// If ch is not a trail byte then pos is valid intercharacter position
				if (UTF8IsTrailByte(ch)) {
					// If ch is a trail byte in a valid UTF-8 character then return start of character
					Sci::Position startUTF = pos;
					Sci::Position endUTF = pos;
					if (InGoodUTF8(pos, startUTF, endUTF)) {
						pos = startUTF;
					}
					// Else invalid UTF-8 so return position of isolated trail byte
				}
			}
		} else {
			if (moveDir > 0) {
				const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1;
				pos += mbsize;
				// A dual byte character at the very end must not move past the buffer.
				if (pos > cb.Length())
					pos = cb.Length();
			} else {
				// Anchor DBCS calculations at start of line because start of line can
				// not be a DBCS trail byte.
				const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
				if ((pos - 1) <= posStartLine) {
					return pos - 1;
				} else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
					// Should actually be trail byte
					if (IsDBCSDualByteAt(pos - 2)) {
						return pos - 2;
					} else {
						// Invalid byte pair so treat as one byte wide
						return pos - 1;
					}
				} else {
					// Otherwise, step back until a non-lead-byte is found.
					Sci::Position posTemp = pos - 1;
					while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
						;
					// Now posTemp+1 must point to the beginning of a character,
					// so figure out whether we went back an even or an odd
					// number of bytes and go back 1 or 2 bytes, respectively.
					const Sci::Position widthLast = ((pos - posTemp) & 1) + 1;
					if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) {
						return pos - widthLast;
					}
					// Byte before pos may be valid character or may be an invalid second byte
					return pos - 1;
				}
			}
		}
	} else {
		// Single byte encoding: every byte is a character.
		pos += increment;
	}

	return pos;
}
885 | |
886 | bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { |
887 | // Returns true if pos changed |
888 | Sci::Position posNext = NextPosition(pos, moveDir); |
889 | if (posNext == pos) { |
890 | return false; |
891 | } else { |
892 | pos = posNext; |
893 | return true; |
894 | } |
895 | } |
896 | |
897 | Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { |
898 | if (position >= LengthNoExcept()) { |
899 | return CharacterExtracted(unicodeReplacementChar, 0); |
900 | } |
901 | const unsigned char leadByte = cb.UCharAt(position); |
902 | if (!dbcsCodePage || UTF8IsAscii(leadByte)) { |
903 | // Common case: ASCII character |
904 | return CharacterExtracted(leadByte, 1); |
905 | } |
906 | if (CpUtf8 == dbcsCodePage) { |
907 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
908 | unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; |
909 | for (int b = 1; b<widthCharBytes; b++) |
910 | charBytes[b] = cb.UCharAt(position + b); |
911 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
912 | if (utf8status & UTF8MaskInvalid) { |
913 | // Treat as invalid and use up just one byte |
914 | return CharacterExtracted(unicodeReplacementChar, 1); |
915 | } else { |
916 | return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); |
917 | } |
918 | } else { |
919 | if (IsDBCSLeadByteNoExcept(leadByte)) { |
920 | const unsigned char trailByte = cb.UCharAt(position + 1); |
921 | if (IsDBCSTrailByteNoExcept(trailByte)) { |
922 | return CharacterExtracted::DBCS(leadByte, trailByte); |
923 | } |
924 | } |
925 | return CharacterExtracted(leadByte, 1); |
926 | } |
927 | } |
928 | |
929 | Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept { |
930 | if (position <= 0) { |
931 | return CharacterExtracted(unicodeReplacementChar, 0); |
932 | } |
933 | const unsigned char previousByte = cb.UCharAt(position - 1); |
934 | if (0 == dbcsCodePage) { |
935 | return CharacterExtracted(previousByte, 1); |
936 | } |
937 | if (CpUtf8 == dbcsCodePage) { |
938 | if (UTF8IsAscii(previousByte)) { |
939 | return CharacterExtracted(previousByte, 1); |
940 | } |
941 | position--; |
942 | // If previousByte is not a trail byte then its invalid |
943 | if (UTF8IsTrailByte(previousByte)) { |
944 | // If previousByte is a trail byte in a valid UTF-8 character then find start of character |
945 | Sci::Position startUTF = position; |
946 | Sci::Position endUTF = position; |
947 | if (InGoodUTF8(position, startUTF, endUTF)) { |
948 | const Sci::Position widthCharBytes = endUTF - startUTF; |
949 | unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 }; |
950 | for (Sci::Position b = 0; b<widthCharBytes; b++) |
951 | charBytes[b] = cb.UCharAt(startUTF + b); |
952 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
953 | if (utf8status & UTF8MaskInvalid) { |
954 | // Treat as invalid and use up just one byte |
955 | return CharacterExtracted(unicodeReplacementChar, 1); |
956 | } else { |
957 | return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); |
958 | } |
959 | } |
960 | // Else invalid UTF-8 so return position of isolated trail byte |
961 | } |
962 | return CharacterExtracted(unicodeReplacementChar, 1); |
963 | } else { |
964 | // Moving backwards in DBCS is complex so use NextPosition |
965 | const Sci::Position posStartCharacter = NextPosition(position, -1); |
966 | return CharacterAfter(posStartCharacter); |
967 | } |
968 | } |
969 | |
970 | // Return -1 on out-of-bounds |
971 | Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const { |
972 | Sci::Position pos = positionStart; |
973 | if (dbcsCodePage) { |
974 | const int increment = (characterOffset > 0) ? 1 : -1; |
975 | while (characterOffset != 0) { |
976 | const Sci::Position posNext = NextPosition(pos, increment); |
977 | if (posNext == pos) |
978 | return Sci::invalidPosition; |
979 | pos = posNext; |
980 | characterOffset -= increment; |
981 | } |
982 | } else { |
983 | pos = positionStart + characterOffset; |
984 | if ((pos < 0) || (pos > Length())) |
985 | return Sci::invalidPosition; |
986 | } |
987 | return pos; |
988 | } |
989 | |
990 | Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { |
991 | Sci::Position pos = positionStart; |
992 | if (dbcsCodePage) { |
993 | const int increment = (characterOffset > 0) ? 1 : -1; |
994 | while (characterOffset != 0) { |
995 | const Sci::Position posNext = NextPosition(pos, increment); |
996 | if (posNext == pos) |
997 | return Sci::invalidPosition; |
998 | if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. |
999 | characterOffset -= increment; |
1000 | pos = posNext; |
1001 | characterOffset -= increment; |
1002 | } |
1003 | } else { |
1004 | pos = positionStart + characterOffset; |
1005 | if ((pos < 0) || (pos > LengthNoExcept())) |
1006 | return Sci::invalidPosition; |
1007 | } |
1008 | return pos; |
1009 | } |
1010 | |
1011 | int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { |
1012 | int bytesInCharacter = 1; |
1013 | const unsigned char leadByte = cb.UCharAt(position); |
1014 | int character = leadByte; |
1015 | if (dbcsCodePage && !UTF8IsAscii(leadByte)) { |
1016 | if (CpUtf8 == dbcsCodePage) { |
1017 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
1018 | unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; |
1019 | for (int b=1; b<widthCharBytes; b++) |
1020 | charBytes[b] = cb.UCharAt(position+b); |
1021 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
1022 | if (utf8status & UTF8MaskInvalid) { |
1023 | // Report as singleton surrogate values which are invalid Unicode |
1024 | character = 0xDC80 + leadByte; |
1025 | } else { |
1026 | bytesInCharacter = utf8status & UTF8MaskWidth; |
1027 | character = UnicodeFromUTF8(charBytes); |
1028 | } |
1029 | } else { |
1030 | if (IsDBCSLeadByteNoExcept(leadByte)) { |
1031 | const unsigned char trailByte = cb.UCharAt(position + 1); |
1032 | if (IsDBCSTrailByteNoExcept(trailByte)) { |
1033 | bytesInCharacter = 2; |
1034 | character = (leadByte << 8) | trailByte; |
1035 | } |
1036 | } |
1037 | } |
1038 | } |
1039 | if (pWidth) { |
1040 | *pWidth = bytesInCharacter; |
1041 | } |
1042 | return character; |
1043 | } |
1044 | |
// Return the code page stored in dbcsCodePage.
// Elsewhere in this file 0 is treated as a single byte encoding and CpUtf8 as UTF-8.
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}
1048 | |
1049 | bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const { |
1050 | // Used by lexers so must match IDocument method exactly |
1051 | return IsDBCSLeadByteNoExcept(ch); |
1052 | } |
1053 | |
1054 | bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept { |
1055 | // Used inside core Scintilla |
1056 | // Byte ranges found in Wikipedia articles with relevant search strings in each case |
1057 | const unsigned char uch = ch; |
1058 | switch (dbcsCodePage) { |
1059 | case 932: |
1060 | // Shift_jis |
1061 | return ((uch >= 0x81) && (uch <= 0x9F)) || |
1062 | ((uch >= 0xE0) && (uch <= 0xFC)); |
1063 | // Lead bytes F0 to FC may be a Microsoft addition. |
1064 | case 936: |
1065 | // GBK |
1066 | return (uch >= 0x81) && (uch <= 0xFE); |
1067 | case 949: |
1068 | // Korean Wansung KS C-5601-1987 |
1069 | return (uch >= 0x81) && (uch <= 0xFE); |
1070 | case 950: |
1071 | // Big5 |
1072 | return (uch >= 0x81) && (uch <= 0xFE); |
1073 | case 1361: |
1074 | // Korean Johab KS C-5601-1992 |
1075 | return |
1076 | ((uch >= 0x84) && (uch <= 0xD3)) || |
1077 | ((uch >= 0xD8) && (uch <= 0xDE)) || |
1078 | ((uch >= 0xE0) && (uch <= 0xF9)); |
1079 | } |
1080 | return false; |
1081 | } |
1082 | |
1083 | bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept { |
1084 | const unsigned char trail = ch; |
1085 | switch (dbcsCodePage) { |
1086 | case 932: |
1087 | // Shift_jis |
1088 | return (trail != 0x7F) && |
1089 | ((trail >= 0x40) && (trail <= 0xFC)); |
1090 | case 936: |
1091 | // GBK |
1092 | return (trail != 0x7F) && |
1093 | ((trail >= 0x40) && (trail <= 0xFE)); |
1094 | case 949: |
1095 | // Korean Wansung KS C-5601-1987 |
1096 | return |
1097 | ((trail >= 0x41) && (trail <= 0x5A)) || |
1098 | ((trail >= 0x61) && (trail <= 0x7A)) || |
1099 | ((trail >= 0x81) && (trail <= 0xFE)); |
1100 | case 950: |
1101 | // Big5 |
1102 | return |
1103 | ((trail >= 0x40) && (trail <= 0x7E)) || |
1104 | ((trail >= 0xA1) && (trail <= 0xFE)); |
1105 | case 1361: |
1106 | // Korean Johab KS C-5601-1992 |
1107 | return |
1108 | ((trail >= 0x31) && (trail <= 0x7E)) || |
1109 | ((trail >= 0x81) && (trail <= 0xFE)); |
1110 | } |
1111 | return false; |
1112 | } |
1113 | |
1114 | int Document::DBCSDrawBytes(std::string_view text) const noexcept { |
1115 | if (text.length() <= 1) { |
1116 | return static_cast<int>(text.length()); |
1117 | } |
1118 | if (IsDBCSLeadByteNoExcept(text[0])) { |
1119 | return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1; |
1120 | } else { |
1121 | return 1; |
1122 | } |
1123 | } |
1124 | |
1125 | bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { |
1126 | return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) |
1127 | && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); |
1128 | } |
1129 | |
1130 | // Need to break text into segments near end but taking into account the |
1131 | // encoding to not break inside a UTF-8 or DBCS character and also trying |
1132 | // to avoid breaking inside a pair of combining characters, or inside |
1133 | // ligatures. |
1134 | // TODO: implement grapheme cluster boundaries, |
1135 | // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries. |
1136 | // |
1137 | // The segment length must always be long enough (more than 4 bytes) |
1138 | // so that there will be at least one whole character to make a segment. |
1139 | // For UTF-8, text must consist only of valid whole characters. |
1140 | // In preference order from best to worst: |
1141 | // 1) Break before or after spaces or controls |
1142 | // 2) Break at word and punctuation boundary for better kerning and ligature support |
1143 | // 3) Break after whole character, this may break combining characters |
1144 | |
1145 | size_t Document::SafeSegment(std::string_view text) const noexcept { |
1146 | // check space first as most written language use spaces. |
1147 | for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) { |
1148 | if (IsBreakSpace(*it)) { |
1149 | return it - text.begin(); |
1150 | } |
1151 | } |
1152 | |
1153 | if (!dbcsCodePage || dbcsCodePage == CpUtf8) { |
1154 | // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary. |
1155 | std::string_view::iterator it = text.end() - 1; |
1156 | const bool punctuation = IsPunctuation(*it); |
1157 | do { |
1158 | --it; |
1159 | if (punctuation != IsPunctuation(*it)) { |
1160 | return it - text.begin() + 1; |
1161 | } |
1162 | } while (it != text.begin()); |
1163 | |
1164 | it = text.end() - 1; |
1165 | if (dbcsCodePage) { |
1166 | // for UTF-8 go back to the start of last character. |
1167 | for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) { |
1168 | --it; |
1169 | } |
1170 | } |
1171 | return it - text.begin(); |
1172 | } |
1173 | |
1174 | { |
1175 | // forward iterate for DBCS to find word and punctuation boundary. |
1176 | size_t lastPunctuationBreak = 0; |
1177 | size_t lastEncodingAllowedBreak = 0; |
1178 | CharacterClass ccPrev = CharacterClass::space; |
1179 | for (size_t j = 0; j < text.length();) { |
1180 | const unsigned char ch = text[j]; |
1181 | lastEncodingAllowedBreak = j++; |
1182 | |
1183 | CharacterClass cc = CharacterClass::word; |
1184 | if (UTF8IsAscii(ch)) { |
1185 | if (IsPunctuation(ch)) { |
1186 | cc = CharacterClass::punctuation; |
1187 | } |
1188 | } else { |
1189 | j += IsDBCSLeadByteNoExcept(ch); |
1190 | } |
1191 | if (cc != ccPrev) { |
1192 | ccPrev = cc; |
1193 | lastPunctuationBreak = lastEncodingAllowedBreak; |
1194 | } |
1195 | } |
1196 | return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak; |
1197 | } |
1198 | } |
1199 | |
1200 | EncodingFamily Document::CodePageFamily() const noexcept { |
1201 | if (CpUtf8 == dbcsCodePage) |
1202 | return EncodingFamily::unicode; |
1203 | else if (dbcsCodePage) |
1204 | return EncodingFamily::dbcs; |
1205 | else |
1206 | return EncodingFamily::eightBit; |
1207 | } |
1208 | |
1209 | void Document::ModifiedAt(Sci::Position pos) noexcept { |
1210 | if (endStyled > pos) |
1211 | endStyled = pos; |
1212 | } |
1213 | |
1214 | void Document::CheckReadOnly() { |
1215 | if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { |
1216 | enteredReadOnlyCount++; |
1217 | NotifyModifyAttempt(); |
1218 | enteredReadOnlyCount--; |
1219 | } |
1220 | } |
1221 | |
1222 | // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt. |
1223 | // SetStyleAt does not change the persistent state of a document |
1224 | |
1225 | bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { |
1226 | if (pos < 0) |
1227 | return false; |
1228 | if (len <= 0) |
1229 | return false; |
1230 | if ((pos + len) > LengthNoExcept()) |
1231 | return false; |
1232 | CheckReadOnly(); |
1233 | if (enteredModification != 0) { |
1234 | return false; |
1235 | } else { |
1236 | enteredModification++; |
1237 | if (!cb.IsReadOnly()) { |
1238 | NotifyModified( |
1239 | DocModification( |
1240 | ModificationFlags::BeforeDelete | ModificationFlags::User, |
1241 | pos, len, |
1242 | 0, nullptr)); |
1243 | const Sci::Line prevLinesTotal = LinesTotal(); |
1244 | const bool startSavePoint = cb.IsSavePoint(); |
1245 | bool startSequence = false; |
1246 | const char *text = cb.DeleteChars(pos, len, startSequence); |
1247 | if (startSavePoint && cb.IsCollectingUndo()) |
1248 | NotifySavePoint(false); |
1249 | if ((pos < LengthNoExcept()) || (pos == 0)) |
1250 | ModifiedAt(pos); |
1251 | else |
1252 | ModifiedAt(pos-1); |
1253 | NotifyModified( |
1254 | DocModification( |
1255 | ModificationFlags::DeleteText | ModificationFlags::User | |
1256 | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), |
1257 | pos, len, |
1258 | LinesTotal() - prevLinesTotal, text)); |
1259 | } |
1260 | enteredModification--; |
1261 | } |
1262 | return !cb.IsReadOnly(); |
1263 | } |
1264 | |
1265 | /** |
1266 | * Insert a string with a length. |
1267 | */ |
1268 | Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { |
1269 | if (insertLength <= 0) { |
1270 | return 0; |
1271 | } |
1272 | CheckReadOnly(); // Application may change read only state here |
1273 | if (cb.IsReadOnly()) { |
1274 | return 0; |
1275 | } |
1276 | if (enteredModification != 0) { |
1277 | return 0; |
1278 | } |
1279 | enteredModification++; |
1280 | insertionSet = false; |
1281 | insertion.clear(); |
1282 | NotifyModified( |
1283 | DocModification( |
1284 | ModificationFlags::InsertCheck, |
1285 | position, insertLength, |
1286 | 0, s)); |
1287 | if (insertionSet) { |
1288 | s = insertion.c_str(); |
1289 | insertLength = insertion.length(); |
1290 | } |
1291 | NotifyModified( |
1292 | DocModification( |
1293 | ModificationFlags::BeforeInsert | ModificationFlags::User, |
1294 | position, insertLength, |
1295 | 0, s)); |
1296 | const Sci::Line prevLinesTotal = LinesTotal(); |
1297 | const bool startSavePoint = cb.IsSavePoint(); |
1298 | bool startSequence = false; |
1299 | const char *text = cb.InsertString(position, s, insertLength, startSequence); |
1300 | if (startSavePoint && cb.IsCollectingUndo()) |
1301 | NotifySavePoint(false); |
1302 | ModifiedAt(position); |
1303 | NotifyModified( |
1304 | DocModification( |
1305 | ModificationFlags::InsertText | ModificationFlags::User | |
1306 | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), |
1307 | position, insertLength, |
1308 | LinesTotal() - prevLinesTotal, text)); |
1309 | if (insertionSet) { // Free memory as could be large |
1310 | std::string().swap(insertion); |
1311 | } |
1312 | enteredModification--; |
1313 | return insertLength; |
1314 | } |
1315 | |
// Record replacement text of the given length for an insertion.
// InsertString checks insertionSet after its InsertCheck notification and,
// when set, inserts the stored text instead of its own argument.
void Document::ChangeInsertion(const char *s, Sci::Position length) {
	insertionSet = true;
	insertion.assign(s, length);
}
1320 | |
1321 | int SCI_METHOD Document::AddData(const char *data, Sci_Position length) { |
1322 | try { |
1323 | const Sci::Position position = Length(); |
1324 | InsertString(position, data, length); |
1325 | } catch (std::bad_alloc &) { |
1326 | return static_cast<int>(Status::BadAlloc); |
1327 | } catch (...) { |
1328 | return static_cast<int>(Status::Failure); |
1329 | } |
1330 | return static_cast<int>(Status::Ok); |
1331 | } |
1332 | |
// Return this Document as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
	return this;
}
1336 | |
/**
 * Undo the steps reported by cb.StartUndo, sending a notification before and after each.
 * Nothing is undone when a modification is already in progress, undo is not
 * being collected or the buffer is read-only.
 * @return a position derived from the last non-container step: the end of the
 * text for a re-inserted removal (extended over coalesced adjacent removals),
 * the action position for an undone insertion, or -1 when no such step was performed.
 */
Sci::Position Document::Undo() {
	Sci::Position newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.StartUndo();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			// Track a run of removals that touch each other so that newPos can
			// be placed at the end of the whole restored run.
			Sci::Position coalescedRemovePos = -1;
			Sci::Position coalescedRemoveLen = 0;
			Sci::Position prevRemoveActionPos = -1;
			Sci::Position prevRemoveActionLen = 0;
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Send the 'before' notification matching what undoing this action will do
				if (action.at == ActionType::remove) {
					NotifyModified(DocModification(
									ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
				} else if (action.at == ActionType::container) {
					DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
					dm.token = action.position;
					NotifyModified(dm);
					if (!action.mayCoalesce) {
						// A non-coalescing container action ends any run of removals
						coalescedRemovePos = -1;
						coalescedRemoveLen = 0;
						prevRemoveActionPos = -1;
						prevRemoveActionLen = 0;
					}
				} else {
					NotifyModified(DocModification(
									ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
				}
				cb.PerformUndoStep();
				if (action.at != ActionType::container) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				ModificationFlags modFlags = ModificationFlags::Undo;
				// With undo, an insertion action becomes a deletion notification
				if (action.at == ActionType::remove) {
					newPos += action.lenData;
					modFlags |= ModificationFlags::InsertText;
					// Extend the run when this removal starts where the previous one
					// started or where the previous one ended
					if ((coalescedRemoveLen > 0) &&
						(action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
						coalescedRemoveLen += action.lenData;
						newPos = coalescedRemovePos + coalescedRemoveLen;
					} else {
						coalescedRemovePos = action.position;
						coalescedRemoveLen = action.lenData;
					}
					prevRemoveActionPos = action.position;
					prevRemoveActionLen = action.lenData;
				} else if (action.at == ActionType::insert) {
					modFlags |= ModificationFlags::DeleteText;
					// An undone insertion ends any run of removals
					coalescedRemovePos = -1;
					coalescedRemoveLen = 0;
					prevRemoveActionPos = -1;
					prevRemoveActionLen = 0;
				}
				if (steps > 1)
					modFlags |= ModificationFlags::MultiStepUndoRedo;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				if (step == steps - 1) {
					// The final step also reports whether any step changed the line count
					modFlags |= ModificationFlags::LastStepInUndoRedo;
					if (multiLine)
						modFlags |= ModificationFlags::MultilineUndoRedo;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
							       linesAdded, action.data.get()));
			}

			// Notify only when the save point state differs from before the undo
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1421 | |
/**
 * Redo the steps reported by cb.StartRedo, sending a notification before and after each.
 * Nothing is redone when a modification is already in progress, undo is not
 * being collected or the buffer is read-only.
 * @return a position derived from the last non-container step: the end of the
 * text for a redone insertion, the action position for a redone removal,
 * or -1 when no such step was performed.
 */
Sci::Position Document::Redo() {
	Sci::Position newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.StartRedo();
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetRedoStep();
				// Send the 'before' notification matching the action about to be redone
				if (action.at == ActionType::insert) {
					NotifyModified(DocModification(
									ModificationFlags::BeforeInsert | ModificationFlags::Redo, action));
				} else if (action.at == ActionType::container) {
					DocModification dm(ModificationFlags::Container | ModificationFlags::Redo);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									ModificationFlags::BeforeDelete | ModificationFlags::Redo, action));
				}
				cb.PerformRedoStep();
				if (action.at != ActionType::container) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				ModificationFlags modFlags = ModificationFlags::Redo;
				if (action.at == ActionType::insert) {
					// Place the returned position after the re-inserted text
					newPos += action.lenData;
					modFlags |= ModificationFlags::InsertText;
				} else if (action.at == ActionType::remove) {
					modFlags |= ModificationFlags::DeleteText;
				}
				if (steps > 1)
					modFlags |= ModificationFlags::MultiStepUndoRedo;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				if (step == steps - 1) {
					// The final step also reports whether any step changed the line count
					modFlags |= ModificationFlags::LastStepInUndoRedo;
					if (multiLine)
						modFlags |= ModificationFlags::MultilineUndoRedo;
				}
				NotifyModified(
					DocModification(modFlags, action.position, action.lenData,
									linesAdded, action.data.get()));
			}

			// Notify only when the save point state differs from before the redo
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1481 | |
1482 | void Document::DelChar(Sci::Position pos) { |
1483 | DeleteChars(pos, LenChar(pos)); |
1484 | } |
1485 | |
1486 | void Document::DelCharBack(Sci::Position pos) { |
1487 | if (pos <= 0) { |
1488 | return; |
1489 | } else if (IsCrLf(pos - 2)) { |
1490 | DeleteChars(pos - 2, 2); |
1491 | } else if (dbcsCodePage) { |
1492 | const Sci::Position startChar = NextPosition(pos, -1); |
1493 | DeleteChars(startChar, pos - startChar); |
1494 | } else { |
1495 | DeleteChars(pos - 1, 1); |
1496 | } |
1497 | } |
1498 | |
1499 | static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept { |
1500 | return ((pos / tabSize) + 1) * tabSize; |
1501 | } |
1502 | |
1503 | static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) { |
1504 | std::string indentation; |
1505 | if (!insertSpaces) { |
1506 | while (indent >= tabSize) { |
1507 | indentation += '\t'; |
1508 | indent -= tabSize; |
1509 | } |
1510 | } |
1511 | while (indent > 0) { |
1512 | indentation += ' '; |
1513 | indent--; |
1514 | } |
1515 | return indentation; |
1516 | } |
1517 | |
1518 | int SCI_METHOD Document::GetLineIndentation(Sci_Position line) { |
1519 | int indent = 0; |
1520 | if ((line >= 0) && (line < LinesTotal())) { |
1521 | const Sci::Position lineStart = LineStart(line); |
1522 | const Sci::Position length = Length(); |
1523 | for (Sci::Position i = lineStart; i < length; i++) { |
1524 | const char ch = cb.CharAt(i); |
1525 | if (ch == ' ') |
1526 | indent++; |
1527 | else if (ch == '\t') |
1528 | indent = static_cast<int>(NextTab(indent, tabInChars)); |
1529 | else |
1530 | return indent; |
1531 | } |
1532 | } |
1533 | return indent; |
1534 | } |
1535 | |
1536 | Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) { |
1537 | const int indentOfLine = GetLineIndentation(line); |
1538 | if (indent < 0) |
1539 | indent = 0; |
1540 | if (indent != indentOfLine) { |
1541 | std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs); |
1542 | const Sci::Position thisLineStart = LineStart(line); |
1543 | const Sci::Position indentPos = GetLineIndentPosition(line); |
1544 | UndoGroup ug(this); |
1545 | DeleteChars(thisLineStart, indentPos - thisLineStart); |
1546 | return thisLineStart + InsertString(thisLineStart, linebuf.c_str(), |
1547 | linebuf.length()); |
1548 | } else { |
1549 | return GetLineIndentPosition(line); |
1550 | } |
1551 | } |
1552 | |
1553 | Sci::Position Document::GetLineIndentPosition(Sci::Line line) const { |
1554 | if (line < 0) |
1555 | return 0; |
1556 | Sci::Position pos = LineStart(line); |
1557 | const Sci::Position length = Length(); |
1558 | while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) { |
1559 | pos++; |
1560 | } |
1561 | return pos; |
1562 | } |
1563 | |
1564 | Sci::Position Document::GetColumn(Sci::Position pos) { |
1565 | Sci::Position column = 0; |
1566 | const Sci::Line line = SciLineFromPosition(pos); |
1567 | if ((line >= 0) && (line < LinesTotal())) { |
1568 | for (Sci::Position i = LineStart(line); i < pos;) { |
1569 | const char ch = cb.CharAt(i); |
1570 | if (ch == '\t') { |
1571 | column = NextTab(column, tabInChars); |
1572 | i++; |
1573 | } else if (ch == '\r') { |
1574 | return column; |
1575 | } else if (ch == '\n') { |
1576 | return column; |
1577 | } else if (i >= Length()) { |
1578 | return column; |
1579 | } else { |
1580 | column++; |
1581 | i = NextPosition(i, 1); |
1582 | } |
1583 | } |
1584 | } |
1585 | return column; |
1586 | } |
1587 | |
1588 | Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept { |
1589 | startPos = MovePositionOutsideChar(startPos, 1, false); |
1590 | endPos = MovePositionOutsideChar(endPos, -1, false); |
1591 | Sci::Position count = 0; |
1592 | Sci::Position i = startPos; |
1593 | while (i < endPos) { |
1594 | count++; |
1595 | i = NextPosition(i, 1); |
1596 | } |
1597 | return count; |
1598 | } |
1599 | |
1600 | Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept { |
1601 | startPos = MovePositionOutsideChar(startPos, 1, false); |
1602 | endPos = MovePositionOutsideChar(endPos, -1, false); |
1603 | Sci::Position count = 0; |
1604 | Sci::Position i = startPos; |
1605 | while (i < endPos) { |
1606 | count++; |
1607 | const Sci::Position next = NextPosition(i, 1); |
1608 | if ((next - i) > 3) |
1609 | count++; |
1610 | i = next; |
1611 | } |
1612 | return count; |
1613 | } |
1614 | |
1615 | Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) { |
1616 | Sci::Position position = LineStart(line); |
1617 | if ((line >= 0) && (line < LinesTotal())) { |
1618 | Sci::Position columnCurrent = 0; |
1619 | while ((columnCurrent < column) && (position < Length())) { |
1620 | const char ch = cb.CharAt(position); |
1621 | if (ch == '\t') { |
1622 | columnCurrent = NextTab(columnCurrent, tabInChars); |
1623 | if (columnCurrent > column) |
1624 | return position; |
1625 | position++; |
1626 | } else if (ch == '\r') { |
1627 | return position; |
1628 | } else if (ch == '\n') { |
1629 | return position; |
1630 | } else { |
1631 | columnCurrent++; |
1632 | position = NextPosition(position, 1); |
1633 | } |
1634 | } |
1635 | } |
1636 | return position; |
1637 | } |
1638 | |
1639 | void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) { |
1640 | // Dedent - suck white space off the front of the line to dedent by equivalent of a tab |
1641 | for (Sci::Line line = lineBottom; line >= lineTop; line--) { |
1642 | const Sci::Position indentOfLine = GetLineIndentation(line); |
1643 | if (forwards) { |
1644 | if (LineStart(line) < LineEnd(line)) { |
1645 | SetLineIndentation(line, indentOfLine + IndentSize()); |
1646 | } |
1647 | } else { |
1648 | SetLineIndentation(line, indentOfLine - IndentSize()); |
1649 | } |
1650 | } |
1651 | } |
1652 | |
1653 | // Convert line endings for a piece of text to a particular mode. |
1654 | // Stop at len or when a NUL is found. |
1655 | std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) { |
1656 | std::string dest; |
1657 | for (size_t i = 0; (i < len) && (s[i]); i++) { |
1658 | if (s[i] == '\n' || s[i] == '\r') { |
1659 | if (eolModeWanted == EndOfLine::Cr) { |
1660 | dest.push_back('\r'); |
1661 | } else if (eolModeWanted == EndOfLine::Lf) { |
1662 | dest.push_back('\n'); |
1663 | } else { // eolModeWanted == EndOfLine::CrLf |
1664 | dest.push_back('\r'); |
1665 | dest.push_back('\n'); |
1666 | } |
1667 | if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) { |
1668 | i++; |
1669 | } |
1670 | } else { |
1671 | dest.push_back(s[i]); |
1672 | } |
1673 | } |
1674 | return dest; |
1675 | } |
1676 | |
/**
 * Convert every line end in the document to eolModeSet.
 * All changes are made inside one undo group. The document is scanned byte by
 * byte and pos is adjusted after each edit so that the scan continues after
 * the converted line end.
 */
void Document::ConvertLineEnds(EndOfLine eolModeSet) {
	UndoGroup ug(this);

	for (Sci::Position pos = 0; pos < Length(); pos++) {
		if (cb.CharAt(pos) == '\r') {
			if (cb.CharAt(pos + 1) == '\n') {
				// CRLF
				if (eolModeSet == EndOfLine::Cr) {
					DeleteChars(pos + 1, 1); // Delete the LF
				} else if (eolModeSet == EndOfLine::Lf) {
					DeleteChars(pos, 1); // Delete the CR
				} else {
					// Already CRLF: step over the LF as well
					pos++;
				}
			} else {
				// CR
				if (eolModeSet == EndOfLine::CrLf) {
					pos += InsertString(pos + 1, "\n" , 1); // Insert LF
				} else if (eolModeSet == EndOfLine::Lf) {
					// Insert before deleting; pos is advanced by the inserted
					// length to reach the CR, then moved back onto the new LF
					pos += InsertString(pos, "\n" , 1); // Insert LF
					DeleteChars(pos, 1); // Delete CR
					pos--;
				}
			}
		} else if (cb.CharAt(pos) == '\n') {
			// LF
			if (eolModeSet == EndOfLine::CrLf) {
				pos += InsertString(pos, "\r" , 1); // Insert CR
			} else if (eolModeSet == EndOfLine::Cr) {
				// Insert before deleting; pos is advanced by the inserted
				// length to reach the LF, then moved back onto the new CR
				pos += InsertString(pos, "\r" , 1); // Insert CR
				DeleteChars(pos, 1); // Delete LF
				pos--;
			}
		}
	}

}
1714 | |
1715 | DocumentOption Document::Options() const noexcept { |
1716 | return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) | |
1717 | (cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone); |
1718 | } |
1719 | |
1720 | bool Document::IsWhiteLine(Sci::Line line) const { |
1721 | Sci::Position currentChar = LineStart(line); |
1722 | const Sci::Position endLine = LineEnd(line); |
1723 | while (currentChar < endLine) { |
1724 | if (!IsSpaceOrTab(cb.CharAt(currentChar))) { |
1725 | return false; |
1726 | } |
1727 | ++currentChar; |
1728 | } |
1729 | return true; |
1730 | } |
1731 | |
1732 | Sci::Position Document::ParaUp(Sci::Position pos) const { |
1733 | Sci::Line line = SciLineFromPosition(pos); |
1734 | line--; |
1735 | while (line >= 0 && IsWhiteLine(line)) { // skip empty lines |
1736 | line--; |
1737 | } |
1738 | while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines |
1739 | line--; |
1740 | } |
1741 | line++; |
1742 | return LineStart(line); |
1743 | } |
1744 | |
1745 | Sci::Position Document::ParaDown(Sci::Position pos) const { |
1746 | Sci::Line line = SciLineFromPosition(pos); |
1747 | while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines |
1748 | line++; |
1749 | } |
1750 | while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines |
1751 | line++; |
1752 | } |
1753 | if (line < LinesTotal()) |
1754 | return LineStart(line); |
1755 | else // end of a document |
1756 | return LineEnd(line-1); |
1757 | } |
1758 | |
1759 | CharacterClass Document::WordCharacterClass(unsigned int ch) const { |
1760 | if (dbcsCodePage && (ch >= 0x80)) { |
1761 | if (CpUtf8 == dbcsCodePage) { |
1762 | // Use hard coded Unicode class |
1763 | const CharacterCategory cc = charMap.CategoryFor(ch); |
1764 | switch (cc) { |
1765 | |
1766 | // Separator, Line/Paragraph |
1767 | case ccZl: |
1768 | case ccZp: |
1769 | return CharacterClass::newLine; |
1770 | |
1771 | // Separator, Space |
1772 | case ccZs: |
1773 | // Other |
1774 | case ccCc: |
1775 | case ccCf: |
1776 | case ccCs: |
1777 | case ccCo: |
1778 | case ccCn: |
1779 | return CharacterClass::space; |
1780 | |
1781 | // Letter |
1782 | case ccLu: |
1783 | case ccLl: |
1784 | case ccLt: |
1785 | case ccLm: |
1786 | case ccLo: |
1787 | // Number |
1788 | case ccNd: |
1789 | case ccNl: |
1790 | case ccNo: |
1791 | // Mark - includes combining diacritics |
1792 | case ccMn: |
1793 | case ccMc: |
1794 | case ccMe: |
1795 | return CharacterClass::word; |
1796 | |
1797 | // Punctuation |
1798 | case ccPc: |
1799 | case ccPd: |
1800 | case ccPs: |
1801 | case ccPe: |
1802 | case ccPi: |
1803 | case ccPf: |
1804 | case ccPo: |
1805 | // Symbol |
1806 | case ccSm: |
1807 | case ccSc: |
1808 | case ccSk: |
1809 | case ccSo: |
1810 | return CharacterClass::punctuation; |
1811 | |
1812 | } |
1813 | } else { |
1814 | // Asian DBCS |
1815 | return CharacterClass::word; |
1816 | } |
1817 | } |
1818 | return charClass.GetClass(static_cast<unsigned char>(ch)); |
1819 | } |
1820 | |
1821 | /** |
1822 | * Used by commands that want to select whole words. |
1823 | * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. |
1824 | */ |
1825 | Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const { |
1826 | CharacterClass ccStart = CharacterClass::word; |
1827 | if (delta < 0) { |
1828 | if (!onlyWordCharacters) { |
1829 | const CharacterExtracted ce = CharacterBefore(pos); |
1830 | ccStart = WordCharacterClass(ce.character); |
1831 | } |
1832 | while (pos > 0) { |
1833 | const CharacterExtracted ce = CharacterBefore(pos); |
1834 | if (WordCharacterClass(ce.character) != ccStart) |
1835 | break; |
1836 | pos -= ce.widthBytes; |
1837 | } |
1838 | } else { |
1839 | if (!onlyWordCharacters && pos < LengthNoExcept()) { |
1840 | const CharacterExtracted ce = CharacterAfter(pos); |
1841 | ccStart = WordCharacterClass(ce.character); |
1842 | } |
1843 | while (pos < LengthNoExcept()) { |
1844 | const CharacterExtracted ce = CharacterAfter(pos); |
1845 | if (WordCharacterClass(ce.character) != ccStart) |
1846 | break; |
1847 | pos += ce.widthBytes; |
1848 | } |
1849 | } |
1850 | return MovePositionOutsideChar(pos, delta, true); |
1851 | } |
1852 | |
1853 | /** |
1854 | * Find the start of the next word in either a forward (delta >= 0) or backwards direction |
1855 | * (delta < 0). |
1856 | * This is looking for a transition between character classes although there is also some |
1857 | * additional movement to transit white space. |
1858 | * Used by cursor movement by word commands. |
1859 | */ |
1860 | Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const { |
1861 | if (delta < 0) { |
1862 | while (pos > 0) { |
1863 | const CharacterExtracted ce = CharacterBefore(pos); |
1864 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
1865 | break; |
1866 | pos -= ce.widthBytes; |
1867 | } |
1868 | if (pos > 0) { |
1869 | CharacterExtracted ce = CharacterBefore(pos); |
1870 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
1871 | while (pos > 0) { |
1872 | ce = CharacterBefore(pos); |
1873 | if (WordCharacterClass(ce.character) != ccStart) |
1874 | break; |
1875 | pos -= ce.widthBytes; |
1876 | } |
1877 | } |
1878 | } else { |
1879 | CharacterExtracted ce = CharacterAfter(pos); |
1880 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
1881 | while (pos < LengthNoExcept()) { |
1882 | ce = CharacterAfter(pos); |
1883 | if (WordCharacterClass(ce.character) != ccStart) |
1884 | break; |
1885 | pos += ce.widthBytes; |
1886 | } |
1887 | while (pos < LengthNoExcept()) { |
1888 | ce = CharacterAfter(pos); |
1889 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
1890 | break; |
1891 | pos += ce.widthBytes; |
1892 | } |
1893 | } |
1894 | return pos; |
1895 | } |
1896 | |
1897 | /** |
1898 | * Find the end of the next word in either a forward (delta >= 0) or backwards direction |
1899 | * (delta < 0). |
1900 | * This is looking for a transition between character classes although there is also some |
1901 | * additional movement to transit white space. |
1902 | * Used by cursor movement by word commands. |
1903 | */ |
1904 | Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const { |
1905 | if (delta < 0) { |
1906 | if (pos > 0) { |
1907 | CharacterExtracted ce = CharacterBefore(pos); |
1908 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
1909 | if (ccStart != CharacterClass::space) { |
1910 | while (pos > 0) { |
1911 | ce = CharacterBefore(pos); |
1912 | if (WordCharacterClass(ce.character) != ccStart) |
1913 | break; |
1914 | pos -= ce.widthBytes; |
1915 | } |
1916 | } |
1917 | while (pos > 0) { |
1918 | ce = CharacterBefore(pos); |
1919 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
1920 | break; |
1921 | pos -= ce.widthBytes; |
1922 | } |
1923 | } |
1924 | } else { |
1925 | while (pos < LengthNoExcept()) { |
1926 | const CharacterExtracted ce = CharacterAfter(pos); |
1927 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
1928 | break; |
1929 | pos += ce.widthBytes; |
1930 | } |
1931 | if (pos < LengthNoExcept()) { |
1932 | CharacterExtracted ce = CharacterAfter(pos); |
1933 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
1934 | while (pos < LengthNoExcept()) { |
1935 | ce = CharacterAfter(pos); |
1936 | if (WordCharacterClass(ce.character) != ccStart) |
1937 | break; |
1938 | pos += ce.widthBytes; |
1939 | } |
1940 | } |
1941 | } |
1942 | return pos; |
1943 | } |
1944 | |
1945 | namespace { |
1946 | |
1947 | constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept { |
1948 | return (cc != ccNext) && |
1949 | (cc == CharacterClass::word || cc == CharacterClass::punctuation); |
1950 | } |
1951 | |
1952 | } |
1953 | |
1954 | /** |
1955 | * Check that the character at the given position is a word or punctuation character and that |
1956 | * the previous character is of a different character class. |
1957 | */ |
1958 | bool Document::IsWordStartAt(Sci::Position pos) const { |
1959 | if (pos >= LengthNoExcept()) |
1960 | return false; |
1961 | if (pos >= 0) { |
1962 | const CharacterExtracted cePos = CharacterAfter(pos); |
1963 | // At start of document, treat as if space before so can be word start |
1964 | const CharacterExtracted cePrev = (pos > 0) ? |
1965 | CharacterBefore(pos) : CharacterExtracted(' ', 1); |
1966 | return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character)); |
1967 | } |
1968 | return true; |
1969 | } |
1970 | |
1971 | /** |
1972 | * Check that the character before the given position is a word or punctuation character and that |
1973 | * the next character is of a different character class. |
1974 | */ |
1975 | bool Document::IsWordEndAt(Sci::Position pos) const { |
1976 | if (pos <= 0) |
1977 | return false; |
1978 | if (pos <= LengthNoExcept()) { |
1979 | // At end of document, treat as if space after so can be word end |
1980 | const CharacterExtracted cePos = (pos < LengthNoExcept()) ? |
1981 | CharacterAfter(pos) : CharacterExtracted(' ', 1); |
1982 | const CharacterExtracted cePrev = CharacterBefore(pos); |
1983 | return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character)); |
1984 | } |
1985 | return true; |
1986 | } |
1987 | |
1988 | /** |
1989 | * Check that the given range is has transitions between character classes at both |
1990 | * ends and where the characters on the inside are word or punctuation characters. |
1991 | */ |
1992 | bool Document::IsWordAt(Sci::Position start, Sci::Position end) const { |
1993 | return (start < end) && IsWordStartAt(start) && IsWordEndAt(end); |
1994 | } |
1995 | |
1996 | bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const { |
1997 | return (!word && !wordStart) || |
1998 | (word && IsWordAt(pos, pos + length)) || |
1999 | (wordStart && IsWordStartAt(pos)); |
2000 | } |
2001 | |
/** Report whether a case folder is currently installed in pcf. */
bool Document::HasCaseFolder() const noexcept {
	return static_cast<bool>(pcf);
}
2005 | |
/**
 * Take ownership of pcf_ as the document's case folder, replacing whatever
 * was held in pcf before.
 */
void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
	pcf = std::move(pcf_);
}
2009 | |
2010 | Document::CharacterExtracted Document::(Sci::Position position) const noexcept { |
2011 | const unsigned char leadByte = cb.UCharAt(position); |
2012 | if (UTF8IsAscii(leadByte)) { |
2013 | // Common case: ASCII character |
2014 | return CharacterExtracted(leadByte, 1); |
2015 | } |
2016 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
2017 | unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; |
2018 | for (int b=1; b<widthCharBytes; b++) |
2019 | charBytes[b] = cb.UCharAt(position + b); |
2020 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
2021 | if (utf8status & UTF8MaskInvalid) { |
2022 | // Treat as invalid and use up just one byte |
2023 | return CharacterExtracted(unicodeReplacementChar, 1); |
2024 | } else { |
2025 | return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); |
2026 | } |
2027 | } |
2028 | |
2029 | namespace { |
2030 | |
2031 | // Equivalent of memchr over the split view |
2032 | ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept { |
2033 | size_t range1Length = 0; |
2034 | if (start < view.length1) { |
2035 | range1Length = std::min(length, view.length1 - start); |
2036 | const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length)); |
2037 | if (match) { |
2038 | return match - view.segment1; |
2039 | } |
2040 | start += range1Length; |
2041 | } |
2042 | const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length)); |
2043 | if (match2) { |
2044 | return match2 - view.segment2; |
2045 | } |
2046 | return -1; |
2047 | } |
2048 | |
2049 | // Equivalent of memcmp over the split view |
2050 | // This does not call memcmp as search texts are commonly too short to overcome the |
2051 | // call overhead. |
2052 | bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept { |
2053 | for (size_t i = 0; i < text.length(); i++) { |
2054 | if (view.CharAt(i + start) != text[i]) { |
2055 | return false; |
2056 | } |
2057 | } |
2058 | return true; |
2059 | } |
2060 | |
2061 | } |
2062 | |
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos  Position at which the search starts.
 * @param maxPos  Position at which the search stops.
 * @param search  Bytes to look for; *length gives how many are used.
 * @param flags   FindOption bits: MatchCase, WholeWord, WordStart and RegExp are examined here.
 * @param length  In: length of search in bytes. Out: for case-insensitive matches in
 *                UTF-8 and DBCS documents this is set to the number of document bytes matched.
 * @return Position of the match, minPos when *length <= 0, or -1 when nothing matched.
 *         For regular expression searches the result of the regex implementation is returned.
 *
 * NOTE(review): the case-insensitive branches dereference pcf without a null check,
 * so a case folder is presumably expected to have been installed by the caller.
 */
Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
                                 FindOption flags, Sci::Position *length) {
	if (*length <= 0)
		return minPos;
	const bool caseSensitive = FlagSet(flags, FindOption::MatchCase);
	const bool word = FlagSet(flags, FindOption::WholeWord);
	const bool wordStart = FlagSet(flags, FindOption::WordStart);
	const bool regExp = FlagSet(flags, FindOption::RegExp);
	if (regExp) {
		// The regex engine is created on first use and then kept.
		if (!regex)
			regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
		return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
	} else {

		const bool forward = minPos <= maxPos;
		const int increment = forward ? 1 : -1;

		// Range endpoints should not be inside DBCS characters, but just in case, move them.
		const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
		const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);

		// Compute actual search ranges needed
		const Sci::Position lengthFind = *length;

		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
		// Upper bound of the range regardless of direction; matches may not extend past it.
		const Sci::Position limitPos = std::max(startPos, endPos);
		Sci::Position pos = startPos;
		if (!forward) {
			// Back all of a character
			pos = NextPosition(pos, increment);
		}
		const SplitView cbView = cb.AllView();
		if (caseSensitive) {
			// Forward searches stop early enough that a whole match still fits before endPos.
			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			const unsigned char charStartSearch = search[0];
			if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
				// This is a fast case where there is no need to test byte values to iterate
				// so becomes the equivalent of a memchr+memcmp loop.
				// UTF-8 search will not be self-synchronizing when starts with trail byte
				const std::string_view suffix(search + 1, lengthFind - 1);
				while (pos < endSearch) {
					// Jump to the next occurrence of the first search byte; -1 means none left.
					pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
					if (pos < 0) {
						break;
					}
					if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
						return pos;
					}
					pos++;
				}
			} else {
				// General case-sensitive search: compare byte by byte, stepping by whole characters.
				while (forward ? (pos < endSearch) : (pos >= endSearch)) {
					const unsigned char leadByte = cbView.CharAt(pos);
					if (leadByte == charStartSearch) {
						bool found = (pos + lengthFind) <= limitPos;
						// SplitMatch could be called here but it is slower with g++ -O2
						for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
							found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
						}
						if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
							return pos;
						}
					}
					if (forward && UTF8IsAscii(leadByte)) {
						pos++;
					} else {
						if (dbcsCodePage) {
							// Stop when the position can not be moved any further.
							if (!NextCharacter(pos, increment)) {
								break;
							}
						} else {
							pos += increment;
						}
					}
				}
			}
		} else if (CpUtf8 == dbcsCodePage) {
			// Case-insensitive UTF-8: fold the search text once, then fold the document
			// one character at a time and compare against the folded search bytes.
			constexpr size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
			const size_t lenSearch =
				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int widthFirstCharacter = 0;
				Sci::Position posIndexDocument = pos;
				size_t indexSearch = 0;
				bool characterMatches = true;
				for (;;) {
					const unsigned char leadByte = cbView.CharAt(posIndexDocument);
					char bytes[UTF8MaxBytes + 1];
					int widthChar = 1;
					if (!UTF8IsAscii(leadByte)) {
						const int widthCharBytes = UTF8BytesOfLead[leadByte];
						bytes[0] = leadByte;
						for (int b=1; b<widthCharBytes; b++) {
							bytes[b] = cbView.CharAt(posIndexDocument+b);
						}
						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
					}
					// Remember the width of the character at pos for the forward step below.
					if (!widthFirstCharacter) {
						widthFirstCharacter = widthChar;
					}
					if ((posIndexDocument + widthChar) > limitPos) {
						break;
					}
					size_t lenFlat = 1;
					if (widthChar == 1) {
						characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
					} else {
						char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
						lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
						// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
						assert((indexSearch + lenFlat) <= searchThing.size());
						// Does folded match the buffer
						characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					}
					if (!characterMatches) {
						break;
					}
					posIndexDocument += widthChar;
					indexSearch += lenFlat;
					if (indexSearch >= lenSearch) {
						break;
					}
				}
				if (characterMatches && (indexSearch == lenSearch)) {
					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
						// Report the matched document length, which can differ from the search length.
						*length = posIndexDocument - pos;
						return pos;
					}
				}
				if (forward) {
					pos += widthFirstCharacter;
				} else {
					if (!NextCharacter(pos, increment)) {
						break;
					}
				}
			}
		} else if (dbcsCodePage) {
			// Case-insensitive DBCS: same approach as UTF-8 but characters are 1 or 2 bytes.
			constexpr size_t maxBytesCharacter = 2;
			constexpr size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
			const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int widthFirstCharacter = 0;
				Sci::Position indexDocument = 0;
				size_t indexSearch = 0;
				bool characterMatches = true;
				while (((pos + indexDocument) < limitPos) &&
					(indexSearch < lenSearch)) {
					const unsigned char leadByte = cbView.CharAt(pos + indexDocument);
					const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? 2 : 1;
					// Remember the width of the character at pos for the forward step below.
					if (!widthFirstCharacter) {
						widthFirstCharacter = widthChar;
					}
					if ((pos + indexDocument + widthChar) > limitPos) {
						break;
					}
					size_t lenFlat = 1;
					if (widthChar == 1) {
						characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
					} else {
						char bytes[maxBytesCharacter + 1];
						bytes[0] = leadByte;
						bytes[1] = cbView.CharAt(pos + indexDocument + 1);
						char folded[maxBytesCharacter * maxFoldingExpansion + 1];
						lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
						// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
						assert((indexSearch + lenFlat) <= searchThing.size());
						// Does folded match the buffer
						characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					}
					if (!characterMatches) {
						break;
					}
					indexDocument += widthChar;
					indexSearch += lenFlat;
				}
				if (characterMatches && (indexSearch == lenSearch)) {
					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
						// Report the matched document length, which can differ from the search length.
						*length = indexDocument;
						return pos;
					}
				}
				if (forward) {
					pos += widthFirstCharacter;
				} else {
					if (!NextCharacter(pos, increment)) {
						break;
					}
				}
			}
		} else {
			// Case-insensitive single-byte encoding: one folded byte per document byte.
			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			std::vector<char> searchThing(lengthFind + 1);
			pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
				bool found = (pos + lengthFind) <= limitPos;
				for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
					const char ch = cbView.CharAt(pos + indexSearch);
					const char chTest = searchThing[indexSearch];
					if (UTF8IsAscii(ch)) {
						// ASCII is lowered directly rather than going through the folder.
						found = chTest == MakeLowerCase(ch);
					} else {
						char folded[2];
						pcf->Fold(folded, sizeof(folded), &ch, 1);
						found = folded[0] == chTest;
					}
				}
				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
					return pos;
				}
				pos += increment;
			}
		}
	}
	//Platform::DebugPrintf("Not found\n");
	return -1;
}
2287 | |
2288 | const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) { |
2289 | if (regex) |
2290 | return regex->SubstituteByPosition(this, text, length); |
2291 | else |
2292 | return nullptr; |
2293 | } |
2294 | |
/** Return the line character index type reported by the cell buffer. */
LineCharacterIndexType Document::LineCharacterIndex() const noexcept {
	return cb.LineCharacterIndex();
}
2298 | |
2299 | void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) { |
2300 | return cb.AllocateLineCharacterIndex(lineCharacterIndex); |
2301 | } |
2302 | |
2303 | void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) { |
2304 | return cb.ReleaseLineCharacterIndex(lineCharacterIndex); |
2305 | } |
2306 | |
/** Return the number of lines reported by the cell buffer. */
Sci::Line Document::LinesTotal() const noexcept {
	return cb.Lines();
}
2310 | |
/** Forward a request to allocate room for the given number of lines to the cell buffer. */
void Document::AllocateLines(Sci::Line lines) {
	cb.AllocateLines(lines);
}
2314 | |
/** Reset the character class table to its defaults; includeWordClass is passed through to charClass. */
void Document::SetDefaultCharClasses(bool includeWordClass) {
	charClass.SetDefaultCharClasses(includeWordClass);
}
2318 | |
/** Assign newCharClass to the characters in chars by forwarding to charClass. */
void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) {
	charClass.SetCharClasses(chars, newCharClass);
}
2322 | |
/**
 * Query charClass for the characters belonging to characterClass.
 * buffer and the return value are passed to and from charClass.GetCharsOfClass unchanged.
 */
int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const {
	return charClass.GetCharsOfClass(characterClass, buffer);
}
2326 | |
/** Forward countCharacters to the character category map's Optimize. */
void Document::SetCharacterCategoryOptimization(int countCharacters) {
	charMap.Optimize(countCharacters);
}
2330 | |
/** Return the size reported by the character category map. */
int Document::CharacterCategoryOptimization() const noexcept {
	return charMap.Size();
}
2334 | |
/** Set endStyled to position; SetStyleFor and SetStyles apply styles from endStyled onwards. */
void SCI_METHOD Document::StartStyling(Sci_Position position) {
	endStyled = position;
}
2338 | |
2339 | bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) { |
2340 | if (enteredStyling != 0) { |
2341 | return false; |
2342 | } else { |
2343 | enteredStyling++; |
2344 | const Sci::Position prevEndStyled = endStyled; |
2345 | if (cb.SetStyleFor(endStyled, length, style)) { |
2346 | const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User, |
2347 | prevEndStyled, length); |
2348 | NotifyModified(mh); |
2349 | } |
2350 | endStyled += length; |
2351 | enteredStyling--; |
2352 | return true; |
2353 | } |
2354 | } |
2355 | |
2356 | bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) { |
2357 | if (enteredStyling != 0) { |
2358 | return false; |
2359 | } else { |
2360 | enteredStyling++; |
2361 | bool didChange = false; |
2362 | Sci::Position startMod = 0; |
2363 | Sci::Position endMod = 0; |
2364 | for (int iPos = 0; iPos < length; iPos++, endStyled++) { |
2365 | PLATFORM_ASSERT(endStyled < Length()); |
2366 | if (cb.SetStyleAt(endStyled, styles[iPos])) { |
2367 | if (!didChange) { |
2368 | startMod = endStyled; |
2369 | } |
2370 | didChange = true; |
2371 | endMod = endStyled; |
2372 | } |
2373 | } |
2374 | if (didChange) { |
2375 | const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User, |
2376 | startMod, endMod - startMod + 1); |
2377 | NotifyModified(mh); |
2378 | } |
2379 | enteredStyling--; |
2380 | return true; |
2381 | } |
2382 | } |
2383 | |
2384 | void Document::EnsureStyledTo(Sci::Position pos) { |
2385 | if ((enteredStyling == 0) && (pos > GetEndStyled())) { |
2386 | IncrementStyleClock(); |
2387 | if (pli && !pli->UseContainerLexing()) { |
2388 | const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled()); |
2389 | const Sci::Position endStyledTo = LineStart(lineEndStyled); |
2390 | pli->Colourise(endStyledTo, pos); |
2391 | } else { |
2392 | // Ask the watchers to style, and stop as soon as one responds. |
2393 | for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); |
2394 | (pos > GetEndStyled()) && (it != watchers.end()); ++it) { |
2395 | it->watcher->NotifyStyleNeeded(this, it->userData, pos); |
2396 | } |
2397 | } |
2398 | } |
2399 | } |
2400 | |
2401 | void Document::StyleToAdjustingLineDuration(Sci::Position pos) { |
2402 | const Sci::Position stylingStart = GetEndStyled(); |
2403 | ElapsedPeriod epStyling; |
2404 | EnsureStyledTo(pos); |
2405 | durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration()); |
2406 | } |
2407 | |
2408 | void Document::LexerChanged() { |
2409 | // Tell the watchers the lexer has changed. |
2410 | for (const WatcherWithUserData &watcher : watchers) { |
2411 | watcher.watcher->NotifyLexerChanged(this, watcher.userData); |
2412 | } |
2413 | } |
2414 | |
/** Return a non-owning pointer to the lexer interface held in pli; null when none is set. */
LexInterface *Document::GetLexInterface() const noexcept {
	return pli.get();
}
2418 | |
/** Take ownership of pLexInterface, replacing whatever was held in pli before. */
void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
	pli = std::move(pLexInterface);
}
2422 | |
2423 | int SCI_METHOD Document::SetLineState(Sci_Position line, int state) { |
2424 | const int statePrevious = States()->SetLineState(line, state); |
2425 | if (state != statePrevious) { |
2426 | const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), 0, 0, nullptr, |
2427 | static_cast<Sci::Line>(line)); |
2428 | NotifyModified(mh); |
2429 | } |
2430 | return statePrevious; |
2431 | } |
2432 | |
/** Return the state value stored for line, as reported by States(). */
int SCI_METHOD Document::GetLineState(Sci_Position line) const {
	return States()->GetLineState(line);
}
2436 | |
/** Return the value of States()->GetMaxLineState(). */
Sci::Line Document::GetMaxLineState() const noexcept {
	return States()->GetMaxLineState();
}
2440 | |
2441 | void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) { |
2442 | const DocModification mh(ModificationFlags::LexerState, start, |
2443 | end-start, 0, nullptr, 0); |
2444 | NotifyModified(mh); |
2445 | } |
2446 | |
2447 | StyledText Document::MarginStyledText(Sci::Line line) const noexcept { |
2448 | const LineAnnotation *pla = Margins(); |
2449 | return StyledText(pla->Length(line), pla->Text(line), |
2450 | pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); |
2451 | } |
2452 | |
2453 | void Document::MarginSetText(Sci::Line line, const char *text) { |
2454 | Margins()->SetText(line, text); |
2455 | const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line), |
2456 | 0, 0, nullptr, line); |
2457 | NotifyModified(mh); |
2458 | } |
2459 | |
2460 | void Document::MarginSetStyle(Sci::Line line, int style) { |
2461 | Margins()->SetStyle(line, style); |
2462 | NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line), |
2463 | 0, 0, nullptr, line)); |
2464 | } |
2465 | |
2466 | void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) { |
2467 | Margins()->SetStyles(line, styles); |
2468 | NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line), |
2469 | 0, 0, nullptr, line)); |
2470 | } |
2471 | |
2472 | void Document::MarginClearAll() { |
2473 | const Sci::Line maxEditorLine = LinesTotal(); |
2474 | for (Sci::Line l=0; l<maxEditorLine; l++) |
2475 | MarginSetText(l, nullptr); |
2476 | // Free remaining data |
2477 | Margins()->ClearAll(); |
2478 | } |
2479 | |
2480 | StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept { |
2481 | const LineAnnotation *pla = Annotations(); |
2482 | return StyledText(pla->Length(line), pla->Text(line), |
2483 | pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); |
2484 | } |
2485 | |
2486 | void Document::AnnotationSetText(Sci::Line line, const char *text) { |
2487 | if (line >= 0 && line < LinesTotal()) { |
2488 | const Sci::Line linesBefore = AnnotationLines(line); |
2489 | Annotations()->SetText(line, text); |
2490 | const int linesAfter = AnnotationLines(line); |
2491 | DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line), |
2492 | 0, 0, nullptr, line); |
2493 | mh.annotationLinesAdded = linesAfter - linesBefore; |
2494 | NotifyModified(mh); |
2495 | } |
2496 | } |
2497 | |
2498 | void Document::AnnotationSetStyle(Sci::Line line, int style) { |
2499 | if (line >= 0 && line < LinesTotal()) { |
2500 | Annotations()->SetStyle(line, style); |
2501 | const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line), |
2502 | 0, 0, nullptr, line); |
2503 | NotifyModified(mh); |
2504 | } |
2505 | } |
2506 | |
2507 | void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) { |
2508 | if (line >= 0 && line < LinesTotal()) { |
2509 | Annotations()->SetStyles(line, styles); |
2510 | } |
2511 | } |
2512 | |
/** Return the number of annotation lines recorded for line, as reported by Annotations(). */
int Document::AnnotationLines(Sci::Line line) const noexcept {
	return Annotations()->Lines(line);
}
2516 | |
2517 | void Document::AnnotationClearAll() { |
2518 | const Sci::Line maxEditorLine = LinesTotal(); |
2519 | for (Sci::Line l=0; l<maxEditorLine; l++) |
2520 | AnnotationSetText(l, nullptr); |
2521 | // Free remaining data |
2522 | Annotations()->ClearAll(); |
2523 | } |
2524 | |
2525 | StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept { |
2526 | const LineAnnotation *pla = EOLAnnotations(); |
2527 | return StyledText(pla->Length(line), pla->Text(line), |
2528 | pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); |
2529 | } |
2530 | |
2531 | void Document::EOLAnnotationSetText(Sci::Line line, const char *text) { |
2532 | if (line >= 0 && line < LinesTotal()) { |
2533 | EOLAnnotations()->SetText(line, text); |
2534 | const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line), |
2535 | 0, 0, nullptr, line); |
2536 | NotifyModified(mh); |
2537 | } |
2538 | } |
2539 | |
2540 | void Document::EOLAnnotationSetStyle(Sci::Line line, int style) { |
2541 | if (line >= 0 && line < LinesTotal()) { |
2542 | EOLAnnotations()->SetStyle(line, style); |
2543 | const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line), |
2544 | 0, 0, nullptr, line); |
2545 | NotifyModified(mh); |
2546 | } |
2547 | } |
2548 | |
2549 | void Document::EOLAnnotationClearAll() { |
2550 | const Sci::Line maxEditorLine = LinesTotal(); |
2551 | for (Sci::Line l=0; l<maxEditorLine; l++) |
2552 | EOLAnnotationSetText(l, nullptr); |
2553 | // Free remaining data |
2554 | EOLAnnotations()->ClearAll(); |
2555 | } |
2556 | |
2557 | void Document::IncrementStyleClock() noexcept { |
2558 | styleClock = (styleClock + 1) % 0x100000; |
2559 | } |
2560 | |
/** Forward indicator to decorations->SetCurrentIndicator. */
void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
	decorations->SetCurrentIndicator(indicator);
}
2564 | |
2565 | void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) { |
2566 | const FillResult<Sci::Position> fr = decorations->FillRange( |
2567 | position, value, fillLength); |
2568 | if (fr.changed) { |
2569 | const DocModification mh(ModificationFlags::ChangeIndicator | ModificationFlags::User, |
2570 | fr.position, fr.fillLength); |
2571 | NotifyModified(mh); |
2572 | } |
2573 | } |
2574 | |
2575 | bool Document::AddWatcher(DocWatcher *watcher, void *userData) { |
2576 | const WatcherWithUserData wwud(watcher, userData); |
2577 | std::vector<WatcherWithUserData>::iterator it = |
2578 | std::find(watchers.begin(), watchers.end(), wwud); |
2579 | if (it != watchers.end()) |
2580 | return false; |
2581 | watchers.push_back(wwud); |
2582 | return true; |
2583 | } |
2584 | |
2585 | bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) noexcept { |
2586 | try { |
2587 | // This can never fail as WatcherWithUserData constructor and == are noexcept |
2588 | // but std::find is not noexcept. |
2589 | std::vector<WatcherWithUserData>::iterator it = |
2590 | std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData)); |
2591 | if (it != watchers.end()) { |
2592 | watchers.erase(it); |
2593 | return true; |
2594 | } |
2595 | } catch (...) { |
2596 | // Ignore any exception |
2597 | } |
2598 | return false; |
2599 | } |
2600 | |
2601 | void Document::NotifyModifyAttempt() { |
2602 | for (const WatcherWithUserData &watcher : watchers) { |
2603 | watcher.watcher->NotifyModifyAttempt(this, watcher.userData); |
2604 | } |
2605 | } |
2606 | |
2607 | void Document::NotifySavePoint(bool atSavePoint) { |
2608 | for (const WatcherWithUserData &watcher : watchers) { |
2609 | watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint); |
2610 | } |
2611 | } |
2612 | |
2613 | void Document::NotifyModified(DocModification mh) { |
2614 | if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) { |
2615 | decorations->InsertSpace(mh.position, mh.length); |
2616 | } else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) { |
2617 | decorations->DeleteRange(mh.position, mh.length); |
2618 | } |
2619 | for (const WatcherWithUserData &watcher : watchers) { |
2620 | watcher.watcher->NotifyModified(this, mh, watcher.userData); |
2621 | } |
2622 | } |
2623 | |
2624 | bool Document::IsWordPartSeparator(unsigned int ch) const { |
2625 | return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch); |
2626 | } |
2627 | |
2628 | Sci::Position Document::WordPartLeft(Sci::Position pos) const { |
2629 | if (pos > 0) { |
2630 | pos -= CharacterBefore(pos).widthBytes; |
2631 | CharacterExtracted ceStart = CharacterAfter(pos); |
2632 | if (IsWordPartSeparator(ceStart.character)) { |
2633 | while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) { |
2634 | pos -= CharacterBefore(pos).widthBytes; |
2635 | } |
2636 | } |
2637 | if (pos > 0) { |
2638 | ceStart = CharacterAfter(pos); |
2639 | pos -= CharacterBefore(pos).widthBytes; |
2640 | if (IsLowerCase(ceStart.character)) { |
2641 | while (pos > 0 && IsLowerCase(CharacterAfter(pos).character)) |
2642 | pos -= CharacterBefore(pos).widthBytes; |
2643 | if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character)) |
2644 | pos += CharacterAfter(pos).widthBytes; |
2645 | } else if (IsUpperCase(ceStart.character)) { |
2646 | while (pos > 0 && IsUpperCase(CharacterAfter(pos).character)) |
2647 | pos -= CharacterBefore(pos).widthBytes; |
2648 | if (!IsUpperCase(CharacterAfter(pos).character)) |
2649 | pos += CharacterAfter(pos).widthBytes; |
2650 | } else if (IsADigit(ceStart.character)) { |
2651 | while (pos > 0 && IsADigit(CharacterAfter(pos).character)) |
2652 | pos -= CharacterBefore(pos).widthBytes; |
2653 | if (!IsADigit(CharacterAfter(pos).character)) |
2654 | pos += CharacterAfter(pos).widthBytes; |
2655 | } else if (IsPunctuation(ceStart.character)) { |
2656 | while (pos > 0 && IsPunctuation(CharacterAfter(pos).character)) |
2657 | pos -= CharacterBefore(pos).widthBytes; |
2658 | if (!IsPunctuation(CharacterAfter(pos).character)) |
2659 | pos += CharacterAfter(pos).widthBytes; |
2660 | } else if (IsASpace(ceStart.character)) { |
2661 | while (pos > 0 && IsASpace(CharacterAfter(pos).character)) |
2662 | pos -= CharacterBefore(pos).widthBytes; |
2663 | if (!IsASpace(CharacterAfter(pos).character)) |
2664 | pos += CharacterAfter(pos).widthBytes; |
2665 | } else if (!IsASCII(ceStart.character)) { |
2666 | while (pos > 0 && !IsASCII(CharacterAfter(pos).character)) |
2667 | pos -= CharacterBefore(pos).widthBytes; |
2668 | if (IsASCII(CharacterAfter(pos).character)) |
2669 | pos += CharacterAfter(pos).widthBytes; |
2670 | } else { |
2671 | pos += CharacterAfter(pos).widthBytes; |
2672 | } |
2673 | } |
2674 | } |
2675 | return pos; |
2676 | } |
2677 | |
2678 | Sci::Position Document::WordPartRight(Sci::Position pos) const { |
2679 | CharacterExtracted ceStart = CharacterAfter(pos); |
2680 | const Sci::Position length = LengthNoExcept(); |
2681 | if (IsWordPartSeparator(ceStart.character)) { |
2682 | while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character)) |
2683 | pos += CharacterAfter(pos).widthBytes; |
2684 | ceStart = CharacterAfter(pos); |
2685 | } |
2686 | if (!IsASCII(ceStart.character)) { |
2687 | while (pos < length && !IsASCII(CharacterAfter(pos).character)) |
2688 | pos += CharacterAfter(pos).widthBytes; |
2689 | } else if (IsLowerCase(ceStart.character)) { |
2690 | while (pos < length && IsLowerCase(CharacterAfter(pos).character)) |
2691 | pos += CharacterAfter(pos).widthBytes; |
2692 | } else if (IsUpperCase(ceStart.character)) { |
2693 | if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) { |
2694 | pos += CharacterAfter(pos).widthBytes; |
2695 | while (pos < length && IsLowerCase(CharacterAfter(pos).character)) |
2696 | pos += CharacterAfter(pos).widthBytes; |
2697 | } else { |
2698 | while (pos < length && IsUpperCase(CharacterAfter(pos).character)) |
2699 | pos += CharacterAfter(pos).widthBytes; |
2700 | } |
2701 | if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character)) |
2702 | pos -= CharacterBefore(pos).widthBytes; |
2703 | } else if (IsADigit(ceStart.character)) { |
2704 | while (pos < length && IsADigit(CharacterAfter(pos).character)) |
2705 | pos += CharacterAfter(pos).widthBytes; |
2706 | } else if (IsPunctuation(ceStart.character)) { |
2707 | while (pos < length && IsPunctuation(CharacterAfter(pos).character)) |
2708 | pos += CharacterAfter(pos).widthBytes; |
2709 | } else if (IsASpace(ceStart.character)) { |
2710 | while (pos < length && IsASpace(CharacterAfter(pos).character)) |
2711 | pos += CharacterAfter(pos).widthBytes; |
2712 | } else { |
2713 | pos += CharacterAfter(pos).widthBytes; |
2714 | } |
2715 | return pos; |
2716 | } |
2717 | |
2718 | Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept { |
2719 | const char sStart = cb.StyleAt(pos); |
2720 | if (delta < 0) { |
2721 | while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos)))) |
2722 | pos--; |
2723 | pos++; |
2724 | } else { |
2725 | while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos)))) |
2726 | pos++; |
2727 | } |
2728 | return pos; |
2729 | } |
2730 | |
// Return the partner of a brace character: () [] {} and <> are paired.
// Any other character yields '\0'.
static char BraceOpposite(char ch) noexcept {
	// Partners are adjacent: an even index holds the opener and the following
	// odd index holds its closer, so flipping the lowest bit swaps between them.
	constexpr char bracePairs[] = "()[]{}<>";
	for (size_t index = 0; bracePairs[index] != '\0'; index++) {
		if (bracePairs[index] == ch) {
			return bracePairs[index ^ 1];
		}
	}
	return '\0';
}
2753 | |
2754 | // TODO: should be able to extend styled region to find matching brace |
2755 | Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept { |
2756 | const char chBrace = CharAt(position); |
2757 | const char chSeek = BraceOpposite(chBrace); |
2758 | if (chSeek == '\0') |
2759 | return - 1; |
2760 | const int styBrace = StyleIndexAt(position); |
2761 | int direction = -1; |
2762 | if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<') |
2763 | direction = 1; |
2764 | int depth = 1; |
2765 | position = useStartPos ? startPos : NextPosition(position, direction); |
2766 | while ((position >= 0) && (position < LengthNoExcept())) { |
2767 | const char chAtPos = CharAt(position); |
2768 | const int styAtPos = StyleIndexAt(position); |
2769 | if ((position > GetEndStyled()) || (styAtPos == styBrace)) { |
2770 | if (chAtPos == chBrace) |
2771 | depth++; |
2772 | if (chAtPos == chSeek) |
2773 | depth--; |
2774 | if (depth == 0) |
2775 | return position; |
2776 | } |
2777 | const Sci::Position positionBeforeMove = position; |
2778 | position = NextPosition(position, direction); |
2779 | if (position == positionBeforeMove) |
2780 | break; |
2781 | } |
2782 | return - 1; |
2783 | } |
2784 | |
2785 | /** |
2786 | * Implementation of RegexSearchBase for the default built-in regular expression engine |
2787 | */ |
2788 | class BuiltinRegex : public RegexSearchBase { |
2789 | public: |
2790 | explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {} |
2791 | |
2792 | Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, |
2793 | bool caseSensitive, bool word, bool wordStart, FindOption flags, |
2794 | Sci::Position *length) override; |
2795 | |
2796 | const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override; |
2797 | |
2798 | private: |
2799 | RESearch search; |
2800 | std::string substituted; |
2801 | }; |
2802 | |
2803 | namespace { |
2804 | |
2805 | /** |
2806 | * RESearchRange keeps track of search range. |
2807 | */ |
2808 | class RESearchRange { |
2809 | public: |
2810 | const Document *doc; |
2811 | int increment; |
2812 | Sci::Position startPos; |
2813 | Sci::Position endPos; |
2814 | Sci::Line lineRangeStart; |
2815 | Sci::Line lineRangeEnd; |
2816 | Sci::Line lineRangeBreak; |
2817 | RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) { |
2818 | increment = (minPos <= maxPos) ? 1 : -1; |
2819 | |
2820 | // Range endpoints should not be inside DBCS characters or between a CR and LF, |
2821 | // but just in case, move them. |
2822 | startPos = doc->MovePositionOutsideChar(minPos, 1, true); |
2823 | endPos = doc->MovePositionOutsideChar(maxPos, 1, true); |
2824 | |
2825 | lineRangeStart = doc->SciLineFromPosition(startPos); |
2826 | lineRangeEnd = doc->SciLineFromPosition(endPos); |
2827 | lineRangeBreak = lineRangeEnd + increment; |
2828 | } |
2829 | Range LineRange(Sci::Line line) const { |
2830 | Range range(doc->LineStart(line), doc->LineEnd(line)); |
2831 | if (increment == 1) { |
2832 | if (line == lineRangeStart) |
2833 | range.start = startPos; |
2834 | if (line == lineRangeEnd) |
2835 | range.end = endPos; |
2836 | } else { |
2837 | if (line == lineRangeEnd) |
2838 | range.start = endPos; |
2839 | if (line == lineRangeStart) |
2840 | range.end = startPos; |
2841 | } |
2842 | return range; |
2843 | } |
2844 | }; |
2845 | |
2846 | // Define a way for the Regular Expression code to access the document |
2847 | class DocumentIndexer : public CharacterIndexer { |
2848 | Document *pdoc; |
2849 | Sci::Position end; |
2850 | public: |
2851 | DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept : |
2852 | pdoc(pdoc_), end(end_) { |
2853 | } |
2854 | |
2855 | DocumentIndexer(const DocumentIndexer &) = delete; |
2856 | DocumentIndexer(DocumentIndexer &&) = delete; |
2857 | DocumentIndexer &operator=(const DocumentIndexer &) = delete; |
2858 | DocumentIndexer &operator=(DocumentIndexer &&) = delete; |
2859 | |
2860 | ~DocumentIndexer() override = default; |
2861 | |
2862 | char CharAt(Sci::Position index) const noexcept override { |
2863 | if (index < 0 || index >= end) |
2864 | return 0; |
2865 | else |
2866 | return pdoc->CharAt(index); |
2867 | } |
2868 | }; |
2869 | |
2870 | #ifndef NO_CXX11_REGEX |
2871 | |
2872 | class ByteIterator { |
2873 | public: |
2874 | using iterator_category = std::bidirectional_iterator_tag; |
2875 | using value_type = char; |
2876 | using difference_type = ptrdiff_t; |
2877 | using pointer = char*; |
2878 | using reference = char&; |
2879 | |
2880 | const Document *doc; |
2881 | Sci::Position position; |
2882 | |
2883 | explicit ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : |
2884 | doc(doc_), position(position_) { |
2885 | } |
2886 | char operator*() const noexcept { |
2887 | return doc->CharAt(position); |
2888 | } |
2889 | ByteIterator &operator++() noexcept { |
2890 | position++; |
2891 | return *this; |
2892 | } |
2893 | ByteIterator operator++(int) noexcept { |
2894 | ByteIterator retVal(*this); |
2895 | position++; |
2896 | return retVal; |
2897 | } |
2898 | ByteIterator &operator--() noexcept { |
2899 | position--; |
2900 | return *this; |
2901 | } |
2902 | bool operator==(const ByteIterator &other) const noexcept { |
2903 | return doc == other.doc && position == other.position; |
2904 | } |
2905 | bool operator!=(const ByteIterator &other) const noexcept { |
2906 | return doc != other.doc || position != other.position; |
2907 | } |
2908 | Sci::Position Pos() const noexcept { |
2909 | return position; |
2910 | } |
2911 | Sci::Position PosRoundUp() const noexcept { |
2912 | return position; |
2913 | } |
2914 | }; |
2915 | |
2916 | // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide. |
2917 | // Would be better to use sizeof(wchar_t) or similar to differentiate |
2918 | // but easier for now to hard-code platforms. |
2919 | // C++11 has char16_t and char32_t but neither Clang nor Visual C++ |
2920 | // appear to allow specializing basic_regex over these. |
2921 | |
2922 | #ifdef _WIN32 |
2923 | #define WCHAR_T_IS_16 1 |
2924 | #else |
2925 | #define WCHAR_T_IS_16 0 |
2926 | #endif |
2927 | |
2928 | #if WCHAR_T_IS_16 |
2929 | |
2930 | // On Windows, report non-BMP characters as 2 separate surrogates as that |
2931 | // matches wregex since it is based on wchar_t. |
2932 | class UTF8Iterator { |
2933 | // These 3 fields determine the iterator position and are used for comparisons |
2934 | const Document *doc; |
2935 | Sci::Position position; |
2936 | size_t characterIndex; |
2937 | // Remaining fields are derived from the determining fields so are excluded in comparisons |
2938 | unsigned int lenBytes; |
2939 | size_t lenCharacters; |
2940 | wchar_t buffered[2]; |
2941 | public: |
2942 | using iterator_category = std::bidirectional_iterator_tag; |
2943 | using value_type = wchar_t; |
2944 | using difference_type = ptrdiff_t; |
2945 | using pointer = wchar_t*; |
2946 | using reference = wchar_t&; |
2947 | |
2948 | explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : |
2949 | doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} { |
2950 | buffered[0] = 0; |
2951 | buffered[1] = 0; |
2952 | if (doc) { |
2953 | ReadCharacter(); |
2954 | } |
2955 | } |
2956 | wchar_t operator*() const noexcept { |
2957 | assert(lenCharacters != 0); |
2958 | return buffered[characterIndex]; |
2959 | } |
2960 | UTF8Iterator &operator++() noexcept { |
2961 | if ((characterIndex + 1) < (lenCharacters)) { |
2962 | characterIndex++; |
2963 | } else { |
2964 | position += lenBytes; |
2965 | ReadCharacter(); |
2966 | characterIndex = 0; |
2967 | } |
2968 | return *this; |
2969 | } |
2970 | UTF8Iterator operator++(int) noexcept { |
2971 | UTF8Iterator retVal(*this); |
2972 | if ((characterIndex + 1) < (lenCharacters)) { |
2973 | characterIndex++; |
2974 | } else { |
2975 | position += lenBytes; |
2976 | ReadCharacter(); |
2977 | characterIndex = 0; |
2978 | } |
2979 | return retVal; |
2980 | } |
2981 | UTF8Iterator &operator--() noexcept { |
2982 | if (characterIndex) { |
2983 | characterIndex--; |
2984 | } else { |
2985 | position = doc->NextPosition(position, -1); |
2986 | ReadCharacter(); |
2987 | characterIndex = lenCharacters - 1; |
2988 | } |
2989 | return *this; |
2990 | } |
2991 | bool operator==(const UTF8Iterator &other) const noexcept { |
2992 | // Only test the determining fields, not the character widths and values derived from this |
2993 | return doc == other.doc && |
2994 | position == other.position && |
2995 | characterIndex == other.characterIndex; |
2996 | } |
2997 | bool operator!=(const UTF8Iterator &other) const noexcept { |
2998 | // Only test the determining fields, not the character widths and values derived from this |
2999 | return doc != other.doc || |
3000 | position != other.position || |
3001 | characterIndex != other.characterIndex; |
3002 | } |
3003 | Sci::Position Pos() const noexcept { |
3004 | return position; |
3005 | } |
3006 | Sci::Position PosRoundUp() const noexcept { |
3007 | if (characterIndex) |
3008 | return position + lenBytes; // Force to end of character |
3009 | else |
3010 | return position; |
3011 | } |
3012 | private: |
3013 | void ReadCharacter() noexcept { |
3014 | const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); |
3015 | lenBytes = charExtracted.widthBytes; |
3016 | if (charExtracted.character == unicodeReplacementChar) { |
3017 | lenCharacters = 1; |
3018 | buffered[0] = static_cast<wchar_t>(charExtracted.character); |
3019 | } else { |
3020 | lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered); |
3021 | } |
3022 | } |
3023 | }; |
3024 | |
3025 | #else |
3026 | |
3027 | // On Unix, report non-BMP characters as single characters |
3028 | |
3029 | class UTF8Iterator { |
3030 | const Document *doc; |
3031 | Sci::Position position; |
3032 | public: |
3033 | using iterator_category = std::bidirectional_iterator_tag; |
3034 | using value_type = wchar_t; |
3035 | using difference_type = ptrdiff_t; |
3036 | using pointer = wchar_t*; |
3037 | using reference = wchar_t&; |
3038 | |
3039 | explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : |
3040 | doc(doc_), position(position_) { |
3041 | } |
3042 | wchar_t operator*() const noexcept { |
3043 | const Document::CharacterExtracted = doc->ExtractCharacter(position); |
3044 | return charExtracted.character; |
3045 | } |
3046 | UTF8Iterator &operator++() noexcept { |
3047 | position = doc->NextPosition(position, 1); |
3048 | return *this; |
3049 | } |
3050 | UTF8Iterator operator++(int) noexcept { |
3051 | UTF8Iterator retVal(*this); |
3052 | position = doc->NextPosition(position, 1); |
3053 | return retVal; |
3054 | } |
3055 | UTF8Iterator &operator--() noexcept { |
3056 | position = doc->NextPosition(position, -1); |
3057 | return *this; |
3058 | } |
3059 | bool operator==(const UTF8Iterator &other) const noexcept { |
3060 | return doc == other.doc && position == other.position; |
3061 | } |
3062 | bool operator!=(const UTF8Iterator &other) const noexcept { |
3063 | return doc != other.doc || position != other.position; |
3064 | } |
3065 | Sci::Position Pos() const noexcept { |
3066 | return position; |
3067 | } |
3068 | Sci::Position PosRoundUp() const noexcept { |
3069 | return position; |
3070 | } |
3071 | }; |
3072 | |
3073 | #endif |
3074 | |
3075 | std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) { |
3076 | std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default; |
3077 | if (!doc->IsLineStartPosition(startPos)) |
3078 | flagsMatch |= std::regex_constants::match_not_bol; |
3079 | if (!doc->IsLineEndPosition(endPos)) |
3080 | flagsMatch |= std::regex_constants::match_not_eol; |
3081 | return flagsMatch; |
3082 | } |
3083 | |
3084 | template<typename Iterator, typename Regex> |
3085 | bool MatchOnLines(const Document *doc, const Regex ®exp, const RESearchRange &resr, RESearch &search) { |
3086 | std::match_results<Iterator> match; |
3087 | |
3088 | // MSVC and libc++ have problems with ^ and $ matching line ends inside a range. |
3089 | // CRLF line ends are also a problem as ^ and $ only treat LF as a line end. |
3090 | // The std::regex::multiline option was added to C++17 to improve behaviour but |
3091 | // has not been implemented by compiler runtimes with MSVC always in multiline |
3092 | // mode and libc++ and libstdc++ always in single-line mode. |
3093 | // If multiline regex worked well then the line by line iteration could be removed |
3094 | // for the forwards case and replaced with the following 4 lines: |
3095 | #ifdef REGEX_MULTILINE |
3096 | Iterator itStart(doc, resr.startPos); |
3097 | Iterator itEnd(doc, resr.endPos); |
3098 | const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); |
3099 | const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); |
3100 | #else |
3101 | // Line by line. |
3102 | bool matched = false; |
3103 | for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { |
3104 | const Range lineRange = resr.LineRange(line); |
3105 | Iterator itStart(doc, lineRange.start); |
3106 | Iterator itEnd(doc, lineRange.end); |
3107 | std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end); |
3108 | matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); |
3109 | // Check for the last match on this line. |
3110 | if (matched) { |
3111 | if (resr.increment == -1) { |
3112 | while (matched) { |
3113 | Iterator itNext(doc, match[0].second.PosRoundUp()); |
3114 | flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end); |
3115 | std::match_results<Iterator> matchNext; |
3116 | matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch); |
3117 | if (matched) { |
3118 | if (match[0].first == match[0].second) { |
3119 | // Empty match means failure so exit |
3120 | return false; |
3121 | } |
3122 | match = matchNext; |
3123 | } |
3124 | } |
3125 | matched = true; |
3126 | } |
3127 | break; |
3128 | } |
3129 | } |
3130 | #endif |
3131 | if (matched) { |
3132 | for (size_t co = 0; co < match.size() && co < RESearch::MAXTAG; co++) { |
3133 | search.bopat[co] = match[co].first.Pos(); |
3134 | search.eopat[co] = match[co].second.PosRoundUp(); |
3135 | const Sci::Position lenMatch = search.eopat[co] - search.bopat[co]; |
3136 | search.pat[co].resize(lenMatch); |
3137 | for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) { |
3138 | search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]); |
3139 | } |
3140 | } |
3141 | } |
3142 | return matched; |
3143 | } |
3144 | |
3145 | Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, |
3146 | bool caseSensitive, Sci::Position *length, RESearch &search) { |
3147 | const RESearchRange resr(doc, minPos, maxPos); |
3148 | try { |
3149 | //ElapsedPeriod ep; |
3150 | std::regex::flag_type flagsRe = std::regex::ECMAScript; |
3151 | // Flags that appear to have no effect: |
3152 | // | std::regex::collate | std::regex::extended; |
3153 | if (!caseSensitive) |
3154 | flagsRe = flagsRe | std::regex::icase; |
3155 | |
3156 | // Clear the RESearch so can fill in matches |
3157 | search.Clear(); |
3158 | |
3159 | bool matched = false; |
3160 | if (CpUtf8 == doc->dbcsCodePage) { |
3161 | const std::wstring ws = WStringFromUTF8(s); |
3162 | std::wregex regexp; |
3163 | regexp.assign(ws, flagsRe); |
3164 | matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search); |
3165 | |
3166 | } else { |
3167 | std::regex regexp; |
3168 | regexp.assign(s, flagsRe); |
3169 | matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search); |
3170 | } |
3171 | |
3172 | Sci::Position posMatch = -1; |
3173 | if (matched) { |
3174 | posMatch = search.bopat[0]; |
3175 | *length = search.eopat[0] - search.bopat[0]; |
3176 | } |
3177 | // Example - search in doc/ScintillaHistory.html for |
3178 | // [[:upper:]]eta[[:space:]] |
3179 | // On MacBook, normally around 1 second but with locale imbued -> 14 seconds. |
3180 | //const double durSearch = ep.Duration(true); |
3181 | //Platform::DebugPrintf("Search:%9.6g \n", durSearch); |
3182 | return posMatch; |
3183 | } catch (std::regex_error &) { |
3184 | // Failed to create regular expression |
3185 | throw RegexError(); |
3186 | } catch (...) { |
3187 | // Failed in some other way |
3188 | return -1; |
3189 | } |
3190 | } |
3191 | |
3192 | #endif |
3193 | |
3194 | } |
3195 | |
3196 | Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, |
3197 | bool caseSensitive, bool, bool, FindOption flags, |
3198 | Sci::Position *length) { |
3199 | |
3200 | #ifndef NO_CXX11_REGEX |
3201 | if (FlagSet(flags, FindOption::Cxx11RegEx)) { |
3202 | return Cxx11RegexFindText(doc, minPos, maxPos, s, |
3203 | caseSensitive, length, search); |
3204 | } |
3205 | #endif |
3206 | |
3207 | const RESearchRange resr(doc, minPos, maxPos); |
3208 | |
3209 | const bool posix = FlagSet(flags, FindOption::Posix); |
3210 | |
3211 | const char *errmsg = search.Compile(s, *length, caseSensitive, posix); |
3212 | if (errmsg) { |
3213 | return -1; |
3214 | } |
3215 | // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) |
3216 | // Replace first '.' with '-' in each property file variable reference: |
3217 | // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) |
3218 | // Replace: $(\1-\2) |
3219 | Sci::Position pos = -1; |
3220 | Sci::Position lenRet = 0; |
3221 | const bool searchforLineStart = s[0] == '^'; |
3222 | const char searchEnd = s[*length - 1]; |
3223 | const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0'; |
3224 | const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\'); |
3225 | for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { |
3226 | Sci::Position startOfLine = doc->LineStart(line); |
3227 | Sci::Position endOfLine = doc->LineEnd(line); |
3228 | if (resr.increment == 1) { |
3229 | if (line == resr.lineRangeStart) { |
3230 | if ((resr.startPos != startOfLine) && searchforLineStart) |
3231 | continue; // Can't match start of line if start position after start of line |
3232 | startOfLine = resr.startPos; |
3233 | } |
3234 | if (line == resr.lineRangeEnd) { |
3235 | if ((resr.endPos != endOfLine) && searchforLineEnd) |
3236 | continue; // Can't match end of line if end position before end of line |
3237 | endOfLine = resr.endPos; |
3238 | } |
3239 | } else { |
3240 | if (line == resr.lineRangeEnd) { |
3241 | if ((resr.endPos != startOfLine) && searchforLineStart) |
3242 | continue; // Can't match start of line if end position after start of line |
3243 | startOfLine = resr.endPos; |
3244 | } |
3245 | if (line == resr.lineRangeStart) { |
3246 | if ((resr.startPos != endOfLine) && searchforLineEnd) |
3247 | continue; // Can't match end of line if start position before end of line |
3248 | endOfLine = resr.startPos; |
3249 | } |
3250 | } |
3251 | |
3252 | const DocumentIndexer di(doc, endOfLine); |
3253 | int success = search.Execute(di, startOfLine, endOfLine); |
3254 | if (success) { |
3255 | pos = search.bopat[0]; |
3256 | // Ensure only whole characters selected |
3257 | search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false); |
3258 | lenRet = search.eopat[0] - search.bopat[0]; |
3259 | // There can be only one start of a line, so no need to look for last match in line |
3260 | if ((resr.increment == -1) && !searchforLineStart) { |
3261 | // Check for the last match on this line. |
3262 | int repetitions = 1000; // Break out of infinite loop |
3263 | while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { |
3264 | success = search.Execute(di, pos+1, endOfLine); |
3265 | if (success) { |
3266 | if (search.eopat[0] <= minPos) { |
3267 | pos = search.bopat[0]; |
3268 | lenRet = search.eopat[0] - search.bopat[0]; |
3269 | } else { |
3270 | success = 0; |
3271 | } |
3272 | } |
3273 | } |
3274 | } |
3275 | break; |
3276 | } |
3277 | } |
3278 | *length = lenRet; |
3279 | return pos; |
3280 | } |
3281 | |
3282 | const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) { |
3283 | substituted.clear(); |
3284 | const DocumentIndexer di(doc, doc->Length()); |
3285 | search.GrabMatches(di); |
3286 | for (Sci::Position j = 0; j < *length; j++) { |
3287 | if (text[j] == '\\') { |
3288 | if (text[j + 1] >= '0' && text[j + 1] <= '9') { |
3289 | const unsigned int patNum = text[j + 1] - '0'; |
3290 | const Sci::Position len = search.eopat[patNum] - search.bopat[patNum]; |
3291 | if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur |
3292 | substituted.append(search.pat[patNum].c_str(), len); |
3293 | j++; |
3294 | } else { |
3295 | j++; |
3296 | switch (text[j]) { |
3297 | case 'a': |
3298 | substituted.push_back('\a'); |
3299 | break; |
3300 | case 'b': |
3301 | substituted.push_back('\b'); |
3302 | break; |
3303 | case 'f': |
3304 | substituted.push_back('\f'); |
3305 | break; |
3306 | case 'n': |
3307 | substituted.push_back('\n'); |
3308 | break; |
3309 | case 'r': |
3310 | substituted.push_back('\r'); |
3311 | break; |
3312 | case 't': |
3313 | substituted.push_back('\t'); |
3314 | break; |
3315 | case 'v': |
3316 | substituted.push_back('\v'); |
3317 | break; |
3318 | case '\\': |
3319 | substituted.push_back('\\'); |
3320 | break; |
3321 | default: |
3322 | substituted.push_back('\\'); |
3323 | j--; |
3324 | } |
3325 | } |
3326 | } else { |
3327 | substituted.push_back(text[j]); |
3328 | } |
3329 | } |
3330 | *length = substituted.length(); |
3331 | return substituted.c_str(); |
3332 | } |
3333 | |
3334 | #ifndef SCI_OWNREGEX |
3335 | |
3336 | RegexSearchBase *Scintilla::Internal::CreateRegexSearch(CharClassify *charClassTable) { |
3337 | return new BuiltinRegex(charClassTable); |
3338 | } |
3339 | |
3340 | #endif |
3341 | |