1// Scintilla source code edit control
2/** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5// Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <cstddef>
9#include <cstdlib>
10#include <cassert>
11#include <cstring>
12#include <cstdio>
13#include <cmath>
14
15#include <stdexcept>
16#include <string>
17#include <string_view>
18#include <vector>
19#include <forward_list>
20#include <optional>
21#include <algorithm>
22#include <memory>
23#include <chrono>
24
25#ifndef NO_CXX11_REGEX
26#include <regex>
27#endif
28
29#include "ScintillaTypes.h"
30#include "ILoader.h"
31#include "ILexer.h"
32
33#include "Debugging.h"
34
35#include "CharacterType.h"
36#include "CharacterCategoryMap.h"
37#include "Position.h"
38#include "SplitVector.h"
39#include "Partitioning.h"
40#include "RunStyles.h"
41#include "CellBuffer.h"
42#include "PerLine.h"
43#include "CharClassify.h"
44#include "Decoration.h"
45#include "CaseFolder.h"
46#include "Document.h"
47#include "RESearch.h"
48#include "UniConversion.h"
49#include "ElapsedPeriod.h"
50
51using namespace Scintilla;
52using namespace Scintilla::Internal;
53
54LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) {
55}
56
// Destructor is defaulted here, out of line, rather than in the class declaration.
LexInterface::~LexInterface() noexcept = default;
58
// Replace the current lexer with instance_ (via reset on the holder) and then
// tell the document that its lexer has changed.
// NOTE(review): pdoc is dereferenced unconditionally here, unlike Colourise which
// checks it first - presumably pdoc is never null in practice; confirm with callers.
void LexInterface::SetInstance(ILexer5 *instance_) {
	instance.reset(instance_);
	pdoc->LexerChanged();
}
63
64void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
65 if (pdoc && instance && !performingStyle) {
66 // Protect against reentrance, which may occur, for example, when
67 // fold points are discovered while performing styling and the folding
68 // code looks for child lines which may trigger styling.
69 performingStyle = true;
70
71 const Sci::Position lengthDoc = pdoc->Length();
72 if (end == -1)
73 end = lengthDoc;
74 const Sci::Position len = end - start;
75
76 PLATFORM_ASSERT(len >= 0);
77 PLATFORM_ASSERT(start + len <= lengthDoc);
78
79 int styleStart = 0;
80 if (start > 0)
81 styleStart = pdoc->StyleAt(start - 1);
82
83 if (len > 0) {
84 instance->Lex(start, len, styleStart, pdoc);
85 instance->Fold(start, len, styleStart, pdoc);
86 }
87
88 performingStyle = false;
89 }
90}
91
92LineEndType LexInterface::LineEndTypesSupported() {
93 if (instance) {
94 return static_cast<LineEndType>(instance->LineEndTypesSupported());
95 }
96 return LineEndType::Default;
97}
98
99bool LexInterface::UseContainerLexing() const noexcept {
100 return !instance;
101}
102
103ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
104 duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
105}
106
107void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
108 // Only adjust for multiple actions to avoid instability
109 if (numberActions < 8)
110 return;
111
112 // Alpha value for exponential smoothing.
113 // Most recent value contributes 25% to smoothed value.
114 constexpr double alpha = 0.25;
115
116 const double durationOne = durationOfActions / numberActions;
117 duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration,
118 minDuration, maxDuration);
119}
120
// Current smoothed estimate of the duration of a single action.
double ActionDuration::Duration() const noexcept {
	return duration;
}
124
125size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept {
126 return std::lround(secondsAllowed / Duration());
127}
128
// Construct an empty document.
// The cell buffer receives two flags derived from options: the first is true unless
// DocumentOption::StylesNone is set, the second is whether DocumentOption::TextLarge is set.
// The three values given to durationStyleOneByte are its initial, minimum and maximum duration.
Document::Document(DocumentOption options) :
	cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
	durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
	refCount = 0;
	// Default end of line mode depends on the build platform.
#ifdef _WIN32
	eolMode = EndOfLine::CrLf;
#else
	eolMode = EndOfLine::Lf;
#endif
	dbcsCodePage = CpUtf8;
	lineEndBitSet = LineEndType::Default;
	endStyled = 0;
	styleClock = 0;
	enteredModification = 0;
	enteredStyling = 0;
	enteredReadOnlyCount = 0;
	insertionSet = false;
	tabInChars = 8;
	indentInChars = 0;
	actualIndentInChars = 8;
	useTabs = true;
	tabIndents = true;
	backspaceUnindents = false;

	matchesValid = false;

	// One per-line data object for each slot; the margin, annotation and
	// EOL annotation slots all use LineAnnotation.
	perLineData[ldMarkers] = std::make_unique<LineMarkers>();
	perLineData[ldLevels] = std::make_unique<LineLevels>();
	perLineData[ldState] = std::make_unique<LineState>();
	perLineData[ldMargin] = std::make_unique<LineAnnotation>();
	perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
	perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();

	decorations = DecorationListCreate(IsLarge());

	// Register this document as the cell buffer's per-line handler and
	// tell the buffer whether its content is UTF-8.
	cb.SetPerLine(this);
	cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
}
167
168Document::~Document() {
169 for (const WatcherWithUserData &watcher : watchers) {
170 watcher.watcher->NotifyDeleted(this, watcher.userData);
171 }
172}
173
174// Increase reference count and return its previous value.
175int Document::AddRef() {
176 return refCount++;
177}
178
// Decrease reference count and return its new value.
180// Delete the document if reference count reaches zero.
181int SCI_METHOD Document::Release() {
182 const int curRefCount = --refCount;
183 if (curRefCount == 0)
184 delete this;
185 return curRefCount;
186}
187
188void Document::Init() {
189 for (const std::unique_ptr<PerLine> &pl : perLineData) {
190 if (pl)
191 pl->Init();
192 }
193}
194
195void Document::InsertLine(Sci::Line line) {
196 for (const std::unique_ptr<PerLine> &pl : perLineData) {
197 if (pl)
198 pl->InsertLine(line);
199 }
200}
201
202void Document::InsertLines(Sci::Line line, Sci::Line lines) {
203 for (const auto &pl : perLineData) {
204 if (pl)
205 pl->InsertLines(line, lines);
206 }
207}
208
209void Document::RemoveLine(Sci::Line line) {
210 for (const std::unique_ptr<PerLine> &pl : perLineData) {
211 if (pl)
212 pl->RemoveLine(line);
213 }
214}
215
216LineMarkers *Document::Markers() const noexcept {
217 return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get());
218}
219
220LineLevels *Document::Levels() const noexcept {
221 return dynamic_cast<LineLevels *>(perLineData[ldLevels].get());
222}
223
224LineState *Document::States() const noexcept {
225 return dynamic_cast<LineState *>(perLineData[ldState].get());
226}
227
228LineAnnotation *Document::Margins() const noexcept {
229 return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get());
230}
231
232LineAnnotation *Document::Annotations() const noexcept {
233 return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
234}
235
236LineAnnotation *Document::EOLAnnotations() const noexcept {
237 return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get());
238}
239
240LineEndType Document::LineEndTypesSupported() const {
241 if ((CpUtf8 == dbcsCodePage) && pli)
242 return pli->LineEndTypesSupported();
243 else
244 return LineEndType::Default;
245}
246
247bool Document::SetDBCSCodePage(int dbcsCodePage_) {
248 if (dbcsCodePage != dbcsCodePage_) {
249 dbcsCodePage = dbcsCodePage_;
250 SetCaseFolder(nullptr);
251 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
252 cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
253 ModifiedAt(0); // Need to restyle whole document
254 return true;
255 } else {
256 return false;
257 }
258}
259
260bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) {
261 if (lineEndBitSet != lineEndBitSet_) {
262 lineEndBitSet = lineEndBitSet_;
263 const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
264 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
265 ModifiedAt(0);
266 cb.SetLineEndTypes(lineEndBitSetActive);
267 return true;
268 } else {
269 return false;
270 }
271 } else {
272 return false;
273 }
274}
275
// Mark the current state of the cell buffer as the save point and
// notify that the document is now at a save point.
void Document::SetSavePoint() {
	cb.SetSavePoint();
	NotifySavePoint(true);
}
280
// Undo every step recorded during the active tentative session, sending the
// same kind of before/after notifications as a normal undo, then commit the
// tentative session in the cell buffer.
// Does nothing when no tentative session is active or when called while
// another modification is already in progress.
void Document::TentativeUndo() {
	if (!TentativeActive())
		return;
	CheckReadOnly();
	if (enteredModification == 0) {
		// Counter guards against reentrant modification while notifications are sent.
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.TentativeSteps();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// "Before" notification: undoing a removal re-inserts text,
				// a container action is reported with its token, and anything
				// else is reported as about to be deleted.
				if (action.at == ActionType::remove) {
					NotifyModified(DocModification(
									ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
				} else if (action.at == ActionType::container) {
					DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
				}
				cb.PerformUndoStep();
				if (action.at != ActionType::container) {
					ModifiedAt(action.position);
				}

				ModificationFlags modFlags = ModificationFlags::Undo;
				// With undo, an insertion action becomes a deletion notification
				if (action.at == ActionType::remove) {
					modFlags |= ModificationFlags::InsertText;
				} else if (action.at == ActionType::insert) {
					modFlags |= ModificationFlags::DeleteText;
				}
				if (steps > 1)
					modFlags |= ModificationFlags::MultiStepUndoRedo;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step also reports whether any step in the group changed the line count.
				if (step == steps - 1) {
					modFlags |= ModificationFlags::LastStepInUndoRedo;
					if (multiLine)
						modFlags |= ModificationFlags::MultilineUndoRedo;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
							       linesAdded, action.data.get()));
			}

			// Only notify about the save point when undoing moved onto or off it.
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);

			cb.TentativeCommit();
		}
		enteredModification--;
	}
}
341
342int Document::GetMark(Sci::Line line) const noexcept {
343 return Markers()->MarkValue(line);
344}
345
346Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept {
347 return Markers()->MarkerNext(lineStart, mask);
348}
349
350int Document::AddMark(Sci::Line line, int markerNum) {
351 if (line >= 0 && line <= LinesTotal()) {
352 const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
353 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
354 NotifyModified(mh);
355 return prev;
356 } else {
357 return -1;
358 }
359}
360
361void Document::AddMarkSet(Sci::Line line, int valueSet) {
362 if (line < 0 || line > LinesTotal()) {
363 return;
364 }
365 unsigned int m = valueSet;
366 for (int i = 0; m; i++, m >>= 1) {
367 if (m & 1)
368 Markers()->AddMark(line, i, LinesTotal());
369 }
370 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
371 NotifyModified(mh);
372}
373
374void Document::DeleteMark(Sci::Line line, int markerNum) {
375 Markers()->DeleteMark(line, markerNum, false);
376 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
377 NotifyModified(mh);
378}
379
380void Document::DeleteMarkFromHandle(int markerHandle) {
381 Markers()->DeleteMarkFromHandle(markerHandle);
382 DocModification mh(ModificationFlags::ChangeMarker);
383 mh.line = -1;
384 NotifyModified(mh);
385}
386
387void Document::DeleteAllMarks(int markerNum) {
388 bool someChanges = false;
389 for (Sci::Line line = 0; line < LinesTotal(); line++) {
390 if (Markers()->DeleteMark(line, markerNum, true))
391 someChanges = true;
392 }
393 if (someChanges) {
394 DocModification mh(ModificationFlags::ChangeMarker);
395 mh.line = -1;
396 NotifyModified(mh);
397 }
398}
399
400Sci::Line Document::LineFromHandle(int markerHandle) const noexcept {
401 return Markers()->LineFromHandle(markerHandle);
402}
403
404int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept {
405 return Markers()->NumberFromLine(line, which);
406}
407
408int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept {
409 return Markers()->HandleFromLine(line, which);
410}
411
// Position at which a line starts, as recorded by the cell buffer.
Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
	return cb.LineStart(line);
}
415
416bool Document::IsLineStartPosition(Sci::Position position) const {
417 return LineStart(LineFromPosition(position)) == position;
418}
419
420Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
421 if (line >= LinesTotal() - 1) {
422 return LineStart(line + 1);
423 } else {
424 Sci::Position position = LineStart(line + 1);
425 if (LineEndType::Unicode == cb.GetLineEndTypes()) {
426 const unsigned char bytes[] = {
427 cb.UCharAt(position-3),
428 cb.UCharAt(position-2),
429 cb.UCharAt(position-1),
430 };
431 if (UTF8IsSeparator(bytes)) {
432 return position - UTF8SeparatorLength;
433 }
434 if (UTF8IsNEL(bytes+1)) {
435 return position - UTF8NELLength;
436 }
437 }
438 position--; // Back over CR or LF
439 // When line terminator is CR+LF, may need to go back one more
440 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
441 position--;
442 }
443 return position;
444 }
445}
446
447void SCI_METHOD Document::SetErrorStatus(int status) {
448 // Tell the watchers an error has occurred.
449 for (const WatcherWithUserData &watcher : watchers) {
450 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast<Status>(status));
451 }
452}
453
// Line containing a position, as determined by the cell buffer.
Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
	return cb.LineFromPosition(pos);
}
457
458Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
459 // Avoids casting in callers for this very common function
460 return cb.LineFromPosition(pos);
461}
462
463Sci::Position Document::LineEndPosition(Sci::Position position) const {
464 return LineEnd(LineFromPosition(position));
465}
466
467bool Document::IsLineEndPosition(Sci::Position position) const {
468 return LineEnd(LineFromPosition(position)) == position;
469}
470
471bool Document::IsPositionInLineEnd(Sci::Position position) const {
472 return position >= LineEnd(LineFromPosition(position));
473}
474
475Sci::Position Document::VCHomePosition(Sci::Position position) const {
476 const Sci::Line line = SciLineFromPosition(position);
477 const Sci::Position startPosition = LineStart(line);
478 const Sci::Position endLine = LineEnd(line);
479 Sci::Position startText = startPosition;
480 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
481 startText++;
482 if (position == startText)
483 return startPosition;
484 else
485 return startText;
486}
487
// Forwards to the cell buffer's IndexLineStart for the given line and character index type.
Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept {
	return cb.IndexLineStart(line, lineCharacterIndex);
}
491
// Forwards to the cell buffer's LineFromPositionIndex for the given position and character index type.
Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept {
	return cb.LineFromPositionIndex(pos, lineCharacterIndex);
}
495
496Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept {
497 const Sci::Position posAfter = cb.LineStart(line) + length;
498 if (posAfter >= LengthNoExcept()) {
499 return LinesTotal();
500 }
501 const Sci::Line lineAfter = SciLineFromPosition(posAfter);
502 if (lineAfter > line) {
503 return lineAfter;
504 } else {
505 // Want to make some progress so return next line
506 return lineAfter + 1;
507 }
508}
509
510int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
511 const int prev = Levels()->SetLevel(line, level, LinesTotal());
512 if (prev != level) {
513 DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker,
514 LineStart(line), 0, 0, nullptr, line);
515 mh.foldLevelNow = static_cast<FoldLevel>(level);
516 mh.foldLevelPrev = static_cast<FoldLevel>(prev);
517 NotifyModified(mh);
518 }
519 return prev;
520}
521
522int SCI_METHOD Document::GetLevel(Sci_Position line) const {
523 return Levels()->GetLevel(line);
524}
525
526FoldLevel Document::GetFoldLevel(Sci_Position line) const {
527 return static_cast<FoldLevel>(Levels()->GetLevel(line));
528}
529
530void Document::ClearLevels() {
531 Levels()->ClearLevels();
532}
533
534static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept {
535 if (LevelIsWhitespace(levelTry))
536 return true;
537 else
538 return LevelNumber(levelStart) < LevelNumber(levelTry);
539}
540
541Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional<FoldLevel> level, Sci::Line lastLine) {
542 const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent));
543 const Sci::Line maxLine = LinesTotal();
544 const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
545 Sci::Line lineMaxSubord = lineParent;
546 while (lineMaxSubord < maxLine - 1) {
547 EnsureStyledTo(LineStart(lineMaxSubord + 2));
548 if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1)))
549 break;
550 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord)))
551 break;
552 lineMaxSubord++;
553 }
554 if (lineMaxSubord > lineParent) {
555 if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) {
556 // Have chewed up some whitespace that belongs to a parent so seek back
557 if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) {
558 lineMaxSubord--;
559 }
560 }
561 }
562 return lineMaxSubord;
563}
564
565Sci::Line Document::GetFoldParent(Sci::Line line) const {
566 const FoldLevel level = LevelNumberPart(GetFoldLevel(line));
567 Sci::Line lineLook = line - 1;
568 while ((lineLook > 0) && (
569 (!LevelIsHeader(GetFoldLevel(lineLook))) ||
570 (LevelNumberPart(GetFoldLevel(lineLook)) >= level))
571 ) {
572 lineLook--;
573 }
574 if (LevelIsHeader(GetFoldLevel(lineLook)) &&
575 (LevelNumberPart(GetFoldLevel(lineLook)) < level)) {
576 return lineLook;
577 } else {
578 return -1;
579 }
580}
581
// Compute the fold block surrounding a line and store it in highlightDelimiter:
// the first and last lines of the block plus the nearest lines before and after
// `line` at which the highlight may change.
// highlightDelimiter is cleared when no enclosing fold block is found.
// lastLine bounds how far GetLastChild looks when finding block ends.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
	const FoldLevel level = GetFoldLevel(line);
	const Sci::Line lookLastLine = std::max(line, lastLine) + 1;

	// Step backwards over whitespace lines and over headers whose level number
	// is not below that of the following line.
	Sci::Line lookLine = line;
	FoldLevel lookLineLevel = level;
	FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel);
	while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) ||
		(LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) {
		lookLineLevel = GetFoldLevel(--lookLine);
		lookLineLevelNum = LevelNumberPart(lookLineLevel);
	}

	// The block starts at this line when it is a header, otherwise at its fold parent.
	Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine);
	if (beginFoldBlock == -1) {
		highlightDelimiter.Clear();
		return;
	}

	Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine);
	Sci::Line firstChangeableLineBefore = -1;
	if (endFoldBlock < line) {
		// The block found ends before `line`, so look further back for a
		// header whose last child is exactly `line`.
		lookLine = beginFoldBlock - 1;
		lookLineLevel = GetFoldLevel(lookLine);
		lookLineLevelNum = LevelNumberPart(lookLineLevel);
		while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) {
			if (LevelIsHeader(lookLineLevel)) {
				if (GetLastChild(lookLine, {}, lookLastLine) == line) {
					beginFoldBlock = lookLine;
					endFoldBlock = line;
					firstChangeableLineBefore = line - 1;
				}
			}
			// Stop at a base level line that follows a line with a higher level number.
			if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum))
				break;
			lookLineLevel = GetFoldLevel(--lookLine);
			lookLineLevelNum = LevelNumberPart(lookLineLevel);
		}
	}
	if (firstChangeableLineBefore == -1) {
		// Nearest earlier line inside the block that is whitespace or has a
		// higher level number than `line`.
		for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
			lookLine >= beginFoldBlock;
			lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
			if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) {
				firstChangeableLineBefore = lookLine;
				break;
			}
		}
	}
	// Fall back to the line just before the block.
	if (firstChangeableLineBefore == -1)
		firstChangeableLineBefore = beginFoldBlock - 1;

	// Nearest later line inside the block that is a header with a level number
	// below that of the line after it.
	Sci::Line firstChangeableLineAfter = -1;
	for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
		lookLine <= endFoldBlock;
		lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
		if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) {
			firstChangeableLineAfter = lookLine;
			break;
		}
	}
	// Fall back to the line just after the block.
	if (firstChangeableLineAfter == -1)
		firstChangeableLineAfter = endFoldBlock + 1;

	highlightDelimiter.beginFoldBlock = beginFoldBlock;
	highlightDelimiter.endFoldBlock = endFoldBlock;
	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
651
652Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept {
653 return std::clamp<Sci::Position>(pos, 0, LengthNoExcept());
654}
655
656bool Document::IsCrLf(Sci::Position pos) const noexcept {
657 if (pos < 0)
658 return false;
659 if (pos >= (LengthNoExcept() - 1))
660 return false;
661 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
662}
663
664int Document::LenChar(Sci::Position pos) const noexcept {
665 if (pos < 0 || pos >= LengthNoExcept()) {
666 // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
667 return 1;
668 } else if (IsCrLf(pos)) {
669 return 2;
670 }
671
672 const unsigned char leadByte = cb.UCharAt(pos);
673 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
674 // Common case: ASCII character
675 return 1;
676 }
677 if (CpUtf8 == dbcsCodePage) {
678 const int widthCharBytes = UTF8BytesOfLead[leadByte];
679 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
680 for (int b = 1; b < widthCharBytes; b++) {
681 charBytes[b] = cb.UCharAt(pos + b);
682 }
683 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
684 if (utf8status & UTF8MaskInvalid) {
685 // Treat as invalid and use up just one byte
686 return 1;
687 } else {
688 return utf8status & UTF8MaskWidth;
689 }
690 } else {
691 if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) {
692 return 2;
693 } else {
694 return 1;
695 }
696 }
697}
698
699bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
700 Sci::Position trail = pos;
701 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
702 trail--;
703 start = (trail > 0) ? trail-1 : trail;
704
705 const unsigned char leadByte = cb.UCharAt(start);
706 const int widthCharBytes = UTF8BytesOfLead[leadByte];
707 if (widthCharBytes == 1) {
708 return false;
709 } else {
710 const int trailBytes = widthCharBytes - 1;
711 const Sci::Position len = pos - start;
712 if (len > trailBytes)
713 // pos too far from lead
714 return false;
715 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
716 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
717 charBytes[b] = cb.CharAt(start+b);
718 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
719 if (utf8status & UTF8MaskInvalid)
720 return false;
721 end = start + widthCharBytes;
722 return true;
723 }
724}
725
726// Normalise a position so that it is not part way through a multi-byte character.
727// This can occur in two situations -
728// When lines are terminated with \r\n pairs which should be treated as one character.
729// When displaying DBCS text such as Japanese.
730// If moving, move the position in the indicated direction.
731Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
732 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
733 // If out of range, just return minimum/maximum value.
734 if (pos <= 0)
735 return 0;
736 if (pos >= LengthNoExcept())
737 return LengthNoExcept();
738
739 // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
740 if (checkLineEnd && IsCrLf(pos - 1)) {
741 if (moveDir > 0)
742 return pos + 1;
743 else
744 return pos - 1;
745 }
746
747 if (dbcsCodePage) {
748 if (CpUtf8 == dbcsCodePage) {
749 const unsigned char ch = cb.UCharAt(pos);
750 // If ch is not a trail byte then pos is valid intercharacter position
751 if (UTF8IsTrailByte(ch)) {
752 Sci::Position startUTF = pos;
753 Sci::Position endUTF = pos;
754 if (InGoodUTF8(pos, startUTF, endUTF)) {
755 // ch is a trail byte within a UTF-8 character
756 if (moveDir > 0)
757 pos = endUTF;
758 else
759 pos = startUTF;
760 }
761 // Else invalid UTF-8 so return position of isolated trail byte
762 }
763 } else {
764 // Anchor DBCS calculations at start of line because start of line can
765 // not be a DBCS trail byte.
766 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
767 if (pos == posStartLine)
768 return pos;
769
770 // Step back until a non-lead-byte is found.
771 Sci::Position posCheck = pos;
772 while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
773 posCheck--;
774
775 // Check from known start of character.
776 while (posCheck < pos) {
777 const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;
778 if (posCheck + mbsize == pos) {
779 return pos;
780 } else if (posCheck + mbsize > pos) {
781 if (moveDir > 0) {
782 return posCheck + mbsize;
783 } else {
784 return posCheck;
785 }
786 }
787 posCheck += mbsize;
788 }
789 }
790 }
791
792 return pos;
793}
794
795// NextPosition moves between valid positions - it can not handle a position in the middle of a
796// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
797// A \r\n pair is treated as two characters.
// pos must already be a valid intercharacter position. Only the sign of
// moveDir is used: positive moves forward one character, otherwise back one.
// Results are limited to the range 0..document length.
Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
	// If out of range, just return minimum/maximum value.
	const int increment = (moveDir > 0) ? 1 : -1;
	if (pos + increment <= 0)
		return 0;
	if (pos + increment >= cb.Length())
		return cb.Length();

	if (dbcsCodePage) {
		if (CpUtf8 == dbcsCodePage) {
			if (increment == 1) {
				// Simple forward movement case so can avoid some checks
				const unsigned char leadByte = cb.UCharAt(pos);
				if (UTF8IsAscii(leadByte)) {
					// Single byte character or invalid
					pos++;
				} else {
					// Gather the bytes announced by the lead byte and classify them;
					// an invalid sequence advances by a single byte.
					const int widthCharBytes = UTF8BytesOfLead[leadByte];
					unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
					for (int b=1; b<widthCharBytes; b++)
						charBytes[b] = cb.CharAt(pos+b);
					const int utf8status = UTF8Classify(charBytes, widthCharBytes);
					if (utf8status & UTF8MaskInvalid)
						pos++;
					else
						pos += utf8status & UTF8MaskWidth;
				}
			} else {
				// Examine byte before position
				pos--;
				const unsigned char ch = cb.UCharAt(pos);
				// If ch is not a trail byte then pos is valid intercharacter position
				if (UTF8IsTrailByte(ch)) {
					// If ch is a trail byte in a valid UTF-8 character then return start of character
					Sci::Position startUTF = pos;
					Sci::Position endUTF = pos;
					if (InGoodUTF8(pos, startUTF, endUTF)) {
						pos = startUTF;
					}
					// Else invalid UTF-8 so return position of isolated trail byte
				}
			}
		} else {
			if (moveDir > 0) {
				// Forward in DBCS: step over one or two bytes, limited to the document end.
				const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1;
				pos += mbsize;
				if (pos > cb.Length())
					pos = cb.Length();
			} else {
				// Anchor DBCS calculations at start of line because start of line can
				// not be a DBCS trail byte.
				const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
				if ((pos - 1) <= posStartLine) {
					return pos - 1;
				} else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
					// Should actually be trail byte
					if (IsDBCSDualByteAt(pos - 2)) {
						return pos - 2;
					} else {
						// Invalid byte pair so treat as one byte wide
						return pos - 1;
					}
				} else {
					// Otherwise, step back until a non-lead-byte is found.
					Sci::Position posTemp = pos - 1;
					while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
						;
					// Now posTemp+1 must point to the beginning of a character,
					// so figure out whether we went back an even or an odd
					// number of bytes and go back 1 or 2 bytes, respectively.
					const Sci::Position widthLast = ((pos - posTemp) & 1) + 1;
					if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) {
						return pos - widthLast;
					}
					// Byte before pos may be valid character or may be an invalid second byte
					return pos - 1;
				}
			}
		}
	} else {
		// Single byte code page: every byte is a character.
		pos += increment;
	}

	return pos;
}
885
886bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
887 // Returns true if pos changed
888 Sci::Position posNext = NextPosition(pos, moveDir);
889 if (posNext == pos) {
890 return false;
891 } else {
892 pos = posNext;
893 return true;
894 }
895}
896
897Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
898 if (position >= LengthNoExcept()) {
899 return CharacterExtracted(unicodeReplacementChar, 0);
900 }
901 const unsigned char leadByte = cb.UCharAt(position);
902 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
903 // Common case: ASCII character
904 return CharacterExtracted(leadByte, 1);
905 }
906 if (CpUtf8 == dbcsCodePage) {
907 const int widthCharBytes = UTF8BytesOfLead[leadByte];
908 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
909 for (int b = 1; b<widthCharBytes; b++)
910 charBytes[b] = cb.UCharAt(position + b);
911 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
912 if (utf8status & UTF8MaskInvalid) {
913 // Treat as invalid and use up just one byte
914 return CharacterExtracted(unicodeReplacementChar, 1);
915 } else {
916 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
917 }
918 } else {
919 if (IsDBCSLeadByteNoExcept(leadByte)) {
920 const unsigned char trailByte = cb.UCharAt(position + 1);
921 if (IsDBCSTrailByteNoExcept(trailByte)) {
922 return CharacterExtracted::DBCS(leadByte, trailByte);
923 }
924 }
925 return CharacterExtracted(leadByte, 1);
926 }
927}
928
929Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
930 if (position <= 0) {
931 return CharacterExtracted(unicodeReplacementChar, 0);
932 }
933 const unsigned char previousByte = cb.UCharAt(position - 1);
934 if (0 == dbcsCodePage) {
935 return CharacterExtracted(previousByte, 1);
936 }
937 if (CpUtf8 == dbcsCodePage) {
938 if (UTF8IsAscii(previousByte)) {
939 return CharacterExtracted(previousByte, 1);
940 }
941 position--;
942 // If previousByte is not a trail byte then its invalid
943 if (UTF8IsTrailByte(previousByte)) {
944 // If previousByte is a trail byte in a valid UTF-8 character then find start of character
945 Sci::Position startUTF = position;
946 Sci::Position endUTF = position;
947 if (InGoodUTF8(position, startUTF, endUTF)) {
948 const Sci::Position widthCharBytes = endUTF - startUTF;
949 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
950 for (Sci::Position b = 0; b<widthCharBytes; b++)
951 charBytes[b] = cb.UCharAt(startUTF + b);
952 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
953 if (utf8status & UTF8MaskInvalid) {
954 // Treat as invalid and use up just one byte
955 return CharacterExtracted(unicodeReplacementChar, 1);
956 } else {
957 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
958 }
959 }
960 // Else invalid UTF-8 so return position of isolated trail byte
961 }
962 return CharacterExtracted(unicodeReplacementChar, 1);
963 } else {
964 // Moving backwards in DBCS is complex so use NextPosition
965 const Sci::Position posStartCharacter = NextPosition(position, -1);
966 return CharacterAfter(posStartCharacter);
967 }
968}
969
970// Return -1 on out-of-bounds
971Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
972 Sci::Position pos = positionStart;
973 if (dbcsCodePage) {
974 const int increment = (characterOffset > 0) ? 1 : -1;
975 while (characterOffset != 0) {
976 const Sci::Position posNext = NextPosition(pos, increment);
977 if (posNext == pos)
978 return Sci::invalidPosition;
979 pos = posNext;
980 characterOffset -= increment;
981 }
982 } else {
983 pos = positionStart + characterOffset;
984 if ((pos < 0) || (pos > Length()))
985 return Sci::invalidPosition;
986 }
987 return pos;
988}
989
990Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept {
991 Sci::Position pos = positionStart;
992 if (dbcsCodePage) {
993 const int increment = (characterOffset > 0) ? 1 : -1;
994 while (characterOffset != 0) {
995 const Sci::Position posNext = NextPosition(pos, increment);
996 if (posNext == pos)
997 return Sci::invalidPosition;
998 if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
999 characterOffset -= increment;
1000 pos = posNext;
1001 characterOffset -= increment;
1002 }
1003 } else {
1004 pos = positionStart + characterOffset;
1005 if ((pos < 0) || (pos > LengthNoExcept()))
1006 return Sci::invalidPosition;
1007 }
1008 return pos;
1009}
1010
1011int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
1012 int bytesInCharacter = 1;
1013 const unsigned char leadByte = cb.UCharAt(position);
1014 int character = leadByte;
1015 if (dbcsCodePage && !UTF8IsAscii(leadByte)) {
1016 if (CpUtf8 == dbcsCodePage) {
1017 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1018 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
1019 for (int b=1; b<widthCharBytes; b++)
1020 charBytes[b] = cb.UCharAt(position+b);
1021 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1022 if (utf8status & UTF8MaskInvalid) {
1023 // Report as singleton surrogate values which are invalid Unicode
1024 character = 0xDC80 + leadByte;
1025 } else {
1026 bytesInCharacter = utf8status & UTF8MaskWidth;
1027 character = UnicodeFromUTF8(charBytes);
1028 }
1029 } else {
1030 if (IsDBCSLeadByteNoExcept(leadByte)) {
1031 const unsigned char trailByte = cb.UCharAt(position + 1);
1032 if (IsDBCSTrailByteNoExcept(trailByte)) {
1033 bytesInCharacter = 2;
1034 character = (leadByte << 8) | trailByte;
1035 }
1036 }
1037 }
1038 }
1039 if (pWidth) {
1040 *pWidth = bytesInCharacter;
1041 }
1042 return character;
1043}
1044
1045int SCI_METHOD Document::CodePage() const {
1046 return dbcsCodePage;
1047}
1048
1049bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
1050 // Used by lexers so must match IDocument method exactly
1051 return IsDBCSLeadByteNoExcept(ch);
1052}
1053
1054bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
1055 // Used inside core Scintilla
1056 // Byte ranges found in Wikipedia articles with relevant search strings in each case
1057 const unsigned char uch = ch;
1058 switch (dbcsCodePage) {
1059 case 932:
1060 // Shift_jis
1061 return ((uch >= 0x81) && (uch <= 0x9F)) ||
1062 ((uch >= 0xE0) && (uch <= 0xFC));
1063 // Lead bytes F0 to FC may be a Microsoft addition.
1064 case 936:
1065 // GBK
1066 return (uch >= 0x81) && (uch <= 0xFE);
1067 case 949:
1068 // Korean Wansung KS C-5601-1987
1069 return (uch >= 0x81) && (uch <= 0xFE);
1070 case 950:
1071 // Big5
1072 return (uch >= 0x81) && (uch <= 0xFE);
1073 case 1361:
1074 // Korean Johab KS C-5601-1992
1075 return
1076 ((uch >= 0x84) && (uch <= 0xD3)) ||
1077 ((uch >= 0xD8) && (uch <= 0xDE)) ||
1078 ((uch >= 0xE0) && (uch <= 0xF9));
1079 }
1080 return false;
1081}
1082
1083bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept {
1084 const unsigned char trail = ch;
1085 switch (dbcsCodePage) {
1086 case 932:
1087 // Shift_jis
1088 return (trail != 0x7F) &&
1089 ((trail >= 0x40) && (trail <= 0xFC));
1090 case 936:
1091 // GBK
1092 return (trail != 0x7F) &&
1093 ((trail >= 0x40) && (trail <= 0xFE));
1094 case 949:
1095 // Korean Wansung KS C-5601-1987
1096 return
1097 ((trail >= 0x41) && (trail <= 0x5A)) ||
1098 ((trail >= 0x61) && (trail <= 0x7A)) ||
1099 ((trail >= 0x81) && (trail <= 0xFE));
1100 case 950:
1101 // Big5
1102 return
1103 ((trail >= 0x40) && (trail <= 0x7E)) ||
1104 ((trail >= 0xA1) && (trail <= 0xFE));
1105 case 1361:
1106 // Korean Johab KS C-5601-1992
1107 return
1108 ((trail >= 0x31) && (trail <= 0x7E)) ||
1109 ((trail >= 0x81) && (trail <= 0xFE));
1110 }
1111 return false;
1112}
1113
1114int Document::DBCSDrawBytes(std::string_view text) const noexcept {
1115 if (text.length() <= 1) {
1116 return static_cast<int>(text.length());
1117 }
1118 if (IsDBCSLeadByteNoExcept(text[0])) {
1119 return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1;
1120 } else {
1121 return 1;
1122 }
1123}
1124
1125bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept {
1126 return IsDBCSLeadByteNoExcept(cb.CharAt(pos))
1127 && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1));
1128}
1129
1130// Need to break text into segments near end but taking into account the
1131// encoding to not break inside a UTF-8 or DBCS character and also trying
1132// to avoid breaking inside a pair of combining characters, or inside
1133// ligatures.
1134// TODO: implement grapheme cluster boundaries,
1135// see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
1136//
1137// The segment length must always be long enough (more than 4 bytes)
1138// so that there will be at least one whole character to make a segment.
1139// For UTF-8, text must consist only of valid whole characters.
1140// In preference order from best to worst:
1141// 1) Break before or after spaces or controls
1142// 2) Break at word and punctuation boundary for better kerning and ligature support
1143// 3) Break after whole character, this may break combining characters
1144
1145size_t Document::SafeSegment(std::string_view text) const noexcept {
1146 // check space first as most written language use spaces.
1147 for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) {
1148 if (IsBreakSpace(*it)) {
1149 return it - text.begin();
1150 }
1151 }
1152
1153 if (!dbcsCodePage || dbcsCodePage == CpUtf8) {
1154 // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary.
1155 std::string_view::iterator it = text.end() - 1;
1156 const bool punctuation = IsPunctuation(*it);
1157 do {
1158 --it;
1159 if (punctuation != IsPunctuation(*it)) {
1160 return it - text.begin() + 1;
1161 }
1162 } while (it != text.begin());
1163
1164 it = text.end() - 1;
1165 if (dbcsCodePage) {
1166 // for UTF-8 go back to the start of last character.
1167 for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) {
1168 --it;
1169 }
1170 }
1171 return it - text.begin();
1172 }
1173
1174 {
1175 // forward iterate for DBCS to find word and punctuation boundary.
1176 size_t lastPunctuationBreak = 0;
1177 size_t lastEncodingAllowedBreak = 0;
1178 CharacterClass ccPrev = CharacterClass::space;
1179 for (size_t j = 0; j < text.length();) {
1180 const unsigned char ch = text[j];
1181 lastEncodingAllowedBreak = j++;
1182
1183 CharacterClass cc = CharacterClass::word;
1184 if (UTF8IsAscii(ch)) {
1185 if (IsPunctuation(ch)) {
1186 cc = CharacterClass::punctuation;
1187 }
1188 } else {
1189 j += IsDBCSLeadByteNoExcept(ch);
1190 }
1191 if (cc != ccPrev) {
1192 ccPrev = cc;
1193 lastPunctuationBreak = lastEncodingAllowedBreak;
1194 }
1195 }
1196 return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak;
1197 }
1198}
1199
1200EncodingFamily Document::CodePageFamily() const noexcept {
1201 if (CpUtf8 == dbcsCodePage)
1202 return EncodingFamily::unicode;
1203 else if (dbcsCodePage)
1204 return EncodingFamily::dbcs;
1205 else
1206 return EncodingFamily::eightBit;
1207}
1208
1209void Document::ModifiedAt(Sci::Position pos) noexcept {
1210 if (endStyled > pos)
1211 endStyled = pos;
1212}
1213
1214void Document::CheckReadOnly() {
1215 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1216 enteredReadOnlyCount++;
1217 NotifyModifyAttempt();
1218 enteredReadOnlyCount--;
1219 }
1220}
1221
1222// Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1223// SetStyleAt does not change the persistent state of a document
1224
1225bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1226 if (pos < 0)
1227 return false;
1228 if (len <= 0)
1229 return false;
1230 if ((pos + len) > LengthNoExcept())
1231 return false;
1232 CheckReadOnly();
1233 if (enteredModification != 0) {
1234 return false;
1235 } else {
1236 enteredModification++;
1237 if (!cb.IsReadOnly()) {
1238 NotifyModified(
1239 DocModification(
1240 ModificationFlags::BeforeDelete | ModificationFlags::User,
1241 pos, len,
1242 0, nullptr));
1243 const Sci::Line prevLinesTotal = LinesTotal();
1244 const bool startSavePoint = cb.IsSavePoint();
1245 bool startSequence = false;
1246 const char *text = cb.DeleteChars(pos, len, startSequence);
1247 if (startSavePoint && cb.IsCollectingUndo())
1248 NotifySavePoint(false);
1249 if ((pos < LengthNoExcept()) || (pos == 0))
1250 ModifiedAt(pos);
1251 else
1252 ModifiedAt(pos-1);
1253 NotifyModified(
1254 DocModification(
1255 ModificationFlags::DeleteText | ModificationFlags::User |
1256 (startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1257 pos, len,
1258 LinesTotal() - prevLinesTotal, text));
1259 }
1260 enteredModification--;
1261 }
1262 return !cb.IsReadOnly();
1263}
1264
1265/**
1266 * Insert a string with a length.
1267 */
1268Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
1269 if (insertLength <= 0) {
1270 return 0;
1271 }
1272 CheckReadOnly(); // Application may change read only state here
1273 if (cb.IsReadOnly()) {
1274 return 0;
1275 }
1276 if (enteredModification != 0) {
1277 return 0;
1278 }
1279 enteredModification++;
1280 insertionSet = false;
1281 insertion.clear();
1282 NotifyModified(
1283 DocModification(
1284 ModificationFlags::InsertCheck,
1285 position, insertLength,
1286 0, s));
1287 if (insertionSet) {
1288 s = insertion.c_str();
1289 insertLength = insertion.length();
1290 }
1291 NotifyModified(
1292 DocModification(
1293 ModificationFlags::BeforeInsert | ModificationFlags::User,
1294 position, insertLength,
1295 0, s));
1296 const Sci::Line prevLinesTotal = LinesTotal();
1297 const bool startSavePoint = cb.IsSavePoint();
1298 bool startSequence = false;
1299 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1300 if (startSavePoint && cb.IsCollectingUndo())
1301 NotifySavePoint(false);
1302 ModifiedAt(position);
1303 NotifyModified(
1304 DocModification(
1305 ModificationFlags::InsertText | ModificationFlags::User |
1306 (startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1307 position, insertLength,
1308 LinesTotal() - prevLinesTotal, text));
1309 if (insertionSet) { // Free memory as could be large
1310 std::string().swap(insertion);
1311 }
1312 enteredModification--;
1313 return insertLength;
1314}
1315
1316void Document::ChangeInsertion(const char *s, Sci::Position length) {
1317 insertionSet = true;
1318 insertion.assign(s, length);
1319}
1320
1321int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1322 try {
1323 const Sci::Position position = Length();
1324 InsertString(position, data, length);
1325 } catch (std::bad_alloc &) {
1326 return static_cast<int>(Status::BadAlloc);
1327 } catch (...) {
1328 return static_cast<int>(Status::Failure);
1329 }
1330 return static_cast<int>(Status::Ok);
1331}
1332
1333void * SCI_METHOD Document::ConvertToDocument() {
1334 return this;
1335}
1336
1337Sci::Position Document::Undo() {
1338 Sci::Position newPos = -1;
1339 CheckReadOnly();
1340 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1341 enteredModification++;
1342 if (!cb.IsReadOnly()) {
1343 const bool startSavePoint = cb.IsSavePoint();
1344 bool multiLine = false;
1345 const int steps = cb.StartUndo();
1346 //Platform::DebugPrintf("Steps=%d\n", steps);
1347 Sci::Position coalescedRemovePos = -1;
1348 Sci::Position coalescedRemoveLen = 0;
1349 Sci::Position prevRemoveActionPos = -1;
1350 Sci::Position prevRemoveActionLen = 0;
1351 for (int step = 0; step < steps; step++) {
1352 const Sci::Line prevLinesTotal = LinesTotal();
1353 const Action &action = cb.GetUndoStep();
1354 if (action.at == ActionType::remove) {
1355 NotifyModified(DocModification(
1356 ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
1357 } else if (action.at == ActionType::container) {
1358 DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
1359 dm.token = action.position;
1360 NotifyModified(dm);
1361 if (!action.mayCoalesce) {
1362 coalescedRemovePos = -1;
1363 coalescedRemoveLen = 0;
1364 prevRemoveActionPos = -1;
1365 prevRemoveActionLen = 0;
1366 }
1367 } else {
1368 NotifyModified(DocModification(
1369 ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
1370 }
1371 cb.PerformUndoStep();
1372 if (action.at != ActionType::container) {
1373 ModifiedAt(action.position);
1374 newPos = action.position;
1375 }
1376
1377 ModificationFlags modFlags = ModificationFlags::Undo;
1378 // With undo, an insertion action becomes a deletion notification
1379 if (action.at == ActionType::remove) {
1380 newPos += action.lenData;
1381 modFlags |= ModificationFlags::InsertText;
1382 if ((coalescedRemoveLen > 0) &&
1383 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1384 coalescedRemoveLen += action.lenData;
1385 newPos = coalescedRemovePos + coalescedRemoveLen;
1386 } else {
1387 coalescedRemovePos = action.position;
1388 coalescedRemoveLen = action.lenData;
1389 }
1390 prevRemoveActionPos = action.position;
1391 prevRemoveActionLen = action.lenData;
1392 } else if (action.at == ActionType::insert) {
1393 modFlags |= ModificationFlags::DeleteText;
1394 coalescedRemovePos = -1;
1395 coalescedRemoveLen = 0;
1396 prevRemoveActionPos = -1;
1397 prevRemoveActionLen = 0;
1398 }
1399 if (steps > 1)
1400 modFlags |= ModificationFlags::MultiStepUndoRedo;
1401 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1402 if (linesAdded != 0)
1403 multiLine = true;
1404 if (step == steps - 1) {
1405 modFlags |= ModificationFlags::LastStepInUndoRedo;
1406 if (multiLine)
1407 modFlags |= ModificationFlags::MultilineUndoRedo;
1408 }
1409 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1410 linesAdded, action.data.get()));
1411 }
1412
1413 const bool endSavePoint = cb.IsSavePoint();
1414 if (startSavePoint != endSavePoint)
1415 NotifySavePoint(endSavePoint);
1416 }
1417 enteredModification--;
1418 }
1419 return newPos;
1420}
1421
1422Sci::Position Document::Redo() {
1423 Sci::Position newPos = -1;
1424 CheckReadOnly();
1425 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1426 enteredModification++;
1427 if (!cb.IsReadOnly()) {
1428 const bool startSavePoint = cb.IsSavePoint();
1429 bool multiLine = false;
1430 const int steps = cb.StartRedo();
1431 for (int step = 0; step < steps; step++) {
1432 const Sci::Line prevLinesTotal = LinesTotal();
1433 const Action &action = cb.GetRedoStep();
1434 if (action.at == ActionType::insert) {
1435 NotifyModified(DocModification(
1436 ModificationFlags::BeforeInsert | ModificationFlags::Redo, action));
1437 } else if (action.at == ActionType::container) {
1438 DocModification dm(ModificationFlags::Container | ModificationFlags::Redo);
1439 dm.token = action.position;
1440 NotifyModified(dm);
1441 } else {
1442 NotifyModified(DocModification(
1443 ModificationFlags::BeforeDelete | ModificationFlags::Redo, action));
1444 }
1445 cb.PerformRedoStep();
1446 if (action.at != ActionType::container) {
1447 ModifiedAt(action.position);
1448 newPos = action.position;
1449 }
1450
1451 ModificationFlags modFlags = ModificationFlags::Redo;
1452 if (action.at == ActionType::insert) {
1453 newPos += action.lenData;
1454 modFlags |= ModificationFlags::InsertText;
1455 } else if (action.at == ActionType::remove) {
1456 modFlags |= ModificationFlags::DeleteText;
1457 }
1458 if (steps > 1)
1459 modFlags |= ModificationFlags::MultiStepUndoRedo;
1460 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1461 if (linesAdded != 0)
1462 multiLine = true;
1463 if (step == steps - 1) {
1464 modFlags |= ModificationFlags::LastStepInUndoRedo;
1465 if (multiLine)
1466 modFlags |= ModificationFlags::MultilineUndoRedo;
1467 }
1468 NotifyModified(
1469 DocModification(modFlags, action.position, action.lenData,
1470 linesAdded, action.data.get()));
1471 }
1472
1473 const bool endSavePoint = cb.IsSavePoint();
1474 if (startSavePoint != endSavePoint)
1475 NotifySavePoint(endSavePoint);
1476 }
1477 enteredModification--;
1478 }
1479 return newPos;
1480}
1481
1482void Document::DelChar(Sci::Position pos) {
1483 DeleteChars(pos, LenChar(pos));
1484}
1485
1486void Document::DelCharBack(Sci::Position pos) {
1487 if (pos <= 0) {
1488 return;
1489 } else if (IsCrLf(pos - 2)) {
1490 DeleteChars(pos - 2, 2);
1491 } else if (dbcsCodePage) {
1492 const Sci::Position startChar = NextPosition(pos, -1);
1493 DeleteChars(startChar, pos - startChar);
1494 } else {
1495 DeleteChars(pos - 1, 1);
1496 }
1497}
1498
1499static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1500 return ((pos / tabSize) + 1) * tabSize;
1501}
1502
1503static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1504 std::string indentation;
1505 if (!insertSpaces) {
1506 while (indent >= tabSize) {
1507 indentation += '\t';
1508 indent -= tabSize;
1509 }
1510 }
1511 while (indent > 0) {
1512 indentation += ' ';
1513 indent--;
1514 }
1515 return indentation;
1516}
1517
1518int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1519 int indent = 0;
1520 if ((line >= 0) && (line < LinesTotal())) {
1521 const Sci::Position lineStart = LineStart(line);
1522 const Sci::Position length = Length();
1523 for (Sci::Position i = lineStart; i < length; i++) {
1524 const char ch = cb.CharAt(i);
1525 if (ch == ' ')
1526 indent++;
1527 else if (ch == '\t')
1528 indent = static_cast<int>(NextTab(indent, tabInChars));
1529 else
1530 return indent;
1531 }
1532 }
1533 return indent;
1534}
1535
1536Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1537 const int indentOfLine = GetLineIndentation(line);
1538 if (indent < 0)
1539 indent = 0;
1540 if (indent != indentOfLine) {
1541 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1542 const Sci::Position thisLineStart = LineStart(line);
1543 const Sci::Position indentPos = GetLineIndentPosition(line);
1544 UndoGroup ug(this);
1545 DeleteChars(thisLineStart, indentPos - thisLineStart);
1546 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1547 linebuf.length());
1548 } else {
1549 return GetLineIndentPosition(line);
1550 }
1551}
1552
1553Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1554 if (line < 0)
1555 return 0;
1556 Sci::Position pos = LineStart(line);
1557 const Sci::Position length = Length();
1558 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1559 pos++;
1560 }
1561 return pos;
1562}
1563
1564Sci::Position Document::GetColumn(Sci::Position pos) {
1565 Sci::Position column = 0;
1566 const Sci::Line line = SciLineFromPosition(pos);
1567 if ((line >= 0) && (line < LinesTotal())) {
1568 for (Sci::Position i = LineStart(line); i < pos;) {
1569 const char ch = cb.CharAt(i);
1570 if (ch == '\t') {
1571 column = NextTab(column, tabInChars);
1572 i++;
1573 } else if (ch == '\r') {
1574 return column;
1575 } else if (ch == '\n') {
1576 return column;
1577 } else if (i >= Length()) {
1578 return column;
1579 } else {
1580 column++;
1581 i = NextPosition(i, 1);
1582 }
1583 }
1584 }
1585 return column;
1586}
1587
1588Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept {
1589 startPos = MovePositionOutsideChar(startPos, 1, false);
1590 endPos = MovePositionOutsideChar(endPos, -1, false);
1591 Sci::Position count = 0;
1592 Sci::Position i = startPos;
1593 while (i < endPos) {
1594 count++;
1595 i = NextPosition(i, 1);
1596 }
1597 return count;
1598}
1599
1600Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept {
1601 startPos = MovePositionOutsideChar(startPos, 1, false);
1602 endPos = MovePositionOutsideChar(endPos, -1, false);
1603 Sci::Position count = 0;
1604 Sci::Position i = startPos;
1605 while (i < endPos) {
1606 count++;
1607 const Sci::Position next = NextPosition(i, 1);
1608 if ((next - i) > 3)
1609 count++;
1610 i = next;
1611 }
1612 return count;
1613}
1614
1615Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1616 Sci::Position position = LineStart(line);
1617 if ((line >= 0) && (line < LinesTotal())) {
1618 Sci::Position columnCurrent = 0;
1619 while ((columnCurrent < column) && (position < Length())) {
1620 const char ch = cb.CharAt(position);
1621 if (ch == '\t') {
1622 columnCurrent = NextTab(columnCurrent, tabInChars);
1623 if (columnCurrent > column)
1624 return position;
1625 position++;
1626 } else if (ch == '\r') {
1627 return position;
1628 } else if (ch == '\n') {
1629 return position;
1630 } else {
1631 columnCurrent++;
1632 position = NextPosition(position, 1);
1633 }
1634 }
1635 }
1636 return position;
1637}
1638
1639void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1640 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1641 for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1642 const Sci::Position indentOfLine = GetLineIndentation(line);
1643 if (forwards) {
1644 if (LineStart(line) < LineEnd(line)) {
1645 SetLineIndentation(line, indentOfLine + IndentSize());
1646 }
1647 } else {
1648 SetLineIndentation(line, indentOfLine - IndentSize());
1649 }
1650 }
1651}
1652
1653// Convert line endings for a piece of text to a particular mode.
1654// Stop at len or when a NUL is found.
1655std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) {
1656 std::string dest;
1657 for (size_t i = 0; (i < len) && (s[i]); i++) {
1658 if (s[i] == '\n' || s[i] == '\r') {
1659 if (eolModeWanted == EndOfLine::Cr) {
1660 dest.push_back('\r');
1661 } else if (eolModeWanted == EndOfLine::Lf) {
1662 dest.push_back('\n');
1663 } else { // eolModeWanted == EndOfLine::CrLf
1664 dest.push_back('\r');
1665 dest.push_back('\n');
1666 }
1667 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1668 i++;
1669 }
1670 } else {
1671 dest.push_back(s[i]);
1672 }
1673 }
1674 return dest;
1675}
1676
1677void Document::ConvertLineEnds(EndOfLine eolModeSet) {
1678 UndoGroup ug(this);
1679
1680 for (Sci::Position pos = 0; pos < Length(); pos++) {
1681 if (cb.CharAt(pos) == '\r') {
1682 if (cb.CharAt(pos + 1) == '\n') {
1683 // CRLF
1684 if (eolModeSet == EndOfLine::Cr) {
1685 DeleteChars(pos + 1, 1); // Delete the LF
1686 } else if (eolModeSet == EndOfLine::Lf) {
1687 DeleteChars(pos, 1); // Delete the CR
1688 } else {
1689 pos++;
1690 }
1691 } else {
1692 // CR
1693 if (eolModeSet == EndOfLine::CrLf) {
1694 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1695 } else if (eolModeSet == EndOfLine::Lf) {
1696 pos += InsertString(pos, "\n", 1); // Insert LF
1697 DeleteChars(pos, 1); // Delete CR
1698 pos--;
1699 }
1700 }
1701 } else if (cb.CharAt(pos) == '\n') {
1702 // LF
1703 if (eolModeSet == EndOfLine::CrLf) {
1704 pos += InsertString(pos, "\r", 1); // Insert CR
1705 } else if (eolModeSet == EndOfLine::Cr) {
1706 pos += InsertString(pos, "\r", 1); // Insert CR
1707 DeleteChars(pos, 1); // Delete LF
1708 pos--;
1709 }
1710 }
1711 }
1712
1713}
1714
1715DocumentOption Document::Options() const noexcept {
1716 return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) |
1717 (cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone);
1718}
1719
1720bool Document::IsWhiteLine(Sci::Line line) const {
1721 Sci::Position currentChar = LineStart(line);
1722 const Sci::Position endLine = LineEnd(line);
1723 while (currentChar < endLine) {
1724 if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1725 return false;
1726 }
1727 ++currentChar;
1728 }
1729 return true;
1730}
1731
1732Sci::Position Document::ParaUp(Sci::Position pos) const {
1733 Sci::Line line = SciLineFromPosition(pos);
1734 line--;
1735 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1736 line--;
1737 }
1738 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1739 line--;
1740 }
1741 line++;
1742 return LineStart(line);
1743}
1744
1745Sci::Position Document::ParaDown(Sci::Position pos) const {
1746 Sci::Line line = SciLineFromPosition(pos);
1747 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1748 line++;
1749 }
1750 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1751 line++;
1752 }
1753 if (line < LinesTotal())
1754 return LineStart(line);
1755 else // end of a document
1756 return LineEnd(line-1);
1757}
1758
1759CharacterClass Document::WordCharacterClass(unsigned int ch) const {
1760 if (dbcsCodePage && (ch >= 0x80)) {
1761 if (CpUtf8 == dbcsCodePage) {
1762 // Use hard coded Unicode class
1763 const CharacterCategory cc = charMap.CategoryFor(ch);
1764 switch (cc) {
1765
1766 // Separator, Line/Paragraph
1767 case ccZl:
1768 case ccZp:
1769 return CharacterClass::newLine;
1770
1771 // Separator, Space
1772 case ccZs:
1773 // Other
1774 case ccCc:
1775 case ccCf:
1776 case ccCs:
1777 case ccCo:
1778 case ccCn:
1779 return CharacterClass::space;
1780
1781 // Letter
1782 case ccLu:
1783 case ccLl:
1784 case ccLt:
1785 case ccLm:
1786 case ccLo:
1787 // Number
1788 case ccNd:
1789 case ccNl:
1790 case ccNo:
1791 // Mark - includes combining diacritics
1792 case ccMn:
1793 case ccMc:
1794 case ccMe:
1795 return CharacterClass::word;
1796
1797 // Punctuation
1798 case ccPc:
1799 case ccPd:
1800 case ccPs:
1801 case ccPe:
1802 case ccPi:
1803 case ccPf:
1804 case ccPo:
1805 // Symbol
1806 case ccSm:
1807 case ccSc:
1808 case ccSk:
1809 case ccSo:
1810 return CharacterClass::punctuation;
1811
1812 }
1813 } else {
1814 // Asian DBCS
1815 return CharacterClass::word;
1816 }
1817 }
1818 return charClass.GetClass(static_cast<unsigned char>(ch));
1819}
1820
1821/**
1822 * Used by commands that want to select whole words.
1823 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1824 */
1825Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1826 CharacterClass ccStart = CharacterClass::word;
1827 if (delta < 0) {
1828 if (!onlyWordCharacters) {
1829 const CharacterExtracted ce = CharacterBefore(pos);
1830 ccStart = WordCharacterClass(ce.character);
1831 }
1832 while (pos > 0) {
1833 const CharacterExtracted ce = CharacterBefore(pos);
1834 if (WordCharacterClass(ce.character) != ccStart)
1835 break;
1836 pos -= ce.widthBytes;
1837 }
1838 } else {
1839 if (!onlyWordCharacters && pos < LengthNoExcept()) {
1840 const CharacterExtracted ce = CharacterAfter(pos);
1841 ccStart = WordCharacterClass(ce.character);
1842 }
1843 while (pos < LengthNoExcept()) {
1844 const CharacterExtracted ce = CharacterAfter(pos);
1845 if (WordCharacterClass(ce.character) != ccStart)
1846 break;
1847 pos += ce.widthBytes;
1848 }
1849 }
1850 return MovePositionOutsideChar(pos, delta, true);
1851}
1852
1853/**
1854 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1855 * (delta < 0).
1856 * This is looking for a transition between character classes although there is also some
1857 * additional movement to transit white space.
1858 * Used by cursor movement by word commands.
1859 */
1860Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1861 if (delta < 0) {
1862 while (pos > 0) {
1863 const CharacterExtracted ce = CharacterBefore(pos);
1864 if (WordCharacterClass(ce.character) != CharacterClass::space)
1865 break;
1866 pos -= ce.widthBytes;
1867 }
1868 if (pos > 0) {
1869 CharacterExtracted ce = CharacterBefore(pos);
1870 const CharacterClass ccStart = WordCharacterClass(ce.character);
1871 while (pos > 0) {
1872 ce = CharacterBefore(pos);
1873 if (WordCharacterClass(ce.character) != ccStart)
1874 break;
1875 pos -= ce.widthBytes;
1876 }
1877 }
1878 } else {
1879 CharacterExtracted ce = CharacterAfter(pos);
1880 const CharacterClass ccStart = WordCharacterClass(ce.character);
1881 while (pos < LengthNoExcept()) {
1882 ce = CharacterAfter(pos);
1883 if (WordCharacterClass(ce.character) != ccStart)
1884 break;
1885 pos += ce.widthBytes;
1886 }
1887 while (pos < LengthNoExcept()) {
1888 ce = CharacterAfter(pos);
1889 if (WordCharacterClass(ce.character) != CharacterClass::space)
1890 break;
1891 pos += ce.widthBytes;
1892 }
1893 }
1894 return pos;
1895}
1896
1897/**
1898 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1899 * (delta < 0).
1900 * This is looking for a transition between character classes although there is also some
1901 * additional movement to transit white space.
1902 * Used by cursor movement by word commands.
1903 */
1904Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1905 if (delta < 0) {
1906 if (pos > 0) {
1907 CharacterExtracted ce = CharacterBefore(pos);
1908 const CharacterClass ccStart = WordCharacterClass(ce.character);
1909 if (ccStart != CharacterClass::space) {
1910 while (pos > 0) {
1911 ce = CharacterBefore(pos);
1912 if (WordCharacterClass(ce.character) != ccStart)
1913 break;
1914 pos -= ce.widthBytes;
1915 }
1916 }
1917 while (pos > 0) {
1918 ce = CharacterBefore(pos);
1919 if (WordCharacterClass(ce.character) != CharacterClass::space)
1920 break;
1921 pos -= ce.widthBytes;
1922 }
1923 }
1924 } else {
1925 while (pos < LengthNoExcept()) {
1926 const CharacterExtracted ce = CharacterAfter(pos);
1927 if (WordCharacterClass(ce.character) != CharacterClass::space)
1928 break;
1929 pos += ce.widthBytes;
1930 }
1931 if (pos < LengthNoExcept()) {
1932 CharacterExtracted ce = CharacterAfter(pos);
1933 const CharacterClass ccStart = WordCharacterClass(ce.character);
1934 while (pos < LengthNoExcept()) {
1935 ce = CharacterAfter(pos);
1936 if (WordCharacterClass(ce.character) != ccStart)
1937 break;
1938 pos += ce.widthBytes;
1939 }
1940 }
1941 }
1942 return pos;
1943}
1944
1945namespace {
1946
1947constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept {
1948 return (cc != ccNext) &&
1949 (cc == CharacterClass::word || cc == CharacterClass::punctuation);
1950}
1951
1952}
1953
1954/**
1955 * Check that the character at the given position is a word or punctuation character and that
1956 * the previous character is of a different character class.
1957 */
1958bool Document::IsWordStartAt(Sci::Position pos) const {
1959 if (pos >= LengthNoExcept())
1960 return false;
1961 if (pos >= 0) {
1962 const CharacterExtracted cePos = CharacterAfter(pos);
1963 // At start of document, treat as if space before so can be word start
1964 const CharacterExtracted cePrev = (pos > 0) ?
1965 CharacterBefore(pos) : CharacterExtracted(' ', 1);
1966 return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character));
1967 }
1968 return true;
1969}
1970
1971/**
1972 * Check that the character before the given position is a word or punctuation character and that
1973 * the next character is of a different character class.
1974 */
1975bool Document::IsWordEndAt(Sci::Position pos) const {
1976 if (pos <= 0)
1977 return false;
1978 if (pos <= LengthNoExcept()) {
1979 // At end of document, treat as if space after so can be word end
1980 const CharacterExtracted cePos = (pos < LengthNoExcept()) ?
1981 CharacterAfter(pos) : CharacterExtracted(' ', 1);
1982 const CharacterExtracted cePrev = CharacterBefore(pos);
1983 return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character));
1984 }
1985 return true;
1986}
1987
1988/**
1989 * Check that the given range is has transitions between character classes at both
1990 * ends and where the characters on the inside are word or punctuation characters.
1991 */
1992bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
1993 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1994}
1995
1996bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
1997 return (!word && !wordStart) ||
1998 (word && IsWordAt(pos, pos + length)) ||
1999 (wordStart && IsWordStartAt(pos));
2000}
2001
2002bool Document::HasCaseFolder() const noexcept {
2003 return pcf != nullptr;
2004}
2005
2006void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
2007 pcf = std::move(pcf_);
2008}
2009
2010Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
2011 const unsigned char leadByte = cb.UCharAt(position);
2012 if (UTF8IsAscii(leadByte)) {
2013 // Common case: ASCII character
2014 return CharacterExtracted(leadByte, 1);
2015 }
2016 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2017 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
2018 for (int b=1; b<widthCharBytes; b++)
2019 charBytes[b] = cb.UCharAt(position + b);
2020 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
2021 if (utf8status & UTF8MaskInvalid) {
2022 // Treat as invalid and use up just one byte
2023 return CharacterExtracted(unicodeReplacementChar, 1);
2024 } else {
2025 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
2026 }
2027}
2028
2029namespace {
2030
2031// Equivalent of memchr over the split view
2032ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept {
2033 size_t range1Length = 0;
2034 if (start < view.length1) {
2035 range1Length = std::min(length, view.length1 - start);
2036 const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length));
2037 if (match) {
2038 return match - view.segment1;
2039 }
2040 start += range1Length;
2041 }
2042 const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length));
2043 if (match2) {
2044 return match2 - view.segment2;
2045 }
2046 return -1;
2047}
2048
2049// Equivalent of memcmp over the split view
2050// This does not call memcmp as search texts are commonly too short to overcome the
2051// call overhead.
2052bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept {
2053 for (size_t i = 0; i < text.length(); i++) {
2054 if (view.CharAt(i + start) != text[i]) {
2055 return false;
2056 }
2057 }
2058 return true;
2059}
2060
2061}
2062
2063/**
2064 * Find text in document, supporting both forward and backward
2065 * searches (just pass minPos > maxPos to do a backward search)
2066 * Has not been tested with backwards DBCS searches yet.
2067 */
2068Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
2069 FindOption flags, Sci::Position *length) {
2070 if (*length <= 0)
2071 return minPos;
2072 const bool caseSensitive = FlagSet(flags, FindOption::MatchCase);
2073 const bool word = FlagSet(flags, FindOption::WholeWord);
2074 const bool wordStart = FlagSet(flags, FindOption::WordStart);
2075 const bool regExp = FlagSet(flags, FindOption::RegExp);
2076 if (regExp) {
2077 if (!regex)
2078 regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
2079 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
2080 } else {
2081
2082 const bool forward = minPos <= maxPos;
2083 const int increment = forward ? 1 : -1;
2084
2085 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2086 const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
2087 const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
2088
2089 // Compute actual search ranges needed
2090 const Sci::Position lengthFind = *length;
2091
2092 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2093 const Sci::Position limitPos = std::max(startPos, endPos);
2094 Sci::Position pos = startPos;
2095 if (!forward) {
2096 // Back all of a character
2097 pos = NextPosition(pos, increment);
2098 }
2099 const SplitView cbView = cb.AllView();
2100 if (caseSensitive) {
2101 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2102 const unsigned char charStartSearch = search[0];
2103 if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
2104 // This is a fast case where there is no need to test byte values to iterate
2105 // so becomes the equivalent of a memchr+memcmp loop.
2106 // UTF-8 search will not be self-synchronizing when starts with trail byte
2107 const std::string_view suffix(search + 1, lengthFind - 1);
2108 while (pos < endSearch) {
2109 pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
2110 if (pos < 0) {
2111 break;
2112 }
2113 if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2114 return pos;
2115 }
2116 pos++;
2117 }
2118 } else {
2119 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2120 const unsigned char leadByte = cbView.CharAt(pos);
2121 if (leadByte == charStartSearch) {
2122 bool found = (pos + lengthFind) <= limitPos;
2123 // SplitMatch could be called here but it is slower with g++ -O2
2124 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
2125 found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
2126 }
2127 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2128 return pos;
2129 }
2130 }
2131 if (forward && UTF8IsAscii(leadByte)) {
2132 pos++;
2133 } else {
2134 if (dbcsCodePage) {
2135 if (!NextCharacter(pos, increment)) {
2136 break;
2137 }
2138 } else {
2139 pos += increment;
2140 }
2141 }
2142 }
2143 }
2144 } else if (CpUtf8 == dbcsCodePage) {
2145 constexpr size_t maxFoldingExpansion = 4;
2146 std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
2147 const size_t lenSearch =
2148 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2149 while (forward ? (pos < endPos) : (pos >= endPos)) {
2150 int widthFirstCharacter = 0;
2151 Sci::Position posIndexDocument = pos;
2152 size_t indexSearch = 0;
2153 bool characterMatches = true;
2154 for (;;) {
2155 const unsigned char leadByte = cbView.CharAt(posIndexDocument);
2156 char bytes[UTF8MaxBytes + 1];
2157 int widthChar = 1;
2158 if (!UTF8IsAscii(leadByte)) {
2159 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2160 bytes[0] = leadByte;
2161 for (int b=1; b<widthCharBytes; b++) {
2162 bytes[b] = cbView.CharAt(posIndexDocument+b);
2163 }
2164 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
2165 }
2166 if (!widthFirstCharacter) {
2167 widthFirstCharacter = widthChar;
2168 }
2169 if ((posIndexDocument + widthChar) > limitPos) {
2170 break;
2171 }
2172 size_t lenFlat = 1;
2173 if (widthChar == 1) {
2174 characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
2175 } else {
2176 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
2177 lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2178 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2179 assert((indexSearch + lenFlat) <= searchThing.size());
2180 // Does folded match the buffer
2181 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2182 }
2183 if (!characterMatches) {
2184 break;
2185 }
2186 posIndexDocument += widthChar;
2187 indexSearch += lenFlat;
2188 if (indexSearch >= lenSearch) {
2189 break;
2190 }
2191 }
2192 if (characterMatches && (indexSearch == lenSearch)) {
2193 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
2194 *length = posIndexDocument - pos;
2195 return pos;
2196 }
2197 }
2198 if (forward) {
2199 pos += widthFirstCharacter;
2200 } else {
2201 if (!NextCharacter(pos, increment)) {
2202 break;
2203 }
2204 }
2205 }
2206 } else if (dbcsCodePage) {
2207 constexpr size_t maxBytesCharacter = 2;
2208 constexpr size_t maxFoldingExpansion = 4;
2209 std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
2210 const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2211 while (forward ? (pos < endPos) : (pos >= endPos)) {
2212 int widthFirstCharacter = 0;
2213 Sci::Position indexDocument = 0;
2214 size_t indexSearch = 0;
2215 bool characterMatches = true;
2216 while (((pos + indexDocument) < limitPos) &&
2217 (indexSearch < lenSearch)) {
2218 const unsigned char leadByte = cbView.CharAt(pos + indexDocument);
2219 const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? 2 : 1;
2220 if (!widthFirstCharacter) {
2221 widthFirstCharacter = widthChar;
2222 }
2223 if ((pos + indexDocument + widthChar) > limitPos) {
2224 break;
2225 }
2226 size_t lenFlat = 1;
2227 if (widthChar == 1) {
2228 characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
2229 } else {
2230 char bytes[maxBytesCharacter + 1];
2231 bytes[0] = leadByte;
2232 bytes[1] = cbView.CharAt(pos + indexDocument + 1);
2233 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
2234 lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2235 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2236 assert((indexSearch + lenFlat) <= searchThing.size());
2237 // Does folded match the buffer
2238 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2239 }
2240 if (!characterMatches) {
2241 break;
2242 }
2243 indexDocument += widthChar;
2244 indexSearch += lenFlat;
2245 }
2246 if (characterMatches && (indexSearch == lenSearch)) {
2247 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2248 *length = indexDocument;
2249 return pos;
2250 }
2251 }
2252 if (forward) {
2253 pos += widthFirstCharacter;
2254 } else {
2255 if (!NextCharacter(pos, increment)) {
2256 break;
2257 }
2258 }
2259 }
2260 } else {
2261 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2262 std::vector<char> searchThing(lengthFind + 1);
2263 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2264 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2265 bool found = (pos + lengthFind) <= limitPos;
2266 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
2267 const char ch = cbView.CharAt(pos + indexSearch);
2268 const char chTest = searchThing[indexSearch];
2269 if (UTF8IsAscii(ch)) {
2270 found = chTest == MakeLowerCase(ch);
2271 } else {
2272 char folded[2];
2273 pcf->Fold(folded, sizeof(folded), &ch, 1);
2274 found = folded[0] == chTest;
2275 }
2276 }
2277 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2278 return pos;
2279 }
2280 pos += increment;
2281 }
2282 }
2283 }
2284 //Platform::DebugPrintf("Not found\n");
2285 return -1;
2286}
2287
2288const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2289 if (regex)
2290 return regex->SubstituteByPosition(this, text, length);
2291 else
2292 return nullptr;
2293}
2294
2295LineCharacterIndexType Document::LineCharacterIndex() const noexcept {
2296 return cb.LineCharacterIndex();
2297}
2298
2299void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2300 return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2301}
2302
2303void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2304 return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2305}
2306
2307Sci::Line Document::LinesTotal() const noexcept {
2308 return cb.Lines();
2309}
2310
2311void Document::AllocateLines(Sci::Line lines) {
2312 cb.AllocateLines(lines);
2313}
2314
2315void Document::SetDefaultCharClasses(bool includeWordClass) {
2316 charClass.SetDefaultCharClasses(includeWordClass);
2317}
2318
2319void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) {
2320 charClass.SetCharClasses(chars, newCharClass);
2321}
2322
2323int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const {
2324 return charClass.GetCharsOfClass(characterClass, buffer);
2325}
2326
2327void Document::SetCharacterCategoryOptimization(int countCharacters) {
2328 charMap.Optimize(countCharacters);
2329}
2330
2331int Document::CharacterCategoryOptimization() const noexcept {
2332 return charMap.Size();
2333}
2334
2335void SCI_METHOD Document::StartStyling(Sci_Position position) {
2336 endStyled = position;
2337}
2338
2339bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2340 if (enteredStyling != 0) {
2341 return false;
2342 } else {
2343 enteredStyling++;
2344 const Sci::Position prevEndStyled = endStyled;
2345 if (cb.SetStyleFor(endStyled, length, style)) {
2346 const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User,
2347 prevEndStyled, length);
2348 NotifyModified(mh);
2349 }
2350 endStyled += length;
2351 enteredStyling--;
2352 return true;
2353 }
2354}
2355
2356bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2357 if (enteredStyling != 0) {
2358 return false;
2359 } else {
2360 enteredStyling++;
2361 bool didChange = false;
2362 Sci::Position startMod = 0;
2363 Sci::Position endMod = 0;
2364 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2365 PLATFORM_ASSERT(endStyled < Length());
2366 if (cb.SetStyleAt(endStyled, styles[iPos])) {
2367 if (!didChange) {
2368 startMod = endStyled;
2369 }
2370 didChange = true;
2371 endMod = endStyled;
2372 }
2373 }
2374 if (didChange) {
2375 const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User,
2376 startMod, endMod - startMod + 1);
2377 NotifyModified(mh);
2378 }
2379 enteredStyling--;
2380 return true;
2381 }
2382}
2383
2384void Document::EnsureStyledTo(Sci::Position pos) {
2385 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2386 IncrementStyleClock();
2387 if (pli && !pli->UseContainerLexing()) {
2388 const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled());
2389 const Sci::Position endStyledTo = LineStart(lineEndStyled);
2390 pli->Colourise(endStyledTo, pos);
2391 } else {
2392 // Ask the watchers to style, and stop as soon as one responds.
2393 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2394 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2395 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2396 }
2397 }
2398 }
2399}
2400
2401void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2402 const Sci::Position stylingStart = GetEndStyled();
2403 ElapsedPeriod epStyling;
2404 EnsureStyledTo(pos);
2405 durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration());
2406}
2407
2408void Document::LexerChanged() {
2409 // Tell the watchers the lexer has changed.
2410 for (const WatcherWithUserData &watcher : watchers) {
2411 watcher.watcher->NotifyLexerChanged(this, watcher.userData);
2412 }
2413}
2414
2415LexInterface *Document::GetLexInterface() const noexcept {
2416 return pli.get();
2417}
2418
2419void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
2420 pli = std::move(pLexInterface);
2421}
2422
2423int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2424 const int statePrevious = States()->SetLineState(line, state);
2425 if (state != statePrevious) {
2426 const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), 0, 0, nullptr,
2427 static_cast<Sci::Line>(line));
2428 NotifyModified(mh);
2429 }
2430 return statePrevious;
2431}
2432
2433int SCI_METHOD Document::GetLineState(Sci_Position line) const {
2434 return States()->GetLineState(line);
2435}
2436
2437Sci::Line Document::GetMaxLineState() const noexcept {
2438 return States()->GetMaxLineState();
2439}
2440
2441void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2442 const DocModification mh(ModificationFlags::LexerState, start,
2443 end-start, 0, nullptr, 0);
2444 NotifyModified(mh);
2445}
2446
2447StyledText Document::MarginStyledText(Sci::Line line) const noexcept {
2448 const LineAnnotation *pla = Margins();
2449 return StyledText(pla->Length(line), pla->Text(line),
2450 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2451}
2452
2453void Document::MarginSetText(Sci::Line line, const char *text) {
2454 Margins()->SetText(line, text);
2455 const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line),
2456 0, 0, nullptr, line);
2457 NotifyModified(mh);
2458}
2459
2460void Document::MarginSetStyle(Sci::Line line, int style) {
2461 Margins()->SetStyle(line, style);
2462 NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line),
2463 0, 0, nullptr, line));
2464}
2465
2466void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2467 Margins()->SetStyles(line, styles);
2468 NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line),
2469 0, 0, nullptr, line));
2470}
2471
2472void Document::MarginClearAll() {
2473 const Sci::Line maxEditorLine = LinesTotal();
2474 for (Sci::Line l=0; l<maxEditorLine; l++)
2475 MarginSetText(l, nullptr);
2476 // Free remaining data
2477 Margins()->ClearAll();
2478}
2479
2480StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept {
2481 const LineAnnotation *pla = Annotations();
2482 return StyledText(pla->Length(line), pla->Text(line),
2483 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2484}
2485
2486void Document::AnnotationSetText(Sci::Line line, const char *text) {
2487 if (line >= 0 && line < LinesTotal()) {
2488 const Sci::Line linesBefore = AnnotationLines(line);
2489 Annotations()->SetText(line, text);
2490 const int linesAfter = AnnotationLines(line);
2491 DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2492 0, 0, nullptr, line);
2493 mh.annotationLinesAdded = linesAfter - linesBefore;
2494 NotifyModified(mh);
2495 }
2496}
2497
2498void Document::AnnotationSetStyle(Sci::Line line, int style) {
2499 if (line >= 0 && line < LinesTotal()) {
2500 Annotations()->SetStyle(line, style);
2501 const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2502 0, 0, nullptr, line);
2503 NotifyModified(mh);
2504 }
2505}
2506
2507void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2508 if (line >= 0 && line < LinesTotal()) {
2509 Annotations()->SetStyles(line, styles);
2510 }
2511}
2512
2513int Document::AnnotationLines(Sci::Line line) const noexcept {
2514 return Annotations()->Lines(line);
2515}
2516
2517void Document::AnnotationClearAll() {
2518 const Sci::Line maxEditorLine = LinesTotal();
2519 for (Sci::Line l=0; l<maxEditorLine; l++)
2520 AnnotationSetText(l, nullptr);
2521 // Free remaining data
2522 Annotations()->ClearAll();
2523}
2524
2525StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept {
2526 const LineAnnotation *pla = EOLAnnotations();
2527 return StyledText(pla->Length(line), pla->Text(line),
2528 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2529}
2530
2531void Document::EOLAnnotationSetText(Sci::Line line, const char *text) {
2532 if (line >= 0 && line < LinesTotal()) {
2533 EOLAnnotations()->SetText(line, text);
2534 const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2535 0, 0, nullptr, line);
2536 NotifyModified(mh);
2537 }
2538}
2539
2540void Document::EOLAnnotationSetStyle(Sci::Line line, int style) {
2541 if (line >= 0 && line < LinesTotal()) {
2542 EOLAnnotations()->SetStyle(line, style);
2543 const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2544 0, 0, nullptr, line);
2545 NotifyModified(mh);
2546 }
2547}
2548
2549void Document::EOLAnnotationClearAll() {
2550 const Sci::Line maxEditorLine = LinesTotal();
2551 for (Sci::Line l=0; l<maxEditorLine; l++)
2552 EOLAnnotationSetText(l, nullptr);
2553 // Free remaining data
2554 EOLAnnotations()->ClearAll();
2555}
2556
2557void Document::IncrementStyleClock() noexcept {
2558 styleClock = (styleClock + 1) % 0x100000;
2559}
2560
2561void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
2562 decorations->SetCurrentIndicator(indicator);
2563}
2564
2565void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2566 const FillResult<Sci::Position> fr = decorations->FillRange(
2567 position, value, fillLength);
2568 if (fr.changed) {
2569 const DocModification mh(ModificationFlags::ChangeIndicator | ModificationFlags::User,
2570 fr.position, fr.fillLength);
2571 NotifyModified(mh);
2572 }
2573}
2574
2575bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2576 const WatcherWithUserData wwud(watcher, userData);
2577 std::vector<WatcherWithUserData>::iterator it =
2578 std::find(watchers.begin(), watchers.end(), wwud);
2579 if (it != watchers.end())
2580 return false;
2581 watchers.push_back(wwud);
2582 return true;
2583}
2584
2585bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) noexcept {
2586 try {
2587 // This can never fail as WatcherWithUserData constructor and == are noexcept
2588 // but std::find is not noexcept.
2589 std::vector<WatcherWithUserData>::iterator it =
2590 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2591 if (it != watchers.end()) {
2592 watchers.erase(it);
2593 return true;
2594 }
2595 } catch (...) {
2596 // Ignore any exception
2597 }
2598 return false;
2599}
2600
2601void Document::NotifyModifyAttempt() {
2602 for (const WatcherWithUserData &watcher : watchers) {
2603 watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2604 }
2605}
2606
2607void Document::NotifySavePoint(bool atSavePoint) {
2608 for (const WatcherWithUserData &watcher : watchers) {
2609 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2610 }
2611}
2612
2613void Document::NotifyModified(DocModification mh) {
2614 if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) {
2615 decorations->InsertSpace(mh.position, mh.length);
2616 } else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) {
2617 decorations->DeleteRange(mh.position, mh.length);
2618 }
2619 for (const WatcherWithUserData &watcher : watchers) {
2620 watcher.watcher->NotifyModified(this, mh, watcher.userData);
2621 }
2622}
2623
2624bool Document::IsWordPartSeparator(unsigned int ch) const {
2625 return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch);
2626}
2627
2628Sci::Position Document::WordPartLeft(Sci::Position pos) const {
2629 if (pos > 0) {
2630 pos -= CharacterBefore(pos).widthBytes;
2631 CharacterExtracted ceStart = CharacterAfter(pos);
2632 if (IsWordPartSeparator(ceStart.character)) {
2633 while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
2634 pos -= CharacterBefore(pos).widthBytes;
2635 }
2636 }
2637 if (pos > 0) {
2638 ceStart = CharacterAfter(pos);
2639 pos -= CharacterBefore(pos).widthBytes;
2640 if (IsLowerCase(ceStart.character)) {
2641 while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
2642 pos -= CharacterBefore(pos).widthBytes;
2643 if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
2644 pos += CharacterAfter(pos).widthBytes;
2645 } else if (IsUpperCase(ceStart.character)) {
2646 while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
2647 pos -= CharacterBefore(pos).widthBytes;
2648 if (!IsUpperCase(CharacterAfter(pos).character))
2649 pos += CharacterAfter(pos).widthBytes;
2650 } else if (IsADigit(ceStart.character)) {
2651 while (pos > 0 && IsADigit(CharacterAfter(pos).character))
2652 pos -= CharacterBefore(pos).widthBytes;
2653 if (!IsADigit(CharacterAfter(pos).character))
2654 pos += CharacterAfter(pos).widthBytes;
2655 } else if (IsPunctuation(ceStart.character)) {
2656 while (pos > 0 && IsPunctuation(CharacterAfter(pos).character))
2657 pos -= CharacterBefore(pos).widthBytes;
2658 if (!IsPunctuation(CharacterAfter(pos).character))
2659 pos += CharacterAfter(pos).widthBytes;
2660 } else if (IsASpace(ceStart.character)) {
2661 while (pos > 0 && IsASpace(CharacterAfter(pos).character))
2662 pos -= CharacterBefore(pos).widthBytes;
2663 if (!IsASpace(CharacterAfter(pos).character))
2664 pos += CharacterAfter(pos).widthBytes;
2665 } else if (!IsASCII(ceStart.character)) {
2666 while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
2667 pos -= CharacterBefore(pos).widthBytes;
2668 if (IsASCII(CharacterAfter(pos).character))
2669 pos += CharacterAfter(pos).widthBytes;
2670 } else {
2671 pos += CharacterAfter(pos).widthBytes;
2672 }
2673 }
2674 }
2675 return pos;
2676}
2677
2678Sci::Position Document::WordPartRight(Sci::Position pos) const {
2679 CharacterExtracted ceStart = CharacterAfter(pos);
2680 const Sci::Position length = LengthNoExcept();
2681 if (IsWordPartSeparator(ceStart.character)) {
2682 while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
2683 pos += CharacterAfter(pos).widthBytes;
2684 ceStart = CharacterAfter(pos);
2685 }
2686 if (!IsASCII(ceStart.character)) {
2687 while (pos < length && !IsASCII(CharacterAfter(pos).character))
2688 pos += CharacterAfter(pos).widthBytes;
2689 } else if (IsLowerCase(ceStart.character)) {
2690 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2691 pos += CharacterAfter(pos).widthBytes;
2692 } else if (IsUpperCase(ceStart.character)) {
2693 if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
2694 pos += CharacterAfter(pos).widthBytes;
2695 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2696 pos += CharacterAfter(pos).widthBytes;
2697 } else {
2698 while (pos < length && IsUpperCase(CharacterAfter(pos).character))
2699 pos += CharacterAfter(pos).widthBytes;
2700 }
2701 if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
2702 pos -= CharacterBefore(pos).widthBytes;
2703 } else if (IsADigit(ceStart.character)) {
2704 while (pos < length && IsADigit(CharacterAfter(pos).character))
2705 pos += CharacterAfter(pos).widthBytes;
2706 } else if (IsPunctuation(ceStart.character)) {
2707 while (pos < length && IsPunctuation(CharacterAfter(pos).character))
2708 pos += CharacterAfter(pos).widthBytes;
2709 } else if (IsASpace(ceStart.character)) {
2710 while (pos < length && IsASpace(CharacterAfter(pos).character))
2711 pos += CharacterAfter(pos).widthBytes;
2712 } else {
2713 pos += CharacterAfter(pos).widthBytes;
2714 }
2715 return pos;
2716}
2717
2718Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
2719 const char sStart = cb.StyleAt(pos);
2720 if (delta < 0) {
2721 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
2722 pos--;
2723 pos++;
2724 } else {
2725 while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
2726 pos++;
2727 }
2728 return pos;
2729}
2730
// Return the brace that pairs with ch for the (), [], {} and <> pairs,
// or '\0' when ch is not one of those eight characters.
static char BraceOpposite(char ch) noexcept {
	// Partners sit next to each other, so flipping the lowest bit of the index swaps them.
	constexpr char bracePairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
	constexpr size_t braceCount = sizeof(bracePairs);
	for (size_t i = 0; i < braceCount; i++) {
		if (bracePairs[i] == ch) {
			return bracePairs[i ^ 1];
		}
	}
	return '\0';
}
2753
2754// TODO: should be able to extend styled region to find matching brace
2755Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept {
2756 const char chBrace = CharAt(position);
2757 const char chSeek = BraceOpposite(chBrace);
2758 if (chSeek == '\0')
2759 return - 1;
2760 const int styBrace = StyleIndexAt(position);
2761 int direction = -1;
2762 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2763 direction = 1;
2764 int depth = 1;
2765 position = useStartPos ? startPos : NextPosition(position, direction);
2766 while ((position >= 0) && (position < LengthNoExcept())) {
2767 const char chAtPos = CharAt(position);
2768 const int styAtPos = StyleIndexAt(position);
2769 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2770 if (chAtPos == chBrace)
2771 depth++;
2772 if (chAtPos == chSeek)
2773 depth--;
2774 if (depth == 0)
2775 return position;
2776 }
2777 const Sci::Position positionBeforeMove = position;
2778 position = NextPosition(position, direction);
2779 if (position == positionBeforeMove)
2780 break;
2781 }
2782 return - 1;
2783}
2784
2785/**
2786 * Implementation of RegexSearchBase for the default built-in regular expression engine
2787 */
2788class BuiltinRegex : public RegexSearchBase {
2789public:
2790 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2791
2792 Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
2793 bool caseSensitive, bool word, bool wordStart, FindOption flags,
2794 Sci::Position *length) override;
2795
2796 const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;
2797
2798private:
2799 RESearch search;
2800 std::string substituted;
2801};
2802
2803namespace {
2804
2805/**
2806* RESearchRange keeps track of search range.
2807*/
2808class RESearchRange {
2809public:
2810 const Document *doc;
2811 int increment;
2812 Sci::Position startPos;
2813 Sci::Position endPos;
2814 Sci::Line lineRangeStart;
2815 Sci::Line lineRangeEnd;
2816 Sci::Line lineRangeBreak;
2817 RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) {
2818 increment = (minPos <= maxPos) ? 1 : -1;
2819
2820 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2821 // but just in case, move them.
2822 startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2823 endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2824
2825 lineRangeStart = doc->SciLineFromPosition(startPos);
2826 lineRangeEnd = doc->SciLineFromPosition(endPos);
2827 lineRangeBreak = lineRangeEnd + increment;
2828 }
2829 Range LineRange(Sci::Line line) const {
2830 Range range(doc->LineStart(line), doc->LineEnd(line));
2831 if (increment == 1) {
2832 if (line == lineRangeStart)
2833 range.start = startPos;
2834 if (line == lineRangeEnd)
2835 range.end = endPos;
2836 } else {
2837 if (line == lineRangeEnd)
2838 range.start = endPos;
2839 if (line == lineRangeStart)
2840 range.end = startPos;
2841 }
2842 return range;
2843 }
2844};
2845
2846// Define a way for the Regular Expression code to access the document
2847class DocumentIndexer : public CharacterIndexer {
2848 Document *pdoc;
2849 Sci::Position end;
2850public:
2851 DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2852 pdoc(pdoc_), end(end_) {
2853 }
2854
2855 DocumentIndexer(const DocumentIndexer &) = delete;
2856 DocumentIndexer(DocumentIndexer &&) = delete;
2857 DocumentIndexer &operator=(const DocumentIndexer &) = delete;
2858 DocumentIndexer &operator=(DocumentIndexer &&) = delete;
2859
2860 ~DocumentIndexer() override = default;
2861
2862 char CharAt(Sci::Position index) const noexcept override {
2863 if (index < 0 || index >= end)
2864 return 0;
2865 else
2866 return pdoc->CharAt(index);
2867 }
2868};
2869
2870#ifndef NO_CXX11_REGEX
2871
2872class ByteIterator {
2873public:
2874 using iterator_category = std::bidirectional_iterator_tag;
2875 using value_type = char;
2876 using difference_type = ptrdiff_t;
2877 using pointer = char*;
2878 using reference = char&;
2879
2880 const Document *doc;
2881 Sci::Position position;
2882
2883 explicit ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2884 doc(doc_), position(position_) {
2885 }
2886 char operator*() const noexcept {
2887 return doc->CharAt(position);
2888 }
2889 ByteIterator &operator++() noexcept {
2890 position++;
2891 return *this;
2892 }
2893 ByteIterator operator++(int) noexcept {
2894 ByteIterator retVal(*this);
2895 position++;
2896 return retVal;
2897 }
2898 ByteIterator &operator--() noexcept {
2899 position--;
2900 return *this;
2901 }
2902 bool operator==(const ByteIterator &other) const noexcept {
2903 return doc == other.doc && position == other.position;
2904 }
2905 bool operator!=(const ByteIterator &other) const noexcept {
2906 return doc != other.doc || position != other.position;
2907 }
2908 Sci::Position Pos() const noexcept {
2909 return position;
2910 }
2911 Sci::Position PosRoundUp() const noexcept {
2912 return position;
2913 }
2914};
2915
2916// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2917// Would be better to use sizeof(wchar_t) or similar to differentiate
2918// but easier for now to hard-code platforms.
2919// C++11 has char16_t and char32_t but neither Clang nor Visual C++
2920// appear to allow specializing basic_regex over these.
2921
#ifndef _WIN32
#define WCHAR_T_IS_16 0
#else
#define WCHAR_T_IS_16 1
#endif
2927
2928#if WCHAR_T_IS_16
2929
2930// On Windows, report non-BMP characters as 2 separate surrogates as that
2931// matches wregex since it is based on wchar_t.
2932class UTF8Iterator {
2933 // These 3 fields determine the iterator position and are used for comparisons
2934 const Document *doc;
2935 Sci::Position position;
2936 size_t characterIndex;
2937 // Remaining fields are derived from the determining fields so are excluded in comparisons
2938 unsigned int lenBytes;
2939 size_t lenCharacters;
2940 wchar_t buffered[2];
2941public:
2942 using iterator_category = std::bidirectional_iterator_tag;
2943 using value_type = wchar_t;
2944 using difference_type = ptrdiff_t;
2945 using pointer = wchar_t*;
2946 using reference = wchar_t&;
2947
2948 explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2949 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
2950 buffered[0] = 0;
2951 buffered[1] = 0;
2952 if (doc) {
2953 ReadCharacter();
2954 }
2955 }
2956 wchar_t operator*() const noexcept {
2957 assert(lenCharacters != 0);
2958 return buffered[characterIndex];
2959 }
2960 UTF8Iterator &operator++() noexcept {
2961 if ((characterIndex + 1) < (lenCharacters)) {
2962 characterIndex++;
2963 } else {
2964 position += lenBytes;
2965 ReadCharacter();
2966 characterIndex = 0;
2967 }
2968 return *this;
2969 }
2970 UTF8Iterator operator++(int) noexcept {
2971 UTF8Iterator retVal(*this);
2972 if ((characterIndex + 1) < (lenCharacters)) {
2973 characterIndex++;
2974 } else {
2975 position += lenBytes;
2976 ReadCharacter();
2977 characterIndex = 0;
2978 }
2979 return retVal;
2980 }
2981 UTF8Iterator &operator--() noexcept {
2982 if (characterIndex) {
2983 characterIndex--;
2984 } else {
2985 position = doc->NextPosition(position, -1);
2986 ReadCharacter();
2987 characterIndex = lenCharacters - 1;
2988 }
2989 return *this;
2990 }
2991 bool operator==(const UTF8Iterator &other) const noexcept {
2992 // Only test the determining fields, not the character widths and values derived from this
2993 return doc == other.doc &&
2994 position == other.position &&
2995 characterIndex == other.characterIndex;
2996 }
2997 bool operator!=(const UTF8Iterator &other) const noexcept {
2998 // Only test the determining fields, not the character widths and values derived from this
2999 return doc != other.doc ||
3000 position != other.position ||
3001 characterIndex != other.characterIndex;
3002 }
3003 Sci::Position Pos() const noexcept {
3004 return position;
3005 }
3006 Sci::Position PosRoundUp() const noexcept {
3007 if (characterIndex)
3008 return position + lenBytes; // Force to end of character
3009 else
3010 return position;
3011 }
3012private:
3013 void ReadCharacter() noexcept {
3014 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3015 lenBytes = charExtracted.widthBytes;
3016 if (charExtracted.character == unicodeReplacementChar) {
3017 lenCharacters = 1;
3018 buffered[0] = static_cast<wchar_t>(charExtracted.character);
3019 } else {
3020 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
3021 }
3022 }
3023};
3024
3025#else
3026
3027// On Unix, report non-BMP characters as single characters
3028
3029class UTF8Iterator {
3030 const Document *doc;
3031 Sci::Position position;
3032public:
3033 using iterator_category = std::bidirectional_iterator_tag;
3034 using value_type = wchar_t;
3035 using difference_type = ptrdiff_t;
3036 using pointer = wchar_t*;
3037 using reference = wchar_t&;
3038
3039 explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
3040 doc(doc_), position(position_) {
3041 }
3042 wchar_t operator*() const noexcept {
3043 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3044 return charExtracted.character;
3045 }
3046 UTF8Iterator &operator++() noexcept {
3047 position = doc->NextPosition(position, 1);
3048 return *this;
3049 }
3050 UTF8Iterator operator++(int) noexcept {
3051 UTF8Iterator retVal(*this);
3052 position = doc->NextPosition(position, 1);
3053 return retVal;
3054 }
3055 UTF8Iterator &operator--() noexcept {
3056 position = doc->NextPosition(position, -1);
3057 return *this;
3058 }
3059 bool operator==(const UTF8Iterator &other) const noexcept {
3060 return doc == other.doc && position == other.position;
3061 }
3062 bool operator!=(const UTF8Iterator &other) const noexcept {
3063 return doc != other.doc || position != other.position;
3064 }
3065 Sci::Position Pos() const noexcept {
3066 return position;
3067 }
3068 Sci::Position PosRoundUp() const noexcept {
3069 return position;
3070 }
3071};
3072
3073#endif
3074
3075std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) {
3076 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
3077 if (!doc->IsLineStartPosition(startPos))
3078 flagsMatch |= std::regex_constants::match_not_bol;
3079 if (!doc->IsLineEndPosition(endPos))
3080 flagsMatch |= std::regex_constants::match_not_eol;
3081 return flagsMatch;
3082}
3083
3084template<typename Iterator, typename Regex>
3085bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
3086 std::match_results<Iterator> match;
3087
3088 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3089 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3090 // The std::regex::multiline option was added to C++17 to improve behaviour but
3091 // has not been implemented by compiler runtimes with MSVC always in multiline
3092 // mode and libc++ and libstdc++ always in single-line mode.
3093 // If multiline regex worked well then the line by line iteration could be removed
3094 // for the forwards case and replaced with the following 4 lines:
3095#ifdef REGEX_MULTILINE
3096 Iterator itStart(doc, resr.startPos);
3097 Iterator itEnd(doc, resr.endPos);
3098 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos);
3099 const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3100#else
3101 // Line by line.
3102 bool matched = false;
3103 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3104 const Range lineRange = resr.LineRange(line);
3105 Iterator itStart(doc, lineRange.start);
3106 Iterator itEnd(doc, lineRange.end);
3107 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
3108 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3109 // Check for the last match on this line.
3110 if (matched) {
3111 if (resr.increment == -1) {
3112 while (matched) {
3113 Iterator itNext(doc, match[0].second.PosRoundUp());
3114 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
3115 std::match_results<Iterator> matchNext;
3116 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
3117 if (matched) {
3118 if (match[0].first == match[0].second) {
3119 // Empty match means failure so exit
3120 return false;
3121 }
3122 match = matchNext;
3123 }
3124 }
3125 matched = true;
3126 }
3127 break;
3128 }
3129 }
3130#endif
3131 if (matched) {
3132 for (size_t co = 0; co < match.size() && co < RESearch::MAXTAG; co++) {
3133 search.bopat[co] = match[co].first.Pos();
3134 search.eopat[co] = match[co].second.PosRoundUp();
3135 const Sci::Position lenMatch = search.eopat[co] - search.bopat[co];
3136 search.pat[co].resize(lenMatch);
3137 for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) {
3138 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
3139 }
3140 }
3141 }
3142 return matched;
3143}
3144
3145Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3146 bool caseSensitive, Sci::Position *length, RESearch &search) {
3147 const RESearchRange resr(doc, minPos, maxPos);
3148 try {
3149 //ElapsedPeriod ep;
3150 std::regex::flag_type flagsRe = std::regex::ECMAScript;
3151 // Flags that appear to have no effect:
3152 // | std::regex::collate | std::regex::extended;
3153 if (!caseSensitive)
3154 flagsRe = flagsRe | std::regex::icase;
3155
3156 // Clear the RESearch so can fill in matches
3157 search.Clear();
3158
3159 bool matched = false;
3160 if (CpUtf8 == doc->dbcsCodePage) {
3161 const std::wstring ws = WStringFromUTF8(s);
3162 std::wregex regexp;
3163 regexp.assign(ws, flagsRe);
3164 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3165
3166 } else {
3167 std::regex regexp;
3168 regexp.assign(s, flagsRe);
3169 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3170 }
3171
3172 Sci::Position posMatch = -1;
3173 if (matched) {
3174 posMatch = search.bopat[0];
3175 *length = search.eopat[0] - search.bopat[0];
3176 }
3177 // Example - search in doc/ScintillaHistory.html for
3178 // [[:upper:]]eta[[:space:]]
3179 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3180 //const double durSearch = ep.Duration(true);
3181 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3182 return posMatch;
3183 } catch (std::regex_error &) {
3184 // Failed to create regular expression
3185 throw RegexError();
3186 } catch (...) {
3187 // Failed in some other way
3188 return -1;
3189 }
3190}
3191
3192#endif
3193
3194}
3195
3196Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3197 bool caseSensitive, bool, bool, FindOption flags,
3198 Sci::Position *length) {
3199
3200#ifndef NO_CXX11_REGEX
3201 if (FlagSet(flags, FindOption::Cxx11RegEx)) {
3202 return Cxx11RegexFindText(doc, minPos, maxPos, s,
3203 caseSensitive, length, search);
3204 }
3205#endif
3206
3207 const RESearchRange resr(doc, minPos, maxPos);
3208
3209 const bool posix = FlagSet(flags, FindOption::Posix);
3210
3211 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3212 if (errmsg) {
3213 return -1;
3214 }
3215 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3216 // Replace first '.' with '-' in each property file variable reference:
3217 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3218 // Replace: $(\1-\2)
3219 Sci::Position pos = -1;
3220 Sci::Position lenRet = 0;
3221 const bool searchforLineStart = s[0] == '^';
3222 const char searchEnd = s[*length - 1];
3223 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3224 const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3225 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3226 Sci::Position startOfLine = doc->LineStart(line);
3227 Sci::Position endOfLine = doc->LineEnd(line);
3228 if (resr.increment == 1) {
3229 if (line == resr.lineRangeStart) {
3230 if ((resr.startPos != startOfLine) && searchforLineStart)
3231 continue; // Can't match start of line if start position after start of line
3232 startOfLine = resr.startPos;
3233 }
3234 if (line == resr.lineRangeEnd) {
3235 if ((resr.endPos != endOfLine) && searchforLineEnd)
3236 continue; // Can't match end of line if end position before end of line
3237 endOfLine = resr.endPos;
3238 }
3239 } else {
3240 if (line == resr.lineRangeEnd) {
3241 if ((resr.endPos != startOfLine) && searchforLineStart)
3242 continue; // Can't match start of line if end position after start of line
3243 startOfLine = resr.endPos;
3244 }
3245 if (line == resr.lineRangeStart) {
3246 if ((resr.startPos != endOfLine) && searchforLineEnd)
3247 continue; // Can't match end of line if start position before end of line
3248 endOfLine = resr.startPos;
3249 }
3250 }
3251
3252 const DocumentIndexer di(doc, endOfLine);
3253 int success = search.Execute(di, startOfLine, endOfLine);
3254 if (success) {
3255 pos = search.bopat[0];
3256 // Ensure only whole characters selected
3257 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
3258 lenRet = search.eopat[0] - search.bopat[0];
3259 // There can be only one start of a line, so no need to look for last match in line
3260 if ((resr.increment == -1) && !searchforLineStart) {
3261 // Check for the last match on this line.
3262 int repetitions = 1000; // Break out of infinite loop
3263 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
3264 success = search.Execute(di, pos+1, endOfLine);
3265 if (success) {
3266 if (search.eopat[0] <= minPos) {
3267 pos = search.bopat[0];
3268 lenRet = search.eopat[0] - search.bopat[0];
3269 } else {
3270 success = 0;
3271 }
3272 }
3273 }
3274 }
3275 break;
3276 }
3277 }
3278 *length = lenRet;
3279 return pos;
3280}
3281
3282const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3283 substituted.clear();
3284 const DocumentIndexer di(doc, doc->Length());
3285 search.GrabMatches(di);
3286 for (Sci::Position j = 0; j < *length; j++) {
3287 if (text[j] == '\\') {
3288 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
3289 const unsigned int patNum = text[j + 1] - '0';
3290 const Sci::Position len = search.eopat[patNum] - search.bopat[patNum];
3291 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
3292 substituted.append(search.pat[patNum].c_str(), len);
3293 j++;
3294 } else {
3295 j++;
3296 switch (text[j]) {
3297 case 'a':
3298 substituted.push_back('\a');
3299 break;
3300 case 'b':
3301 substituted.push_back('\b');
3302 break;
3303 case 'f':
3304 substituted.push_back('\f');
3305 break;
3306 case 'n':
3307 substituted.push_back('\n');
3308 break;
3309 case 'r':
3310 substituted.push_back('\r');
3311 break;
3312 case 't':
3313 substituted.push_back('\t');
3314 break;
3315 case 'v':
3316 substituted.push_back('\v');
3317 break;
3318 case '\\':
3319 substituted.push_back('\\');
3320 break;
3321 default:
3322 substituted.push_back('\\');
3323 j--;
3324 }
3325 }
3326 } else {
3327 substituted.push_back(text[j]);
3328 }
3329 }
3330 *length = substituted.length();
3331 return substituted.c_str();
3332}
3333
3334#ifndef SCI_OWNREGEX
3335
3336RegexSearchBase *Scintilla::Internal::CreateRegexSearch(CharClassify *charClassTable) {
3337 return new BuiltinRegex(charClassTable);
3338}
3339
3340#endif
3341