Document.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/scintilla/src/Document.cxx]

1	// Scintilla source code edit control
2	/** @file Document.cxx
3	** Text document that handles notifications, DBCS, styling, words and end of line.
4	**/
5	// Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6	// The License.txt file describes the conditions under which this software may be distributed.
7
8	#include <cstddef>
9	#include <cstdlib>
10	#include <cassert>
11	#include <cstring>
12	#include <cstdio>
13	#include <cmath>
14
15	#include <stdexcept>
16	#include <string>
17	#include <string_view>
18	#include <vector>
19	#include <forward_list>
20	#include <optional>
21	#include <algorithm>
22	#include <memory>
23	#include <chrono>
24
25	#ifndef NO_CXX11_REGEX
26	#include <regex>
27	#endif
28
29	#include "ScintillaTypes.h"
30	#include "ILoader.h"
31	#include "ILexer.h"
32
33	#include "Debugging.h"
34
35	#include "CharacterType.h"
36	#include "CharacterCategoryMap.h"
37	#include "Position.h"
38	#include "SplitVector.h"
39	#include "Partitioning.h"
40	#include "RunStyles.h"
41	#include "CellBuffer.h"
42	#include "PerLine.h"
43	#include "CharClassify.h"
44	#include "Decoration.h"
45	#include "CaseFolder.h"
46	#include "Document.h"
47	#include "RESearch.h"
48	#include "UniConversion.h"
49	#include "ElapsedPeriod.h"
50
51	using namespace Scintilla;
52	using namespace Scintilla::Internal;
53
54	LexInterface::LexInterface(Document pdoc_) noexcept* : pdoc(pdoc_), performingStyle(false) {
55	}
56
57	LexInterface::~LexInterface() noexcept = default;
58
59	void LexInterface::SetInstance(ILexer5 *instance_) {
60	instance.reset(instance_);
61	pdoc->LexerChanged();
62	}
63
64	void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
65	if (pdoc && instance && !performingStyle) {
66	// Protect against reentrance, which may occur, for example, when
67	// fold points are discovered while performing styling and the folding
68	// code looks for child lines which may trigger styling.
69	performingStyle = true;
70
71	const Sci::Position lengthDoc = pdoc->Length();
72	if (end == -`1`)
73	end = lengthDoc;
74	const Sci::Position len = end - start;
75
76	PLATFORM_ASSERT(len >= `0`);
77	PLATFORM_ASSERT(start + len <= lengthDoc);
78
79	int styleStart = `0`;
80	if (start > `0`)
81	styleStart = pdoc->StyleAt(start - `1`);
82
83	if (len > `0`) {
84	instance ->Lex(start, len, styleStart, pdoc);
85	instance ->Fold(start, len, styleStart, pdoc);
86	}
87
88	performingStyle = false;
89	}
90	}
91
92	LineEndType LexInterface::LineEndTypesSupported() {
93	if (instance) {
94	return static_cast<LineEndType>(instance ->LineEndTypesSupported());
95	}
96	return LineEndType::Default;
97	}
98
99	bool LexInterface::UseContainerLexing() const noexcept {
100	return !instance;
101	}
102
103	ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
104	duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
105	}
106
107	void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
108	// Only adjust for multiple actions to avoid instability
109	if (numberActions < `8`)
110	return;
111
112	// Alpha value for exponential smoothing.
113	// Most recent value contributes 25% to smoothed value.
114	constexpr double alpha = `0.25`;
115
116	const double durationOne = durationOfActions / numberActions;
117	duration = std::clamp(alpha * durationOne + (`1.0` - alpha) * duration,
118	minDuration, maxDuration);
119	}
120
121	double ActionDuration::Duration() const noexcept {
122	return duration;
123	}
124
125	size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept {
126	return std::lround(secondsAllowed / Duration());
127	}
128
129	Document::Document(DocumentOption options) :
130	cb (!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
131	durationStyleOneByte (`0.000001`, `0.0000001`, `0.00001`) {
132	refCount = `0`;
133	#ifdef _WIN32
134	eolMode = EndOfLine::CrLf;
135	#else
136	eolMode = EndOfLine::Lf;
137	#endif
138	dbcsCodePage = CpUtf8;
139	lineEndBitSet = LineEndType::Default;
140	endStyled = `0`;
141	styleClock = `0`;
142	enteredModification = `0`;
143	enteredStyling = `0`;
144	enteredReadOnlyCount = `0`;
145	insertionSet = false;
146	tabInChars = `8`;
147	indentInChars = `0`;
148	actualIndentInChars = `8`;
149	useTabs = true;
150	tabIndents = true;
151	backspaceUnindents = false;
152
153	matchesValid = false;
154
155	perLineData[ldMarkers] = std::make_unique<LineMarkers>();
156	perLineData[ldLevels] = std::make_unique<LineLevels>();
157	perLineData[ldState] = std::make_unique<LineState>();
158	perLineData[ldMargin] = std::make_unique<LineAnnotation>();
159	perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
160	perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();
161
162	decorations = DecorationListCreate(IsLarge());
163
164	cb.SetPerLine(this);
165	cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
166	}
167
168	Document::~Document() {
169	for (const WatcherWithUserData &watcher : watchers) {
170	watcher.watcher->NotifyDeleted(this, watcher.userData);
171	}
172	}
173
174	// Increase reference count and return its previous value.
175	int Document::AddRef() {
176	return refCount++;
177	}
178
179	// Decrease reference count and return its previous value.
180	// Delete the document if reference count reaches zero.
181	int SCI_METHOD Document::Release() {
182	const int curRefCount = --refCount;
183	if (curRefCount == `0`)
184	delete this;
185	return curRefCount;
186	}
187
188	void Document::Init() {
189	for (const std::unique_ptr<PerLine> &pl : perLineData) {
190	if (pl)
191	pl ->Init();
192	}
193	}
194
195	void Document::InsertLine(Sci::Line line) {
196	for (const std::unique_ptr<PerLine> &pl : perLineData) {
197	if (pl)
198	pl ->InsertLine(line);
199	}
200	}
201
202	void Document::InsertLines(Sci::Line line, Sci::Line lines) {
203	for (const auto &pl : perLineData) {
204	if (pl)
205	pl ->InsertLines(line, lines);
206	}
207	}
208
209	void Document::RemoveLine(Sci::Line line) {
210	for (const std::unique_ptr<PerLine> &pl : perLineData) {
211	if (pl)
212	pl ->RemoveLine(line);
213	}
214	}
215
216	LineMarkers Document::Markers() const* noexcept {
217	return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get());
218	}
219
220	LineLevels Document::Levels() const* noexcept {
221	return dynamic_cast<LineLevels *>(perLineData[ldLevels].get());
222	}
223
224	LineState Document::States() const* noexcept {
225	return dynamic_cast<LineState *>(perLineData[ldState].get());
226	}
227
228	LineAnnotation Document::Margins() const* noexcept {
229	return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get());
230	}
231
232	LineAnnotation Document::Annotations() const* noexcept {
233	return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
234	}
235
236	LineAnnotation Document::EOLAnnotations() const* noexcept {
237	return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get());
238	}
239
240	LineEndType Document::LineEndTypesSupported() const {
241	if ((CpUtf8 == dbcsCodePage) && pli)
242	return pli ->LineEndTypesSupported();
243	else
244	return LineEndType::Default;
245	}
246
247	bool Document::SetDBCSCodePage(int dbcsCodePage_) {
248	if (dbcsCodePage != dbcsCodePage_) {
249	dbcsCodePage = dbcsCodePage_;
250	SetCaseFolder(nullptr);
251	cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
252	cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
253	ModifiedAt(`0`); // Need to restyle whole document
254	return true;
255	} else {
256	return false;
257	}
258	}
259
260	bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) {
261	if (lineEndBitSet != lineEndBitSet_) {
262	lineEndBitSet = lineEndBitSet_;
263	const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
264	if (lineEndBitSetActive != cb.GetLineEndTypes()) {
265	ModifiedAt(`0`);
266	cb.SetLineEndTypes(lineEndBitSetActive);
267	return true;
268	} else {
269	return false;
270	}
271	} else {
272	return false;
273	}
274	}
275
276	void Document::SetSavePoint() {
277	cb.SetSavePoint();
278	NotifySavePoint(true);
279	}
280
281	void Document::TentativeUndo() {
282	if (!TentativeActive())
283	return;
284	CheckReadOnly();
285	if (enteredModification == `0`) {
286	enteredModification++;
287	if (!cb.IsReadOnly()) {
288	const bool startSavePoint = cb.IsSavePoint();
289	bool multiLine = false;
290	const int steps = cb.TentativeSteps();
291	//Platform::DebugPrintf("Steps=%d\n", steps);
292	for (int step = `0`; step < steps; step++) {
293	const Sci::Line prevLinesTotal = LinesTotal();
294	const Action &action = cb.GetUndoStep();
295	if (action.at == ActionType::remove) {
296	NotifyModified(DocModification (
297	ModificationFlags::BeforeInsert \| ModificationFlags::Undo, action));
298	} else if (action.at == ActionType::container) {
299	DocModification dm(ModificationFlags::Container \| ModificationFlags::Undo);
300	dm.token = action.position;
301	NotifyModified(dm);
302	} else {
303	NotifyModified(DocModification (
304	ModificationFlags::BeforeDelete \| ModificationFlags::Undo, action));
305	}
306	cb.PerformUndoStep();
307	if (action.at != ActionType::container) {
308	ModifiedAt(action.position);
309	}
310
311	ModificationFlags modFlags = ModificationFlags::Undo;
312	// With undo, an insertion action becomes a deletion notification
313	if (action.at == ActionType::remove) {
314	modFlags \|= ModificationFlags::InsertText;
315	} else if (action.at == ActionType::insert) {
316	modFlags \|= ModificationFlags::DeleteText;
317	}
318	if (steps > `1`)
319	modFlags \|= ModificationFlags::MultiStepUndoRedo;
320	const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
321	if (linesAdded != `0`)
322	multiLine = true;
323	if (step == steps - `1`) {
324	modFlags \|= ModificationFlags::LastStepInUndoRedo;
325	if (multiLine)
326	modFlags \|= ModificationFlags::MultilineUndoRedo;
327	}
328	NotifyModified(DocModification (modFlags, action.position, action.lenData,
329	linesAdded, action.data.get()));
330	}
331
332	const bool endSavePoint = cb.IsSavePoint();
333	if (startSavePoint != endSavePoint)
334	NotifySavePoint(endSavePoint);
335
336	cb.TentativeCommit();
337	}
338	enteredModification--;
339	}
340	}
341
342	int Document::GetMark(Sci::Line line) const noexcept {
343	return Markers()->MarkValue(line);
344	}
345
346	Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept {
347	return Markers()->MarkerNext(lineStart, mask);
348	}
349
350	int Document::AddMark(Sci::Line line, int markerNum) {
351	if (line >= `0` && line <= LinesTotal()) {
352	const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
353	const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), `0`, `0`, nullptr, line);
354	NotifyModified(mh);
355	return prev;
356	} else {
357	return -`1`;
358	}
359	}
360
361	void Document::AddMarkSet(Sci::Line line, int valueSet) {
362	if (line < `0` \|\| line > LinesTotal()) {
363	return;
364	}
365	unsigned int m = valueSet;
366	for (int i = `0`; m; i++, m >>= `1`) {
367	if (m & `1`)
368	Markers()->AddMark(line, i, LinesTotal());
369	}
370	const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), `0`, `0`, nullptr, line);
371	NotifyModified(mh);
372	}
373
374	void Document::DeleteMark(Sci::Line line, int markerNum) {
375	Markers()->DeleteMark(line, markerNum, false);
376	const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), `0`, `0`, nullptr, line);
377	NotifyModified(mh);
378	}
379
380	void Document::DeleteMarkFromHandle(int markerHandle) {
381	Markers()->DeleteMarkFromHandle(markerHandle);
382	DocModification mh(ModificationFlags::ChangeMarker);
383	mh.line = -`1`;
384	NotifyModified(mh);
385	}
386
387	void Document::DeleteAllMarks(int markerNum) {
388	bool someChanges = false;
389	for (Sci::Line line = `0`; line < LinesTotal(); line++) {
390	if (Markers()->DeleteMark(line, markerNum, true))
391	someChanges = true;
392	}
393	if (someChanges) {
394	DocModification mh(ModificationFlags::ChangeMarker);
395	mh.line = -`1`;
396	NotifyModified(mh);
397	}
398	}
399
400	Sci::Line Document::LineFromHandle(int markerHandle) const noexcept {
401	return Markers()->LineFromHandle(markerHandle);
402	}
403
404	int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept {
405	return Markers()->NumberFromLine(line, which);
406	}
407
408	int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept {
409	return Markers()->HandleFromLine(line, which);
410	}
411
412	Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
413	return cb.LineStart(line);
414	}
415
416	bool Document::IsLineStartPosition(Sci::Position position) const {
417	return LineStart(LineFromPosition(position)) == position;
418	}
419
420	Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
421	if (line >= LinesTotal() - `1`) {
422	return LineStart(line + `1`);
423	} else {
424	Sci::Position position = LineStart(line + `1`);
425	if (LineEndType::Unicode == cb.GetLineEndTypes()) {
426	const unsigned char bytes[] = {
427	cb.UCharAt(position-`3`),
428	cb.UCharAt(position-`2`),
429	cb.UCharAt(position-`1`),
430	};
431	if (UTF8IsSeparator(bytes)) {
432	return position - UTF8SeparatorLength;
433	}
434	if (UTF8IsNEL(bytes+`1`)) {
435	return position - UTF8NELLength;
436	}
437	}
438	position--; // Back over CR or LF
439	// When line terminator is CR+LF, may need to go back one more
440	if ((position > LineStart(line)) && (cb.CharAt(position - `1`) == `'\r'`)) {
441	position--;
442	}
443	return position;
444	}
445	}
446
447	void SCI_METHOD Document::SetErrorStatus(int status) {
448	// Tell the watchers an error has occurred.
449	for (const WatcherWithUserData &watcher : watchers) {
450	watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast<Status>(status));
451	}
452	}
453
454	Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
455	return cb.LineFromPosition(pos);
456	}
457
458	Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
459	// Avoids casting in callers for this very common function
460	return cb.LineFromPosition(pos);
461	}
462
463	Sci::Position Document::LineEndPosition(Sci::Position position) const {
464	return LineEnd(LineFromPosition(position));
465	}
466
467	bool Document::IsLineEndPosition(Sci::Position position) const {
468	return LineEnd(LineFromPosition(position)) == position;
469	}
470
471	bool Document::IsPositionInLineEnd(Sci::Position position) const {
472	return position >= LineEnd(LineFromPosition(position));
473	}
474
475	Sci::Position Document::VCHomePosition(Sci::Position position) const {
476	const Sci::Line line = SciLineFromPosition(position);
477	const Sci::Position startPosition = LineStart(line);
478	const Sci::Position endLine = LineEnd(line);
479	Sci::Position startText = startPosition;
480	while (startText < endLine && (cb.CharAt(startText) == `' '` \|\| cb.CharAt(startText) == `'\t'`))
481	startText++;
482	if (position == startText)
483	return startPosition;
484	else
485	return startText;
486	}
487
488	Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept {
489	return cb.IndexLineStart(line, lineCharacterIndex);
490	}
491
492	Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept {
493	return cb.LineFromPositionIndex(pos, lineCharacterIndex);
494	}
495
496	Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept {
497	const Sci::Position posAfter = cb.LineStart(line) + length;
498	if (posAfter >= LengthNoExcept()) {
499	return LinesTotal();
500	}
501	const Sci::Line lineAfter = SciLineFromPosition(posAfter);
502	if (lineAfter > line) {
503	return lineAfter;
504	} else {
505	// Want to make some progress so return next line
506	return lineAfter + `1`;
507	}
508	}
509
510	int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
511	const int prev = Levels()->SetLevel(line, level, LinesTotal());
512	if (prev != level) {
513	DocModification mh(ModificationFlags::ChangeFold \| ModificationFlags::ChangeMarker,
514	LineStart(line), `0`, `0`, nullptr, line);
515	mh.foldLevelNow = static_cast<FoldLevel>(level);
516	mh.foldLevelPrev = static_cast<FoldLevel>(prev);
517	NotifyModified(mh);
518	}
519	return prev;
520	}
521
522	int SCI_METHOD Document::GetLevel(Sci_Position line) const {
523	return Levels()->GetLevel(line);
524	}
525
526	FoldLevel Document::GetFoldLevel(Sci_Position line) const {
527	return static_cast<FoldLevel>(Levels()->GetLevel(line));
528	}
529
530	void Document::ClearLevels() {
531	Levels()->ClearLevels();
532	}
533
534	static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept {
535	if (LevelIsWhitespace(levelTry))
536	return true;
537	else
538	return LevelNumber(levelStart) < LevelNumber(levelTry);
539	}
540
541	Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional<FoldLevel> level, Sci::Line lastLine) {
542	const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent));
543	const Sci::Line maxLine = LinesTotal();
544	const Sci::Line lookLastLine = (lastLine != -`1`) ? std::min(LinesTotal() - `1`, lastLine) : -`1`;
545	Sci::Line lineMaxSubord = lineParent;
546	while (lineMaxSubord < maxLine - `1`) {
547	EnsureStyledTo(LineStart(lineMaxSubord + `2`));
548	if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + `1`)))
549	break;
550	if ((lookLastLine != -`1`) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord)))
551	break;
552	lineMaxSubord++;
553	}
554	if (lineMaxSubord > lineParent) {
555	if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + `1`))) {
556	// Have chewed up some whitespace that belongs to a parent so seek back
557	if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) {
558	lineMaxSubord--;
559	}
560	}
561	}
562	return lineMaxSubord;
563	}
564
565	Sci::Line Document::GetFoldParent(Sci::Line line) const {
566	const FoldLevel level = LevelNumberPart(GetFoldLevel(line));
567	Sci::Line lineLook = line - `1`;
568	while ((lineLook > `0`) && (
569	(!LevelIsHeader(GetFoldLevel(lineLook))) \|\|
570	(LevelNumberPart(GetFoldLevel(lineLook)) >= level))
571	) {
572	lineLook--;
573	}
574	if (LevelIsHeader(GetFoldLevel(lineLook)) &&
575	(LevelNumberPart(GetFoldLevel(lineLook)) < level)) {
576	return lineLook;
577	} else {
578	return -`1`;
579	}
580	}
581
582	void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
583	const FoldLevel level = GetFoldLevel(line);
584	const Sci::Line lookLastLine = std::max(line, lastLine) + `1`;
585
586	Sci::Line lookLine = line;
587	FoldLevel lookLineLevel = level;
588	FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel);
589	while ((lookLine > `0`) && (LevelIsWhitespace(lookLineLevel) \|\|
590	(LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + `1`)))))) {
591	lookLineLevel = GetFoldLevel(--lookLine);
592	lookLineLevelNum = LevelNumberPart(lookLineLevel);
593	}
594
595	Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine);
596	if (beginFoldBlock == -`1`) {
597	highlightDelimiter.Clear();
598	return;
599	}
600
601	Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine);
602	Sci::Line firstChangeableLineBefore = -`1`;
603	if (endFoldBlock < line) {
604	lookLine = beginFoldBlock - `1`;
605	lookLineLevel = GetFoldLevel(lookLine);
606	lookLineLevelNum = LevelNumberPart(lookLineLevel);
607	while ((lookLine >= `0`) && (lookLineLevelNum >= FoldLevel::Base)) {
608	if (LevelIsHeader(lookLineLevel)) {
609	if (GetLastChild(lookLine, {}, lookLastLine) == line) {
610	beginFoldBlock = lookLine;
611	endFoldBlock = line;
612	firstChangeableLineBefore = line - `1`;
613	}
614	}
615	if ((lookLine > `0`) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - `1`)) > lookLineLevelNum))
616	break;
617	lookLineLevel = GetFoldLevel(--lookLine);
618	lookLineLevelNum = LevelNumberPart(lookLineLevel);
619	}
620	}
621	if (firstChangeableLineBefore == -`1`) {
622	for (lookLine = line - `1`, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
623	lookLine >= beginFoldBlock;
624	lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
625	if (LevelIsWhitespace(lookLineLevel) \|\| (lookLineLevelNum > LevelNumberPart(level))) {
626	firstChangeableLineBefore = lookLine;
627	break;
628	}
629	}
630	}
631	if (firstChangeableLineBefore == -`1`)
632	firstChangeableLineBefore = beginFoldBlock - `1`;
633
634	Sci::Line firstChangeableLineAfter = -`1`;
635	for (lookLine = line + `1`, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
636	lookLine <= endFoldBlock;
637	lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
638	if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + `1`)))) {
639	firstChangeableLineAfter = lookLine;
640	break;
641	}
642	}
643	if (firstChangeableLineAfter == -`1`)
644	firstChangeableLineAfter = endFoldBlock + `1`;
645
646	highlightDelimiter.beginFoldBlock = beginFoldBlock;
647	highlightDelimiter.endFoldBlock = endFoldBlock;
648	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
649	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
650	}
651
652	Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept {
653	return std::clamp<Sci::Position>(pos, `0`, LengthNoExcept());
654	}
655
656	bool Document::IsCrLf(Sci::Position pos) const noexcept {
657	if (pos < `0`)
658	return false;
659	if (pos >= (LengthNoExcept() - `1`))
660	return false;
661	return (cb.CharAt(pos) == `'\r'`) && (cb.CharAt(pos + `1`) == `'\n'`);
662	}
663
664	int Document::LenChar(Sci::Position pos) const noexcept {
665	if (pos < `0` \|\| pos >= LengthNoExcept()) {
666	// Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
667	return `1`;
668	} else if (IsCrLf(pos)) {
669	return `2`;
670	}
671
672	const unsigned char leadByte = cb.UCharAt(pos);
673	if (!dbcsCodePage \|\| UTF8IsAscii(leadByte)) {
674	// Common case: ASCII character
675	return `1`;
676	}
677	if (CpUtf8 == dbcsCodePage) {
678	const int widthCharBytes = UTF8BytesOfLead[leadByte];
679	unsigned char charBytes[UTF8MaxBytes] = { leadByte, `0`, `0`, `0` };
680	for (int b = `1`; b < widthCharBytes; b++) {
681	charBytes[b] = cb.UCharAt(pos + b);
682	}
683	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
684	if (utf8status & UTF8MaskInvalid) {
685	// Treat as invalid and use up just one byte
686	return `1`;
687	} else {
688	return utf8status & UTF8MaskWidth;
689	}
690	} else {
691	if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + `1`))) {
692	return `2`;
693	} else {
694	return `1`;
695	}
696	}
697	}
698
699	bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
700	Sci::Position trail = pos;
701	while ((trail>`0`) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-`1`)))
702	trail--;
703	start = (trail > `0`) ? trail-`1` : trail;
704
705	const unsigned char leadByte = cb.UCharAt(start);
706	const int widthCharBytes = UTF8BytesOfLead[leadByte];
707	if (widthCharBytes == `1`) {
708	return false;
709	} else {
710	const int trailBytes = widthCharBytes - `1`;
711	const Sci::Position len = pos - start;
712	if (len > trailBytes)
713	// pos too far from lead
714	return false;
715	unsigned char charBytes[UTF8MaxBytes] = {leadByte,`0`,`0`,`0`};
716	for (Sci::Position b=`1`; b<widthCharBytes && ((start+b) < cb.Length()); b++)
717	charBytes[b] = cb.CharAt(start+b);
718	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
719	if (utf8status & UTF8MaskInvalid)
720	return false;
721	end = start + widthCharBytes;
722	return true;
723	}
724	}
725
726	// Normalise a position so that it is not part way through a multi-byte character.
727	// This can occur in two situations -
728	// When lines are terminated with \r\n pairs which should be treated as one character.
729	// When displaying DBCS text such as Japanese.
730	// If moving, move the position in the indicated direction.
731	Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
732	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
733	// If out of range, just return minimum/maximum value.
734	if (pos <= `0`)
735	return `0`;
736	if (pos >= LengthNoExcept())
737	return LengthNoExcept();
738
739	// PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
740	if (checkLineEnd && IsCrLf(pos - `1`)) {
741	if (moveDir > `0`)
742	return pos + `1`;
743	else
744	return pos - `1`;
745	}
746
747	if (dbcsCodePage) {
748	if (CpUtf8 == dbcsCodePage) {
749	const unsigned char ch = cb.UCharAt(pos);
750	// If ch is not a trail byte then pos is valid intercharacter position
751	if (UTF8IsTrailByte(ch)) {
752	Sci::Position startUTF = pos;
753	Sci::Position endUTF = pos;
754	if (InGoodUTF8(pos, startUTF, endUTF)) {
755	// ch is a trail byte within a UTF-8 character
756	if (moveDir > `0`)
757	pos = endUTF;
758	else
759	pos = startUTF;
760	}
761	// Else invalid UTF-8 so return position of isolated trail byte
762	}
763	} else {
764	// Anchor DBCS calculations at start of line because start of line can
765	// not be a DBCS trail byte.
766	const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
767	if (pos == posStartLine)
768	return pos;
769
770	// Step back until a non-lead-byte is found.
771	Sci::Position posCheck = pos;
772	while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-`1`)))
773	posCheck--;
774
775	// Check from known start of character.
776	while (posCheck < pos) {
777	const int mbsize = IsDBCSDualByteAt(posCheck) ? `2` : `1`;
778	if (posCheck + mbsize == pos) {
779	return pos;
780	} else if (posCheck + mbsize > pos) {
781	if (moveDir > `0`) {
782	return posCheck + mbsize;
783	} else {
784	return posCheck;
785	}
786	}
787	posCheck += mbsize;
788	}
789	}
790	}
791
792	return pos;
793	}
794
795	// NextPosition moves between valid positions - it can not handle a position in the middle of a
796	// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
797	// A \r\n pair is treated as two characters.
798	Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
799	// If out of range, just return minimum/maximum value.
800	const int increment = (moveDir > `0`) ? `1` : -`1`;
801	if (pos + increment <= `0`)
802	return `0`;
803	if (pos + increment >= cb.Length())
804	return cb.Length();
805
806	if (dbcsCodePage) {
807	if (CpUtf8 == dbcsCodePage) {
808	if (increment == `1`) {
809	// Simple forward movement case so can avoid some checks
810	const unsigned char leadByte = cb.UCharAt(pos);
811	if (UTF8IsAscii(leadByte)) {
812	// Single byte character or invalid
813	pos++;
814	} else {
815	const int widthCharBytes = UTF8BytesOfLead[leadByte];
816	unsigned char charBytes[UTF8MaxBytes] = {leadByte,`0`,`0`,`0`};
817	for (int b=`1`; b<widthCharBytes; b++)
818	charBytes[b] = cb.CharAt(pos+b);
819	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
820	if (utf8status & UTF8MaskInvalid)
821	pos++;
822	else
823	pos += utf8status & UTF8MaskWidth;
824	}
825	} else {
826	// Examine byte before position
827	pos--;
828	const unsigned char ch = cb.UCharAt(pos);
829	// If ch is not a trail byte then pos is valid intercharacter position
830	if (UTF8IsTrailByte(ch)) {
831	// If ch is a trail byte in a valid UTF-8 character then return start of character
832	Sci::Position startUTF = pos;
833	Sci::Position endUTF = pos;
834	if (InGoodUTF8(pos, startUTF, endUTF)) {
835	pos = startUTF;
836	}
837	// Else invalid UTF-8 so return position of isolated trail byte
838	}
839	}
840	} else {
841	if (moveDir > `0`) {
842	const int mbsize = IsDBCSDualByteAt(pos) ? `2` : `1`;
843	pos += mbsize;
844	if (pos > cb.Length())
845	pos = cb.Length();
846	} else {
847	// Anchor DBCS calculations at start of line because start of line can
848	// not be a DBCS trail byte.
849	const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
850	// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
851	// http://msdn.microsoft.com/en-us/library/cc194790.aspx
852	if ((pos - `1`) <= posStartLine) {
853	return pos - `1`;
854	} else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - `1`))) {
855	// Should actually be trail byte
856	if (IsDBCSDualByteAt(pos - `2`)) {
857	return pos - `2`;
858	} else {
859	// Invalid byte pair so treat as one byte wide
860	return pos - `1`;
861	}
862	} else {
863	// Otherwise, step back until a non-lead-byte is found.
864	Sci::Position posTemp = pos - `1`;
865	while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
866	;
867	// Now posTemp+1 must point to the beginning of a character,
868	// so figure out whether we went back an even or an odd
869	// number of bytes and go back 1 or 2 bytes, respectively.
870	const Sci::Position widthLast = ((pos - posTemp) & `1`) + `1`;
871	if ((widthLast == `2`) && (IsDBCSDualByteAt(pos - widthLast))) {
872	return pos - widthLast;
873	}
874	// Byte before pos may be valid character or may be an invalid second byte
875	return pos - `1`;
876	}
877	}
878	}
879	} else {
880	pos += increment;
881	}
882
883	return pos;
884	}
885
886	bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
887	// Returns true if pos changed
888	Sci::Position posNext = NextPosition(pos, moveDir);
889	if (posNext == pos) {
890	return false;
891	} else {
892	pos = posNext;
893	return true;
894	}
895	}
896
897	Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
898	if (position >= LengthNoExcept()) {
899	return CharacterExtracted (unicodeReplacementChar, `0`);
900	}
901	const unsigned char leadByte = cb.UCharAt(position);
902	if (!dbcsCodePage \|\| UTF8IsAscii(leadByte)) {
903	// Common case: ASCII character
904	return CharacterExtracted (leadByte, `1`);
905	}
906	if (CpUtf8 == dbcsCodePage) {
907	const int widthCharBytes = UTF8BytesOfLead[leadByte];
908	unsigned char charBytes[UTF8MaxBytes] = { leadByte, `0`, `0`, `0` };
909	for (int b = `1`; b<widthCharBytes; b++)
910	charBytes[b] = cb.UCharAt(position + b);
911	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
912	if (utf8status & UTF8MaskInvalid) {
913	// Treat as invalid and use up just one byte
914	return CharacterExtracted (unicodeReplacementChar, `1`);
915	} else {
916	return CharacterExtracted (UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
917	}
918	} else {
919	if (IsDBCSLeadByteNoExcept(leadByte)) {
920	const unsigned char trailByte = cb.UCharAt(position + `1`);
921	if (IsDBCSTrailByteNoExcept(trailByte)) {
922	return CharacterExtracted::DBCS(leadByte, trailByte);
923	}
924	}
925	return CharacterExtracted (leadByte, `1`);
926	}
927	}
928
929	Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
930	if (position <= `0`) {
931	return CharacterExtracted (unicodeReplacementChar, `0`);
932	}
933	const unsigned char previousByte = cb.UCharAt(position - `1`);
934	if (`0` == dbcsCodePage) {
935	return CharacterExtracted (previousByte, `1`);
936	}
937	if (CpUtf8 == dbcsCodePage) {
938	if (UTF8IsAscii(previousByte)) {
939	return CharacterExtracted (previousByte, `1`);
940	}
941	position--;
942	// If previousByte is not a trail byte then its invalid
943	if (UTF8IsTrailByte(previousByte)) {
944	// If previousByte is a trail byte in a valid UTF-8 character then find start of character
945	Sci::Position startUTF = position;
946	Sci::Position endUTF = position;
947	if (InGoodUTF8(position, startUTF, endUTF)) {
948	const Sci::Position widthCharBytes = endUTF - startUTF;
949	unsigned char charBytes[UTF8MaxBytes] = { `0`, `0`, `0`, `0` };
950	for (Sci::Position b = `0`; b<widthCharBytes; b++)
951	charBytes[b] = cb.UCharAt(startUTF + b);
952	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
953	if (utf8status & UTF8MaskInvalid) {
954	// Treat as invalid and use up just one byte
955	return CharacterExtracted (unicodeReplacementChar, `1`);
956	} else {
957	return CharacterExtracted (UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
958	}
959	}
960	// Else invalid UTF-8 so return position of isolated trail byte
961	}
962	return CharacterExtracted (unicodeReplacementChar, `1`);
963	} else {
964	// Moving backwards in DBCS is complex so use NextPosition
965	const Sci::Position posStartCharacter = NextPosition(position, -`1`);
966	return CharacterAfter(posStartCharacter);
967	}
968	}
969
970	// Return -1 on out-of-bounds
971	Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
972	Sci::Position pos = positionStart;
973	if (dbcsCodePage) {
974	const int increment = (characterOffset > `0`) ? `1` : -`1`;
975	while (characterOffset != `0`) {
976	const Sci::Position posNext = NextPosition(pos, increment);
977	if (posNext == pos)
978	return Sci::invalidPosition;
979	pos = posNext;
980	characterOffset -= increment;
981	}
982	} else {
983	pos = positionStart + characterOffset;
984	if ((pos < `0`) \|\| (pos > Length()))
985	return Sci::invalidPosition;
986	}
987	return pos;
988	}
989
990	Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept {
991	Sci::Position pos = positionStart;
992	if (dbcsCodePage) {
993	const int increment = (characterOffset > `0`) ? `1` : -`1`;
994	while (characterOffset != `0`) {
995	const Sci::Position posNext = NextPosition(pos, increment);
996	if (posNext == pos)
997	return Sci::invalidPosition;
998	if (std::abs(pos-posNext) > `3`) // 4 byte character = 2UTF16.*
999	characterOffset -= increment;
1000	pos = posNext;
1001	characterOffset -= increment;
1002	}
1003	} else {
1004	pos = positionStart + characterOffset;
1005	if ((pos < `0`) \|\| (pos > LengthNoExcept()))
1006	return Sci::invalidPosition;
1007	}
1008	return pos;
1009	}
1010
1011	int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position pWidth) const* {
1012	int bytesInCharacter = `1`;
1013	const unsigned char leadByte = cb.UCharAt(position);
1014	int character = leadByte;
1015	if (dbcsCodePage && !UTF8IsAscii(leadByte)) {
1016	if (CpUtf8 == dbcsCodePage) {
1017	const int widthCharBytes = UTF8BytesOfLead[leadByte];
1018	unsigned char charBytes[UTF8MaxBytes] = {leadByte,`0`,`0`,`0`};
1019	for (int b=`1`; b<widthCharBytes; b++)
1020	charBytes[b] = cb.UCharAt(position+b);
1021	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1022	if (utf8status & UTF8MaskInvalid) {
1023	// Report as singleton surrogate values which are invalid Unicode
1024	character = `0xDC80` + leadByte;
1025	} else {
1026	bytesInCharacter = utf8status & UTF8MaskWidth;
1027	character = UnicodeFromUTF8(charBytes);
1028	}
1029	} else {
1030	if (IsDBCSLeadByteNoExcept(leadByte)) {
1031	const unsigned char trailByte = cb.UCharAt(position + `1`);
1032	if (IsDBCSTrailByteNoExcept(trailByte)) {
1033	bytesInCharacter = `2`;
1034	character = (leadByte << `8`) \| trailByte;
1035	}
1036	}
1037	}
1038	}
1039	if (pWidth) {
1040	*pWidth = bytesInCharacter;
1041	}
1042	return character;
1043	}
1044
/**
 * Return the code page of the document as stored in dbcsCodePage.
 * Elsewhere in this file 0 is handled as a single byte encoding and CpUtf8 as UTF-8.
 */
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}
1048
/**
 * Report whether ch can be the first byte of a two byte character in the current code page.
 * Forwards to IsDBCSLeadByteNoExcept.
 */
bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
	// Used by lexers so must match IDocument method exactly
	return IsDBCSLeadByteNoExcept(ch);
}
1053
1054	bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
1055	// Used inside core Scintilla
1056	// Byte ranges found in Wikipedia articles with relevant search strings in each case
1057	const unsigned char uch = ch;
1058	switch (dbcsCodePage) {
1059	case `932`:
1060	// Shift_jis
1061	return ((uch >= `0x81`) && (uch <= `0x9F`)) \|\|
1062	((uch >= `0xE0`) && (uch <= `0xFC`));
1063	// Lead bytes F0 to FC may be a Microsoft addition.
1064	case `936`:
1065	// GBK
1066	return (uch >= `0x81`) && (uch <= `0xFE`);
1067	case `949`:
1068	// Korean Wansung KS C-5601-1987
1069	return (uch >= `0x81`) && (uch <= `0xFE`);
1070	case `950`:
1071	// Big5
1072	return (uch >= `0x81`) && (uch <= `0xFE`);
1073	case `1361`:
1074	// Korean Johab KS C-5601-1992
1075	return
1076	((uch >= `0x84`) && (uch <= `0xD3`)) \|\|
1077	((uch >= `0xD8`) && (uch <= `0xDE`)) \|\|
1078	((uch >= `0xE0`) && (uch <= `0xF9`));
1079	}
1080	return false;
1081	}
1082
1083	bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept {
1084	const unsigned char trail = ch;
1085	switch (dbcsCodePage) {
1086	case `932`:
1087	// Shift_jis
1088	return (trail != `0x7F`) &&
1089	((trail >= `0x40`) && (trail <= `0xFC`));
1090	case `936`:
1091	// GBK
1092	return (trail != `0x7F`) &&
1093	((trail >= `0x40`) && (trail <= `0xFE`));
1094	case `949`:
1095	// Korean Wansung KS C-5601-1987
1096	return
1097	((trail >= `0x41`) && (trail <= `0x5A`)) \|\|
1098	((trail >= `0x61`) && (trail <= `0x7A`)) \|\|
1099	((trail >= `0x81`) && (trail <= `0xFE`));
1100	case `950`:
1101	// Big5
1102	return
1103	((trail >= `0x40`) && (trail <= `0x7E`)) \|\|
1104	((trail >= `0xA1`) && (trail <= `0xFE`));
1105	case `1361`:
1106	// Korean Johab KS C-5601-1992
1107	return
1108	((trail >= `0x31`) && (trail <= `0x7E`)) \|\|
1109	((trail >= `0x81`) && (trail <= `0xFE`));
1110	}
1111	return false;
1112	}
1113
1114	int Document::DBCSDrawBytes(std::string_view text) const noexcept {
1115	if (text.length() <= `1`) {
1116	return static_cast<int>(text.length());
1117	}
1118	if (IsDBCSLeadByteNoExcept(text [`0`])) {
1119	return IsDBCSTrailByteNoExcept(text [`1`]) ? `2` : `1`;
1120	} else {
1121	return `1`;
1122	}
1123	}
1124
1125	bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept {
1126	return IsDBCSLeadByteNoExcept(cb.CharAt(pos))
1127	&& IsDBCSTrailByteNoExcept(cb.CharAt(pos + `1`));
1128	}
1129
1130	// Need to break text into segments near end but taking into account the
1131	// encoding to not break inside a UTF-8 or DBCS character and also trying
1132	// to avoid breaking inside a pair of combining characters, or inside
1133	// ligatures.
1134	// TODO: implement grapheme cluster boundaries,
1135	// see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
1136	//
1137	// The segment length must always be long enough (more than 4 bytes)
1138	// so that there will be at least one whole character to make a segment.
1139	// For UTF-8, text must consist only of valid whole characters.
1140	// In preference order from best to worst:
1141	// 1) Break before or after spaces or controls
1142	// 2) Break at word and punctuation boundary for better kerning and ligature support
1143	// 3) Break after whole character, this may break combining characters
1144
1145	size_t Document::SafeSegment(std::string_view text) const noexcept {
1146	// check space first as most written language use spaces.
1147	for (std::string_view::iterator it = text.end() - `1`; it != text.begin(); --it) {
1148	if (IsBreakSpace(*it)) {
1149	return it - text.begin();
1150	}
1151	}
1152
1153	if (!dbcsCodePage \|\| dbcsCodePage == CpUtf8) {
1154	// backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary.
1155	std::string_view::iterator it = text.end() - `1`;
1156	const bool punctuation = IsPunctuation(*it);
1157	do {
1158	--it;
1159	if (punctuation != IsPunctuation(*it)) {
1160	return it - text.begin() + `1`;
1161	}
1162	} while (it != text.begin());
1163
1164	it = text.end() - `1`;
1165	if (dbcsCodePage) {
1166	// for UTF-8 go back to the start of last character.
1167	for (int trail = `0`; trail < UTF8MaxBytes - `1` && UTF8IsTrailByte(*it); trail++) {
1168	--it;
1169	}
1170	}
1171	return it - text.begin();
1172	}
1173
1174	{
1175	// forward iterate for DBCS to find word and punctuation boundary.
1176	size_t lastPunctuationBreak = `0`;
1177	size_t lastEncodingAllowedBreak = `0`;
1178	CharacterClass ccPrev = CharacterClass::space;
1179	for (size_t j = `0`; j < text.length();) {
1180	const unsigned char ch = text [j];
1181	lastEncodingAllowedBreak = j++;
1182
1183	CharacterClass cc = CharacterClass::word;
1184	if (UTF8IsAscii(ch)) {
1185	if (IsPunctuation(ch)) {
1186	cc = CharacterClass::punctuation;
1187	}
1188	} else {
1189	j += IsDBCSLeadByteNoExcept(ch);
1190	}
1191	if (cc != ccPrev) {
1192	ccPrev = cc;
1193	lastPunctuationBreak = lastEncodingAllowedBreak;
1194	}
1195	}
1196	return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak;
1197	}
1198	}
1199
1200	EncodingFamily Document::CodePageFamily() const noexcept {
1201	if (CpUtf8 == dbcsCodePage)
1202	return EncodingFamily::unicode;
1203	else if (dbcsCodePage)
1204	return EncodingFamily::dbcs;
1205	else
1206	return EncodingFamily::eightBit;
1207	}
1208
1209	void Document::ModifiedAt(Sci::Position pos) noexcept {
1210	if (endStyled > pos)
1211	endStyled = pos;
1212	}
1213
1214	void Document::CheckReadOnly() {
1215	if (cb.IsReadOnly() && enteredReadOnlyCount == `0`) {
1216	enteredReadOnlyCount++;
1217	NotifyModifyAttempt();
1218	enteredReadOnlyCount--;
1219	}
1220	}
1221
1222	// Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1223	// SetStyleAt does not change the persistent state of a document
1224
1225	bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1226	if (pos < `0`)
1227	return false;
1228	if (len <= `0`)
1229	return false;
1230	if ((pos + len) > LengthNoExcept())
1231	return false;
1232	CheckReadOnly();
1233	if (enteredModification != `0`) {
1234	return false;
1235	} else {
1236	enteredModification++;
1237	if (!cb.IsReadOnly()) {
1238	NotifyModified(
1239	DocModification (
1240	ModificationFlags::BeforeDelete \| ModificationFlags::User,
1241	pos, len,
1242	`0`, nullptr));
1243	const Sci::Line prevLinesTotal = LinesTotal();
1244	const bool startSavePoint = cb.IsSavePoint();
1245	bool startSequence = false;
1246	const char *text = cb.DeleteChars(pos, len, startSequence);
1247	if (startSavePoint && cb.IsCollectingUndo())
1248	NotifySavePoint(false);
1249	if ((pos < LengthNoExcept()) \|\| (pos == `0`))
1250	ModifiedAt(pos);
1251	else
1252	ModifiedAt(pos-`1`);
1253	NotifyModified(
1254	DocModification (
1255	ModificationFlags::DeleteText \| ModificationFlags::User \|
1256	(startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1257	pos, len,
1258	LinesTotal() - prevLinesTotal, text));
1259	}
1260	enteredModification--;
1261	}
1262	return !cb.IsReadOnly();
1263	}
1264
1265	/**
1266	* Insert a string with a length.
1267	*/
1268	Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
1269	if (insertLength <= `0`) {
1270	return `0`;
1271	}
1272	CheckReadOnly(); // Application may change read only state here
1273	if (cb.IsReadOnly()) {
1274	return `0`;
1275	}
1276	if (enteredModification != `0`) {
1277	return `0`;
1278	}
1279	enteredModification++;
1280	insertionSet = false;
1281	insertion.clear();
1282	NotifyModified(
1283	DocModification (
1284	ModificationFlags::InsertCheck,
1285	position, insertLength,
1286	`0`, s));
1287	if (insertionSet) {
1288	s = insertion.c_str();
1289	insertLength = insertion.length();
1290	}
1291	NotifyModified(
1292	DocModification (
1293	ModificationFlags::BeforeInsert \| ModificationFlags::User,
1294	position, insertLength,
1295	`0`, s));
1296	const Sci::Line prevLinesTotal = LinesTotal();
1297	const bool startSavePoint = cb.IsSavePoint();
1298	bool startSequence = false;
1299	const char *text = cb.InsertString(position, s, insertLength, startSequence);
1300	if (startSavePoint && cb.IsCollectingUndo())
1301	NotifySavePoint(false);
1302	ModifiedAt(position);
1303	NotifyModified(
1304	DocModification (
1305	ModificationFlags::InsertText \| ModificationFlags::User \|
1306	(startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1307	position, insertLength,
1308	LinesTotal() - prevLinesTotal, text));
1309	if (insertionSet) { // Free memory as could be large
1310	std::string ().swap(insertion);
1311	}
1312	enteredModification--;
1313	return insertLength;
1314	}
1315
/**
 * Record replacement text for an insertion that is in progress.
 * Sets insertionSet and copies length bytes of s into insertion. InsertString checks
 * insertionSet after sending its InsertCheck notification and, when set, inserts this text.
 */
void Document::ChangeInsertion(const char *s, Sci::Position length) {
	insertionSet = true;
	insertion.assign(s, length);
}
1320
1321	int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1322	try {
1323	const Sci::Position position = Length();
1324	InsertString(position, data, length);
1325	} catch (std::bad_alloc &) {
1326	return static_cast<int>(Status::BadAlloc);
1327	} catch (...) {
1328	return static_cast<int>(Status::Failure);
1329	}
1330	return static_cast<int>(Status::Ok);
1331	}
1332
/**
 * Return this object as an untyped pointer.
 */
void * SCI_METHOD Document::ConvertToDocument() {
	return this;
}
1336
1337	Sci::Position Document::Undo() {
1338	Sci::Position newPos = -`1`;
1339	CheckReadOnly();
1340	if ((enteredModification == `0`) && (cb.IsCollectingUndo())) {
1341	enteredModification++;
1342	if (!cb.IsReadOnly()) {
1343	const bool startSavePoint = cb.IsSavePoint();
1344	bool multiLine = false;
1345	const int steps = cb.StartUndo();
1346	//Platform::DebugPrintf("Steps=%d\n", steps);
1347	Sci::Position coalescedRemovePos = -`1`;
1348	Sci::Position coalescedRemoveLen = `0`;
1349	Sci::Position prevRemoveActionPos = -`1`;
1350	Sci::Position prevRemoveActionLen = `0`;
1351	for (int step = `0`; step < steps; step++) {
1352	const Sci::Line prevLinesTotal = LinesTotal();
1353	const Action &action = cb.GetUndoStep();
1354	if (action.at == ActionType::remove) {
1355	NotifyModified(DocModification (
1356	ModificationFlags::BeforeInsert \| ModificationFlags::Undo, action));
1357	} else if (action.at == ActionType::container) {
1358	DocModification dm(ModificationFlags::Container \| ModificationFlags::Undo);
1359	dm.token = action.position;
1360	NotifyModified(dm);
1361	if (!action.mayCoalesce) {
1362	coalescedRemovePos = -`1`;
1363	coalescedRemoveLen = `0`;
1364	prevRemoveActionPos = -`1`;
1365	prevRemoveActionLen = `0`;
1366	}
1367	} else {
1368	NotifyModified(DocModification (
1369	ModificationFlags::BeforeDelete \| ModificationFlags::Undo, action));
1370	}
1371	cb.PerformUndoStep();
1372	if (action.at != ActionType::container) {
1373	ModifiedAt(action.position);
1374	newPos = action.position;
1375	}
1376
1377	ModificationFlags modFlags = ModificationFlags::Undo;
1378	// With undo, an insertion action becomes a deletion notification
1379	if (action.at == ActionType::remove) {
1380	newPos += action.lenData;
1381	modFlags \|= ModificationFlags::InsertText;
1382	if ((coalescedRemoveLen > `0`) &&
1383	(action.position == prevRemoveActionPos \|\| action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1384	coalescedRemoveLen += action.lenData;
1385	newPos = coalescedRemovePos + coalescedRemoveLen;
1386	} else {
1387	coalescedRemovePos = action.position;
1388	coalescedRemoveLen = action.lenData;
1389	}
1390	prevRemoveActionPos = action.position;
1391	prevRemoveActionLen = action.lenData;
1392	} else if (action.at == ActionType::insert) {
1393	modFlags \|= ModificationFlags::DeleteText;
1394	coalescedRemovePos = -`1`;
1395	coalescedRemoveLen = `0`;
1396	prevRemoveActionPos = -`1`;
1397	prevRemoveActionLen = `0`;
1398	}
1399	if (steps > `1`)
1400	modFlags \|= ModificationFlags::MultiStepUndoRedo;
1401	const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1402	if (linesAdded != `0`)
1403	multiLine = true;
1404	if (step == steps - `1`) {
1405	modFlags \|= ModificationFlags::LastStepInUndoRedo;
1406	if (multiLine)
1407	modFlags \|= ModificationFlags::MultilineUndoRedo;
1408	}
1409	NotifyModified(DocModification (modFlags, action.position, action.lenData,
1410	linesAdded, action.data.get()));
1411	}
1412
1413	const bool endSavePoint = cb.IsSavePoint();
1414	if (startSavePoint != endSavePoint)
1415	NotifySavePoint(endSavePoint);
1416	}
1417	enteredModification--;
1418	}
1419	return newPos;
1420	}
1421
1422	Sci::Position Document::Redo() {
1423	Sci::Position newPos = -`1`;
1424	CheckReadOnly();
1425	if ((enteredModification == `0`) && (cb.IsCollectingUndo())) {
1426	enteredModification++;
1427	if (!cb.IsReadOnly()) {
1428	const bool startSavePoint = cb.IsSavePoint();
1429	bool multiLine = false;
1430	const int steps = cb.StartRedo();
1431	for (int step = `0`; step < steps; step++) {
1432	const Sci::Line prevLinesTotal = LinesTotal();
1433	const Action &action = cb.GetRedoStep();
1434	if (action.at == ActionType::insert) {
1435	NotifyModified(DocModification (
1436	ModificationFlags::BeforeInsert \| ModificationFlags::Redo, action));
1437	} else if (action.at == ActionType::container) {
1438	DocModification dm(ModificationFlags::Container \| ModificationFlags::Redo);
1439	dm.token = action.position;
1440	NotifyModified(dm);
1441	} else {
1442	NotifyModified(DocModification (
1443	ModificationFlags::BeforeDelete \| ModificationFlags::Redo, action));
1444	}
1445	cb.PerformRedoStep();
1446	if (action.at != ActionType::container) {
1447	ModifiedAt(action.position);
1448	newPos = action.position;
1449	}
1450
1451	ModificationFlags modFlags = ModificationFlags::Redo;
1452	if (action.at == ActionType::insert) {
1453	newPos += action.lenData;
1454	modFlags \|= ModificationFlags::InsertText;
1455	} else if (action.at == ActionType::remove) {
1456	modFlags \|= ModificationFlags::DeleteText;
1457	}
1458	if (steps > `1`)
1459	modFlags \|= ModificationFlags::MultiStepUndoRedo;
1460	const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1461	if (linesAdded != `0`)
1462	multiLine = true;
1463	if (step == steps - `1`) {
1464	modFlags \|= ModificationFlags::LastStepInUndoRedo;
1465	if (multiLine)
1466	modFlags \|= ModificationFlags::MultilineUndoRedo;
1467	}
1468	NotifyModified(
1469	DocModification (modFlags, action.position, action.lenData,
1470	linesAdded, action.data.get()));
1471	}
1472
1473	const bool endSavePoint = cb.IsSavePoint();
1474	if (startSavePoint != endSavePoint)
1475	NotifySavePoint(endSavePoint);
1476	}
1477	enteredModification--;
1478	}
1479	return newPos;
1480	}
1481
1482	void Document::DelChar(Sci::Position pos) {
1483	DeleteChars(pos, LenChar(pos));
1484	}
1485
1486	void Document::DelCharBack(Sci::Position pos) {
1487	if (pos <= `0`) {
1488	return;
1489	} else if (IsCrLf(pos - `2`)) {
1490	DeleteChars(pos - `2`, `2`);
1491	} else if (dbcsCodePage) {
1492	const Sci::Position startChar = NextPosition(pos, -`1`);
1493	DeleteChars(startChar, pos - startChar);
1494	} else {
1495	DeleteChars(pos - `1`, `1`);
1496	}
1497	}
1498
1499	static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1500	return ((pos / tabSize) + `1`) * tabSize;
1501	}
1502
1503	static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1504	std::string indentation;
1505	if (!insertSpaces) {
1506	while (indent >= tabSize) {
1507	indentation += `'\t'`;
1508	indent -= tabSize;
1509	}
1510	}
1511	while (indent > `0`) {
1512	indentation += `' '`;
1513	indent--;
1514	}
1515	return indentation;
1516	}
1517
1518	int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1519	int indent = `0`;
1520	if ((line >= `0`) && (line < LinesTotal())) {
1521	const Sci::Position lineStart = LineStart(line);
1522	const Sci::Position length = Length();
1523	for (Sci::Position i = lineStart; i < length; i++) {
1524	const char ch = cb.CharAt(i);
1525	if (ch == `' '`)
1526	indent++;
1527	else if (ch == `'\t'`)
1528	indent = static_cast<int>(NextTab(indent, tabInChars));
1529	else
1530	return indent;
1531	}
1532	}
1533	return indent;
1534	}
1535
1536	Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1537	const int indentOfLine = GetLineIndentation(line);
1538	if (indent < `0`)
1539	indent = `0`;
1540	if (indent != indentOfLine) {
1541	std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1542	const Sci::Position thisLineStart = LineStart(line);
1543	const Sci::Position indentPos = GetLineIndentPosition(line);
1544	UndoGroup ug(this);
1545	DeleteChars(thisLineStart, indentPos - thisLineStart);
1546	return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1547	linebuf.length());
1548	} else {
1549	return GetLineIndentPosition(line);
1550	}
1551	}
1552
1553	Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1554	if (line < `0`)
1555	return `0`;
1556	Sci::Position pos = LineStart(line);
1557	const Sci::Position length = Length();
1558	while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1559	pos++;
1560	}
1561	return pos;
1562	}
1563
1564	Sci::Position Document::GetColumn(Sci::Position pos) {
1565	Sci::Position column = `0`;
1566	const Sci::Line line = SciLineFromPosition(pos);
1567	if ((line >= `0`) && (line < LinesTotal())) {
1568	for (Sci::Position i = LineStart(line); i < pos;) {
1569	const char ch = cb.CharAt(i);
1570	if (ch == `'\t'`) {
1571	column = NextTab(column, tabInChars);
1572	i++;
1573	} else if (ch == `'\r'`) {
1574	return column;
1575	} else if (ch == `'\n'`) {
1576	return column;
1577	} else if (i >= Length()) {
1578	return column;
1579	} else {
1580	column++;
1581	i = NextPosition(i, `1`);
1582	}
1583	}
1584	}
1585	return column;
1586	}
1587
1588	Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept {
1589	startPos = MovePositionOutsideChar(startPos, `1`, false);
1590	endPos = MovePositionOutsideChar(endPos, -`1`, false);
1591	Sci::Position count = `0`;
1592	Sci::Position i = startPos;
1593	while (i < endPos) {
1594	count++;
1595	i = NextPosition(i, `1`);
1596	}
1597	return count;
1598	}
1599
1600	Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept {
1601	startPos = MovePositionOutsideChar(startPos, `1`, false);
1602	endPos = MovePositionOutsideChar(endPos, -`1`, false);
1603	Sci::Position count = `0`;
1604	Sci::Position i = startPos;
1605	while (i < endPos) {
1606	count++;
1607	const Sci::Position next = NextPosition(i, `1`);
1608	if ((next - i) > `3`)
1609	count++;
1610	i = next;
1611	}
1612	return count;
1613	}
1614
1615	Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1616	Sci::Position position = LineStart(line);
1617	if ((line >= `0`) && (line < LinesTotal())) {
1618	Sci::Position columnCurrent = `0`;
1619	while ((columnCurrent < column) && (position < Length())) {
1620	const char ch = cb.CharAt(position);
1621	if (ch == `'\t'`) {
1622	columnCurrent = NextTab(columnCurrent, tabInChars);
1623	if (columnCurrent > column)
1624	return position;
1625	position++;
1626	} else if (ch == `'\r'`) {
1627	return position;
1628	} else if (ch == `'\n'`) {
1629	return position;
1630	} else {
1631	columnCurrent++;
1632	position = NextPosition(position, `1`);
1633	}
1634	}
1635	}
1636	return position;
1637	}
1638
1639	void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1640	// Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1641	for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1642	const Sci::Position indentOfLine = GetLineIndentation(line);
1643	if (forwards) {
1644	if (LineStart(line) < LineEnd(line)) {
1645	SetLineIndentation(line, indentOfLine + IndentSize());
1646	}
1647	} else {
1648	SetLineIndentation(line, indentOfLine - IndentSize());
1649	}
1650	}
1651	}
1652
1653	// Convert line endings for a piece of text to a particular mode.
1654	// Stop at len or when a NUL is found.
1655	std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) {
1656	std::string dest;
1657	for (size_t i = `0`; (i < len) && (s[i]); i++) {
1658	if (s[i] == `'\n'` \|\| s[i] == `'\r'`) {
1659	if (eolModeWanted == EndOfLine::Cr) {
1660	dest.push_back(`'\r'`);
1661	} else if (eolModeWanted == EndOfLine::Lf) {
1662	dest.push_back(`'\n'`);
1663	} else { // eolModeWanted == EndOfLine::CrLf
1664	dest.push_back(`'\r'`);
1665	dest.push_back(`'\n'`);
1666	}
1667	if ((s[i] == `'\r'`) && (i+`1` < len) && (s[i+`1`] == `'\n'`)) {
1668	i++;
1669	}
1670	} else {
1671	dest.push_back(s[i]);
1672	}
1673	}
1674	return dest;
1675	}
1676
1677	void Document::ConvertLineEnds(EndOfLine eolModeSet) {
1678	UndoGroup ug(this);
1679
1680	for (Sci::Position pos = `0`; pos < Length(); pos++) {
1681	if (cb.CharAt(pos) == `'\r'`) {
1682	if (cb.CharAt(pos + `1`) == `'\n'`) {
1683	// CRLF
1684	if (eolModeSet == EndOfLine::Cr) {
1685	DeleteChars(pos + `1`, `1`); // Delete the LF
1686	} else if (eolModeSet == EndOfLine::Lf) {
1687	DeleteChars(pos, `1`); // Delete the CR
1688	} else {
1689	pos++;
1690	}
1691	} else {
1692	// CR
1693	if (eolModeSet == EndOfLine::CrLf) {
1694	pos += InsertString(pos + `1`, "\n", `1`); // Insert LF
1695	} else if (eolModeSet == EndOfLine::Lf) {
1696	pos += InsertString(pos, "\n", `1`); // Insert LF
1697	DeleteChars(pos, `1`); // Delete CR
1698	pos--;
1699	}
1700	}
1701	} else if (cb.CharAt(pos) == `'\n'`) {
1702	// LF
1703	if (eolModeSet == EndOfLine::CrLf) {
1704	pos += InsertString(pos, "\r", `1`); // Insert CR
1705	} else if (eolModeSet == EndOfLine::Cr) {
1706	pos += InsertString(pos, "\r", `1`); // Insert CR
1707	DeleteChars(pos, `1`); // Delete LF
1708	pos--;
1709	}
1710	}
1711	}
1712
1713	}
1714
1715	DocumentOption Document::Options() const noexcept {
1716	return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) \|
1717	(cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone);
1718	}
1719
1720	bool Document::IsWhiteLine(Sci::Line line) const {
1721	Sci::Position currentChar = LineStart(line);
1722	const Sci::Position endLine = LineEnd(line);
1723	while (currentChar < endLine) {
1724	if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1725	return false;
1726	}
1727	++currentChar;
1728	}
1729	return true;
1730	}
1731
1732	Sci::Position Document::ParaUp(Sci::Position pos) const {
1733	Sci::Line line = SciLineFromPosition(pos);
1734	line--;
1735	while (line >= `0` && IsWhiteLine(line)) { // skip empty lines
1736	line--;
1737	}
1738	while (line >= `0` && !IsWhiteLine(line)) { // skip non-empty lines
1739	line--;
1740	}
1741	line++;
1742	return LineStart(line);
1743	}
1744
1745	Sci::Position Document::ParaDown(Sci::Position pos) const {
1746	Sci::Line line = SciLineFromPosition(pos);
1747	while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1748	line++;
1749	}
1750	while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1751	line++;
1752	}
1753	if (line < LinesTotal())
1754	return LineStart(line);
1755	else // end of a document
1756	return LineEnd(line-`1`);
1757	}
1758
1759	CharacterClass Document::WordCharacterClass(unsigned int ch) const {
1760	if (dbcsCodePage && (ch >= `0x80`)) {
1761	if (CpUtf8 == dbcsCodePage) {
1762	// Use hard coded Unicode class
1763	const CharacterCategory cc = charMap.CategoryFor(ch);
1764	switch (cc) {
1765
1766	// Separator, Line/Paragraph
1767	case ccZl:
1768	case ccZp:
1769	return CharacterClass::newLine;
1770
1771	// Separator, Space
1772	case ccZs:
1773	// Other
1774	case ccCc:
1775	case ccCf:
1776	case ccCs:
1777	case ccCo:
1778	case ccCn:
1779	return CharacterClass::space;
1780
1781	// Letter
1782	case ccLu:
1783	case ccLl:
1784	case ccLt:
1785	case ccLm:
1786	case ccLo:
1787	// Number
1788	case ccNd:
1789	case ccNl:
1790	case ccNo:
1791	// Mark - includes combining diacritics
1792	case ccMn:
1793	case ccMc:
1794	case ccMe:
1795	return CharacterClass::word;
1796
1797	// Punctuation
1798	case ccPc:
1799	case ccPd:
1800	case ccPs:
1801	case ccPe:
1802	case ccPi:
1803	case ccPf:
1804	case ccPo:
1805	// Symbol
1806	case ccSm:
1807	case ccSc:
1808	case ccSk:
1809	case ccSo:
1810	return CharacterClass::punctuation;
1811
1812	}
1813	} else {
1814	// Asian DBCS
1815	return CharacterClass::word;
1816	}
1817	}
1818	return charClass.GetClass(static_cast<unsigned char>(ch));
1819	}
1820
1821	/**
1822	* Used by commands that want to select whole words.
1823	* Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1824	*/
1825	Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1826	CharacterClass ccStart = CharacterClass::word;
1827	if (delta < `0`) {
1828	if (!onlyWordCharacters) {
1829	const CharacterExtracted ce = CharacterBefore(pos);
1830	ccStart = WordCharacterClass(ce.character);
1831	}
1832	while (pos > `0`) {
1833	const CharacterExtracted ce = CharacterBefore(pos);
1834	if (WordCharacterClass(ce.character) != ccStart)
1835	break;
1836	pos -= ce.widthBytes;
1837	}
1838	} else {
1839	if (!onlyWordCharacters && pos < LengthNoExcept()) {
1840	const CharacterExtracted ce = CharacterAfter(pos);
1841	ccStart = WordCharacterClass(ce.character);
1842	}
1843	while (pos < LengthNoExcept()) {
1844	const CharacterExtracted ce = CharacterAfter(pos);
1845	if (WordCharacterClass(ce.character) != ccStart)
1846	break;
1847	pos += ce.widthBytes;
1848	}
1849	}
1850	return MovePositionOutsideChar(pos, delta, true);
1851	}
1852
1853	/**
1854	* Find the start of the next word in either a forward (delta >= 0) or backwards direction
1855	* (delta < 0).
1856	* This is looking for a transition between character classes although there is also some
1857	* additional movement to transit white space.
1858	* Used by cursor movement by word commands.
1859	*/
1860	Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1861	if (delta < `0`) {
1862	while (pos > `0`) {
1863	const CharacterExtracted ce = CharacterBefore(pos);
1864	if (WordCharacterClass(ce.character) != CharacterClass::space)
1865	break;
1866	pos -= ce.widthBytes;
1867	}
1868	if (pos > `0`) {
1869	CharacterExtracted ce = CharacterBefore(pos);
1870	const CharacterClass ccStart = WordCharacterClass(ce.character);
1871	while (pos > `0`) {
1872	ce = CharacterBefore(pos);
1873	if (WordCharacterClass(ce.character) != ccStart)
1874	break;
1875	pos -= ce.widthBytes;
1876	}
1877	}
1878	} else {
1879	CharacterExtracted ce = CharacterAfter(pos);
1880	const CharacterClass ccStart = WordCharacterClass(ce.character);
1881	while (pos < LengthNoExcept()) {
1882	ce = CharacterAfter(pos);
1883	if (WordCharacterClass(ce.character) != ccStart)
1884	break;
1885	pos += ce.widthBytes;
1886	}
1887	while (pos < LengthNoExcept()) {
1888	ce = CharacterAfter(pos);
1889	if (WordCharacterClass(ce.character) != CharacterClass::space)
1890	break;
1891	pos += ce.widthBytes;
1892	}
1893	}
1894	return pos;
1895	}
1896
1897	/**
1898	* Find the end of the next word in either a forward (delta >= 0) or backwards direction
1899	* (delta < 0).
1900	* This is looking for a transition between character classes although there is also some
1901	* additional movement to transit white space.
1902	* Used by cursor movement by word commands.
1903	*/
1904	Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1905	if (delta < `0`) {
1906	if (pos > `0`) {
1907	CharacterExtracted ce = CharacterBefore(pos);
1908	const CharacterClass ccStart = WordCharacterClass(ce.character);
1909	if (ccStart != CharacterClass::space) {
1910	while (pos > `0`) {
1911	ce = CharacterBefore(pos);
1912	if (WordCharacterClass(ce.character) != ccStart)
1913	break;
1914	pos -= ce.widthBytes;
1915	}
1916	}
1917	while (pos > `0`) {
1918	ce = CharacterBefore(pos);
1919	if (WordCharacterClass(ce.character) != CharacterClass::space)
1920	break;
1921	pos -= ce.widthBytes;
1922	}
1923	}
1924	} else {
1925	while (pos < LengthNoExcept()) {
1926	const CharacterExtracted ce = CharacterAfter(pos);
1927	if (WordCharacterClass(ce.character) != CharacterClass::space)
1928	break;
1929	pos += ce.widthBytes;
1930	}
1931	if (pos < LengthNoExcept()) {
1932	CharacterExtracted ce = CharacterAfter(pos);
1933	const CharacterClass ccStart = WordCharacterClass(ce.character);
1934	while (pos < LengthNoExcept()) {
1935	ce = CharacterAfter(pos);
1936	if (WordCharacterClass(ce.character) != ccStart)
1937	break;
1938	pos += ce.widthBytes;
1939	}
1940	}
1941	}
1942	return pos;
1943	}
1944
1945	namespace {
1946
1947	constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept {
1948	return (cc != ccNext) &&
1949	(cc == CharacterClass::word \|\| cc == CharacterClass::punctuation);
1950	}
1951
1952	}
1953
1954	/**
1955	* Check that the character at the given position is a word or punctuation character and that
1956	* the previous character is of a different character class.
1957	*/
1958	bool Document::IsWordStartAt(Sci::Position pos) const {
1959	if (pos >= LengthNoExcept())
1960	return false;
1961	if (pos >= `0`) {
1962	const CharacterExtracted cePos = CharacterAfter(pos);
1963	// At start of document, treat as if space before so can be word start
1964	const CharacterExtracted cePrev = (pos > `0`) ?
1965	CharacterBefore(pos) : CharacterExtracted (`' '`, `1`);
1966	return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character));
1967	}
1968	return true;
1969	}
1970
1971	/**
1972	* Check that the character before the given position is a word or punctuation character and that
1973	* the next character is of a different character class.
1974	*/
1975	bool Document::IsWordEndAt(Sci::Position pos) const {
1976	if (pos <= `0`)
1977	return false;
1978	if (pos <= LengthNoExcept()) {
1979	// At end of document, treat as if space after so can be word end
1980	const CharacterExtracted cePos = (pos < LengthNoExcept()) ?
1981	CharacterAfter(pos) : CharacterExtracted (`' '`, `1`);
1982	const CharacterExtracted cePrev = CharacterBefore(pos);
1983	return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character));
1984	}
1985	return true;
1986	}
1987
1988	/**
1989	* Check that the given range is has transitions between character classes at both
1990	* ends and where the characters on the inside are word or punctuation characters.
1991	*/
1992	bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
1993	return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1994	}
1995
1996	bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
1997	return (!word && !wordStart) \|\|
1998	(word && IsWordAt(pos, pos + length)) \|\|
1999	(wordStart && IsWordStartAt(pos));
2000	}
2001
2002	bool Document::HasCaseFolder() const noexcept {
2003	return pcf != nullptr;
2004	}
2005
2006	void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
2007	pcf = std::move(pcf_);
2008	}
2009
2010	Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
2011	const unsigned char leadByte = cb.UCharAt(position);
2012	if (UTF8IsAscii(leadByte)) {
2013	// Common case: ASCII character
2014	return CharacterExtracted (leadByte, `1`);
2015	}
2016	const int widthCharBytes = UTF8BytesOfLead[leadByte];
2017	unsigned char charBytes[UTF8MaxBytes] = { leadByte, `0`, `0`, `0` };
2018	for (int b=`1`; b<widthCharBytes; b++)
2019	charBytes[b] = cb.UCharAt(position + b);
2020	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
2021	if (utf8status & UTF8MaskInvalid) {
2022	// Treat as invalid and use up just one byte
2023	return CharacterExtracted (unicodeReplacementChar, `1`);
2024	} else {
2025	return CharacterExtracted (UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
2026	}
2027	}
2028
2029	namespace {
2030
2031	// Equivalent of memchr over the split view
2032	ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept {
2033	size_t range1Length = `0`;
2034	if (start < view.length1) {
2035	range1Length = std::min(length, view.length1 - start);
2036	const char match = static_cast<const* char *>(memchr(view.segment1 + start, ch, range1Length));
2037	if (match) {
2038	return match - view.segment1;
2039	}
2040	start += range1Length;
2041	}
2042	const char match2 = static_cast<const* char *>(memchr(view.segment2 + start, ch, length - range1Length));
2043	if (match2) {
2044	return match2 - view.segment2;
2045	}
2046	return -`1`;
2047	}
2048
2049	// Equivalent of memcmp over the split view
2050	// This does not call memcmp as search texts are commonly too short to overcome the
2051	// call overhead.
2052	bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept {
2053	for (size_t i = `0`; i < text.length(); i++) {
2054	if (view.CharAt(i + start) != text [i]) {
2055	return false;
2056	}
2057	}
2058	return true;
2059	}
2060
2061	}
2062
2063	/**
2064	* Find text in document, supporting both forward and backward
2065	* searches (just pass minPos > maxPos to do a backward search)
2066	* Has not been tested with backwards DBCS searches yet.
2067	*/
2068	Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
2069	FindOption flags, Sci::Position *length) {
2070	if (*length <= `0`)
2071	return minPos;
2072	const bool caseSensitive = FlagSet(flags, FindOption::MatchCase);
2073	const bool word = FlagSet(flags, FindOption::WholeWord);
2074	const bool wordStart = FlagSet(flags, FindOption::WordStart);
2075	const bool regExp = FlagSet(flags, FindOption::RegExp);
2076	if (regExp) {
2077	if (!regex)
2078	regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
2079	return regex ->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
2080	} else {
2081
2082	const bool forward = minPos <= maxPos;
2083	const int increment = forward ? `1` : -`1`;
2084
2085	// Range endpoints should not be inside DBCS characters, but just in case, move them.
2086	const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
2087	const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
2088
2089	// Compute actual search ranges needed
2090	const Sci::Position lengthFind = *length;
2091
2092	//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2093	const Sci::Position limitPos = std::max(startPos, endPos);
2094	Sci::Position pos = startPos;
2095	if (!forward) {
2096	// Back all of a character
2097	pos = NextPosition(pos, increment);
2098	}
2099	const SplitView cbView = cb.AllView();
2100	if (caseSensitive) {
2101	const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + `1` : endPos;
2102	const unsigned char charStartSearch = search[`0`];
2103	if (forward && ((`0` == dbcsCodePage) \|\| (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
2104	// This is a fast case where there is no need to test byte values to iterate
2105	// so becomes the equivalent of a memchr+memcmp loop.
2106	// UTF-8 search will not be self-synchronizing when starts with trail byte
2107	const std::string_view suffix(search + `1`, lengthFind - `1`);
2108	while (pos < endSearch) {
2109	pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
2110	if (pos < `0`) {
2111	break;
2112	}
2113	if (SplitMatch(cbView, pos + `1`, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2114	return pos;
2115	}
2116	pos++;
2117	}
2118	} else {
2119	while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2120	const unsigned char leadByte = cbView.CharAt(pos);
2121	if (leadByte == charStartSearch) {
2122	bool found = (pos + lengthFind) <= limitPos;
2123	// SplitMatch could be called here but it is slower with g++ -O2
2124	for (int indexSearch = `1`; (indexSearch < lengthFind) && found; indexSearch++) {
2125	found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
2126	}
2127	if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2128	return pos;
2129	}
2130	}
2131	if (forward && UTF8IsAscii(leadByte)) {
2132	pos++;
2133	} else {
2134	if (dbcsCodePage) {
2135	if (!NextCharacter(pos, increment)) {
2136	break;
2137	}
2138	} else {
2139	pos += increment;
2140	}
2141	}
2142	}
2143	}
2144	} else if (CpUtf8 == dbcsCodePage) {
2145	constexpr size_t maxFoldingExpansion = `4`;
2146	std::vector<char> searchThing((lengthFind+`1`) * UTF8MaxBytes * maxFoldingExpansion + `1`);
2147	const size_t lenSearch =
2148	pcf ->Fold(&searchThing [`0`], searchThing.size(), search, lengthFind);
2149	while (forward ? (pos < endPos) : (pos >= endPos)) {
2150	int widthFirstCharacter = `0`;
2151	Sci::Position posIndexDocument = pos;
2152	size_t indexSearch = `0`;
2153	bool characterMatches = true;
2154	for (;;) {
2155	const unsigned char leadByte = cbView.CharAt(posIndexDocument);
2156	char bytes[UTF8MaxBytes + `1`];
2157	int widthChar = `1`;
2158	if (!UTF8IsAscii(leadByte)) {
2159	const int widthCharBytes = UTF8BytesOfLead[leadByte];
2160	bytes[`0`] = leadByte;
2161	for (int b=`1`; b<widthCharBytes; b++) {
2162	bytes[b] = cbView.CharAt(posIndexDocument+b);
2163	}
2164	widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
2165	}
2166	if (!widthFirstCharacter) {
2167	widthFirstCharacter = widthChar;
2168	}
2169	if ((posIndexDocument + widthChar) > limitPos) {
2170	break;
2171	}
2172	size_t lenFlat = `1`;
2173	if (widthChar == `1`) {
2174	characterMatches = searchThing [indexSearch] == MakeLowerCase(leadByte);
2175	} else {
2176	char folded[UTF8MaxBytes * maxFoldingExpansion + `1`];
2177	lenFlat = pcf ->Fold(folded, sizeof(folded), bytes, widthChar);
2178	// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2179	assert((indexSearch + lenFlat) <= searchThing.size());
2180	// Does folded match the buffer
2181	characterMatches = `0` == memcmp(folded, &searchThing [`0`] + indexSearch, lenFlat);
2182	}
2183	if (!characterMatches) {
2184	break;
2185	}
2186	posIndexDocument += widthChar;
2187	indexSearch += lenFlat;
2188	if (indexSearch >= lenSearch) {
2189	break;
2190	}
2191	}
2192	if (characterMatches && (indexSearch == lenSearch)) {
2193	if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
2194	*length = posIndexDocument - pos;
2195	return pos;
2196	}
2197	}
2198	if (forward) {
2199	pos += widthFirstCharacter;
2200	} else {
2201	if (!NextCharacter(pos, increment)) {
2202	break;
2203	}
2204	}
2205	}
2206	} else if (dbcsCodePage) {
2207	constexpr size_t maxBytesCharacter = `2`;
2208	constexpr size_t maxFoldingExpansion = `4`;
2209	std::vector<char> searchThing((lengthFind+`1`) * maxBytesCharacter * maxFoldingExpansion + `1`);
2210	const size_t lenSearch = pcf ->Fold(&searchThing [`0`], searchThing.size(), search, lengthFind);
2211	while (forward ? (pos < endPos) : (pos >= endPos)) {
2212	int widthFirstCharacter = `0`;
2213	Sci::Position indexDocument = `0`;
2214	size_t indexSearch = `0`;
2215	bool characterMatches = true;
2216	while (((pos + indexDocument) < limitPos) &&
2217	(indexSearch < lenSearch)) {
2218	const unsigned char leadByte = cbView.CharAt(pos + indexDocument);
2219	const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? `2` : `1`;
2220	if (!widthFirstCharacter) {
2221	widthFirstCharacter = widthChar;
2222	}
2223	if ((pos + indexDocument + widthChar) > limitPos) {
2224	break;
2225	}
2226	size_t lenFlat = `1`;
2227	if (widthChar == `1`) {
2228	characterMatches = searchThing [indexSearch] == MakeLowerCase(leadByte);
2229	} else {
2230	char bytes[maxBytesCharacter + `1`];
2231	bytes[`0`] = leadByte;
2232	bytes[`1`] = cbView.CharAt(pos + indexDocument + `1`);
2233	char folded[maxBytesCharacter * maxFoldingExpansion + `1`];
2234	lenFlat = pcf ->Fold(folded, sizeof(folded), bytes, widthChar);
2235	// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2236	assert((indexSearch + lenFlat) <= searchThing.size());
2237	// Does folded match the buffer
2238	characterMatches = `0` == memcmp(folded, &searchThing [`0`] + indexSearch, lenFlat);
2239	}
2240	if (!characterMatches) {
2241	break;
2242	}
2243	indexDocument += widthChar;
2244	indexSearch += lenFlat;
2245	}
2246	if (characterMatches && (indexSearch == lenSearch)) {
2247	if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2248	*length = indexDocument;
2249	return pos;
2250	}
2251	}
2252	if (forward) {
2253	pos += widthFirstCharacter;
2254	} else {
2255	if (!NextCharacter(pos, increment)) {
2256	break;
2257	}
2258	}
2259	}
2260	} else {
2261	const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + `1` : endPos;
2262	std::vector<char> searchThing(lengthFind + `1`);
2263	pcf ->Fold(&searchThing [`0`], searchThing.size(), search, lengthFind);
2264	while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2265	bool found = (pos + lengthFind) <= limitPos;
2266	for (int indexSearch = `0`; (indexSearch < lengthFind) && found; indexSearch++) {
2267	const char ch = cbView.CharAt(pos + indexSearch);
2268	const char chTest = searchThing [indexSearch];
2269	if (UTF8IsAscii(ch)) {
2270	found = chTest == MakeLowerCase(ch);
2271	} else {
2272	char folded[`2`];
2273	pcf ->Fold(folded, sizeof(folded), &ch, `1`);
2274	found = folded[`0`] == chTest;
2275	}
2276	}
2277	if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2278	return pos;
2279	}
2280	pos += increment;
2281	}
2282	}
2283	}
2284	//Platform::DebugPrintf("Not found\n");
2285	return -`1`;
2286	}
2287
2288	const char Document::SubstituteByPosition(const* char text, Sci::Position length) {
2289	if (regex)
2290	return regex ->SubstituteByPosition(this, text, length);
2291	else
2292	return nullptr;
2293	}
2294
2295	LineCharacterIndexType Document::LineCharacterIndex() const noexcept {
2296	return cb.LineCharacterIndex();
2297	}
2298
2299	void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2300	return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2301	}
2302
2303	void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2304	return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2305	}
2306
2307	Sci::Line Document::LinesTotal() const noexcept {
2308	return cb.Lines();
2309	}
2310
2311	void Document::AllocateLines(Sci::Line lines) {
2312	cb.AllocateLines(lines);
2313	}
2314
2315	void Document::SetDefaultCharClasses(bool includeWordClass) {
2316	charClass.SetDefaultCharClasses(includeWordClass);
2317	}
2318
2319	void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) {
2320	charClass.SetCharClasses(chars, newCharClass);
2321	}
2322
2323	int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char buffer) const* {
2324	return charClass.GetCharsOfClass(characterClass, buffer);
2325	}
2326
2327	void Document::SetCharacterCategoryOptimization(int countCharacters) {
2328	charMap.Optimize(countCharacters);
2329	}
2330
2331	int Document::CharacterCategoryOptimization() const noexcept {
2332	return charMap.Size();
2333	}
2334
2335	void SCI_METHOD Document::StartStyling(Sci_Position position) {
2336	endStyled = position;
2337	}
2338
2339	bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2340	if (enteredStyling != `0`) {
2341	return false;
2342	} else {
2343	enteredStyling++;
2344	const Sci::Position prevEndStyled = endStyled;
2345	if (cb.SetStyleFor(endStyled, length, style)) {
2346	const DocModification mh(ModificationFlags::ChangeStyle \| ModificationFlags::User,
2347	prevEndStyled, length);
2348	NotifyModified(mh);
2349	}
2350	endStyled += length;
2351	enteredStyling--;
2352	return true;
2353	}
2354	}
2355
2356	bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2357	if (enteredStyling != `0`) {
2358	return false;
2359	} else {
2360	enteredStyling++;
2361	bool didChange = false;
2362	Sci::Position startMod = `0`;
2363	Sci::Position endMod = `0`;
2364	for (int iPos = `0`; iPos < length; iPos++, endStyled++) {
2365	PLATFORM_ASSERT(endStyled < Length());
2366	if (cb.SetStyleAt(endStyled, styles[iPos])) {
2367	if (!didChange) {
2368	startMod = endStyled;
2369	}
2370	didChange = true;
2371	endMod = endStyled;
2372	}
2373	}
2374	if (didChange) {
2375	const DocModification mh(ModificationFlags::ChangeStyle \| ModificationFlags::User,
2376	startMod, endMod - startMod + `1`);
2377	NotifyModified(mh);
2378	}
2379	enteredStyling--;
2380	return true;
2381	}
2382	}
2383
2384	void Document::EnsureStyledTo(Sci::Position pos) {
2385	if ((enteredStyling == `0`) && (pos > GetEndStyled())) {
2386	IncrementStyleClock();
2387	if (pli && !pli ->UseContainerLexing()) {
2388	const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled());
2389	const Sci::Position endStyledTo = LineStart(lineEndStyled);
2390	pli ->Colourise(endStyledTo, pos);
2391	} else {
2392	// Ask the watchers to style, and stop as soon as one responds.
2393	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2394	(pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2395	it ->watcher->NotifyStyleNeeded(this, it ->userData, pos);
2396	}
2397	}
2398	}
2399	}
2400
2401	void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2402	const Sci::Position stylingStart = GetEndStyled();
2403	ElapsedPeriod epStyling;
2404	EnsureStyledTo(pos);
2405	durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration());
2406	}
2407
2408	void Document::LexerChanged() {
2409	// Tell the watchers the lexer has changed.
2410	for (const WatcherWithUserData &watcher : watchers) {
2411	watcher.watcher->NotifyLexerChanged(this, watcher.userData);
2412	}
2413	}
2414
2415	LexInterface Document::GetLexInterface() const* noexcept {
2416	return pli.get();
2417	}
2418
2419	void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
2420	pli = std::move(pLexInterface);
2421	}
2422
2423	int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2424	const int statePrevious = States()->SetLineState(line, state);
2425	if (state != statePrevious) {
2426	const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), `0`, `0`, nullptr,
2427	static_cast<Sci::Line>(line));
2428	NotifyModified(mh);
2429	}
2430	return statePrevious;
2431	}
2432
2433	int SCI_METHOD Document::GetLineState(Sci_Position line) const {
2434	return States()->GetLineState(line);
2435	}
2436
2437	Sci::Line Document::GetMaxLineState() const noexcept {
2438	return States()->GetMaxLineState();
2439	}
2440
2441	void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2442	const DocModification mh(ModificationFlags::LexerState, start,
2443	end-start, `0`, nullptr, `0`);
2444	NotifyModified(mh);
2445	}
2446
2447	StyledText Document::MarginStyledText(Sci::Line line) const noexcept {
2448	const LineAnnotation *pla = Margins();
2449	return StyledText (pla->Length(line), pla->Text(line),
2450	pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2451	}
2452
2453	void Document::MarginSetText(Sci::Line line, const char *text) {
2454	Margins()->SetText(line, text);
2455	const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line),
2456	`0`, `0`, nullptr, line);
2457	NotifyModified(mh);
2458	}
2459
2460	void Document::MarginSetStyle(Sci::Line line, int style) {
2461	Margins()->SetStyle(line, style);
2462	NotifyModified(DocModification (ModificationFlags::ChangeMargin, LineStart(line),
2463	`0`, `0`, nullptr, line));
2464	}
2465
2466	void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2467	Margins()->SetStyles(line, styles);
2468	NotifyModified(DocModification (ModificationFlags::ChangeMargin, LineStart(line),
2469	`0`, `0`, nullptr, line));
2470	}
2471
2472	void Document::MarginClearAll() {
2473	const Sci::Line maxEditorLine = LinesTotal();
2474	for (Sci::Line l=`0`; l<maxEditorLine; l++)
2475	MarginSetText(l, nullptr);
2476	// Free remaining data
2477	Margins()->ClearAll();
2478	}
2479
2480	StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept {
2481	const LineAnnotation *pla = Annotations();
2482	return StyledText (pla->Length(line), pla->Text(line),
2483	pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2484	}
2485
2486	void Document::AnnotationSetText(Sci::Line line, const char *text) {
2487	if (line >= `0` && line < LinesTotal()) {
2488	const Sci::Line linesBefore = AnnotationLines(line);
2489	Annotations()->SetText(line, text);
2490	const int linesAfter = AnnotationLines(line);
2491	DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2492	`0`, `0`, nullptr, line);
2493	mh.annotationLinesAdded = linesAfter - linesBefore;
2494	NotifyModified(mh);
2495	}
2496	}
2497
2498	void Document::AnnotationSetStyle(Sci::Line line, int style) {
2499	if (line >= `0` && line < LinesTotal()) {
2500	Annotations()->SetStyle(line, style);
2501	const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2502	`0`, `0`, nullptr, line);
2503	NotifyModified(mh);
2504	}
2505	}
2506
2507	void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2508	if (line >= `0` && line < LinesTotal()) {
2509	Annotations()->SetStyles(line, styles);
2510	}
2511	}
2512
2513	int Document::AnnotationLines(Sci::Line line) const noexcept {
2514	return Annotations()->Lines(line);
2515	}
2516
2517	void Document::AnnotationClearAll() {
2518	const Sci::Line maxEditorLine = LinesTotal();
2519	for (Sci::Line l=`0`; l<maxEditorLine; l++)
2520	AnnotationSetText(l, nullptr);
2521	// Free remaining data
2522	Annotations()->ClearAll();
2523	}
2524
2525	StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept {
2526	const LineAnnotation *pla = EOLAnnotations();
2527	return StyledText (pla->Length(line), pla->Text(line),
2528	pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2529	}
2530
2531	void Document::EOLAnnotationSetText(Sci::Line line, const char *text) {
2532	if (line >= `0` && line < LinesTotal()) {
2533	EOLAnnotations()->SetText(line, text);
2534	const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2535	`0`, `0`, nullptr, line);
2536	NotifyModified(mh);
2537	}
2538	}
2539
2540	void Document::EOLAnnotationSetStyle(Sci::Line line, int style) {
2541	if (line >= `0` && line < LinesTotal()) {
2542	EOLAnnotations()->SetStyle(line, style);
2543	const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2544	`0`, `0`, nullptr, line);
2545	NotifyModified(mh);
2546	}
2547	}
2548
2549	void Document::EOLAnnotationClearAll() {
2550	const Sci::Line maxEditorLine = LinesTotal();
2551	for (Sci::Line l=`0`; l<maxEditorLine; l++)
2552	EOLAnnotationSetText(l, nullptr);
2553	// Free remaining data
2554	EOLAnnotations()->ClearAll();
2555	}
2556
2557	void Document::IncrementStyleClock() noexcept {
2558	styleClock = (styleClock + `1`) % `0x100000`;
2559	}
2560
2561	void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
2562	decorations ->SetCurrentIndicator(indicator);
2563	}
2564
2565	void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2566	const FillResult<Sci::Position> fr = decorations ->FillRange(
2567	position, value, fillLength);
2568	if (fr.changed) {
2569	const DocModification mh(ModificationFlags::ChangeIndicator \| ModificationFlags::User,
2570	fr.position, fr.fillLength);
2571	NotifyModified(mh);
2572	}
2573	}
2574
2575	bool Document::AddWatcher(DocWatcher watcher, void* *userData) {
2576	const WatcherWithUserData wwud(watcher, userData);
2577	std::vector<WatcherWithUserData>::iterator it =
2578	std::find(watchers.begin(), watchers.end(), wwud);
2579	if (it != watchers.end())
2580	return false;
2581	watchers.push_back(wwud);
2582	return true;
2583	}
2584
2585	bool Document::RemoveWatcher(DocWatcher watcher, void* userData) noexcept* {
2586	try {
2587	// This can never fail as WatcherWithUserData constructor and == are noexcept
2588	// but std::find is not noexcept.
2589	std::vector<WatcherWithUserData>::iterator it =
2590	std::find(watchers.begin(), watchers.end(), WatcherWithUserData (watcher, userData));
2591	if (it != watchers.end()) {
2592	watchers.erase(it);
2593	return true;
2594	}
2595	} catch (...) {
2596	// Ignore any exception
2597	}
2598	return false;
2599	}
2600
2601	void Document::NotifyModifyAttempt() {
2602	for (const WatcherWithUserData &watcher : watchers) {
2603	watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2604	}
2605	}
2606
2607	void Document::NotifySavePoint(bool atSavePoint) {
2608	for (const WatcherWithUserData &watcher : watchers) {
2609	watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2610	}
2611	}
2612
2613	void Document::NotifyModified(DocModification mh) {
2614	if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) {
2615	decorations ->InsertSpace(mh.position, mh.length);
2616	} else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) {
2617	decorations ->DeleteRange(mh.position, mh.length);
2618	}
2619	for (const WatcherWithUserData &watcher : watchers) {
2620	watcher.watcher->NotifyModified(this, mh, watcher.userData);
2621	}
2622	}
2623
2624	bool Document::IsWordPartSeparator(unsigned int ch) const {
2625	return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch);
2626	}
2627
2628	Sci::Position Document::WordPartLeft(Sci::Position pos) const {
2629	if (pos > `0`) {
2630	pos -= CharacterBefore(pos).widthBytes;
2631	CharacterExtracted ceStart = CharacterAfter(pos);
2632	if (IsWordPartSeparator(ceStart.character)) {
2633	while (pos > `0` && IsWordPartSeparator(CharacterAfter(pos).character)) {
2634	pos -= CharacterBefore(pos).widthBytes;
2635	}
2636	}
2637	if (pos > `0`) {
2638	ceStart = CharacterAfter(pos);
2639	pos -= CharacterBefore(pos).widthBytes;
2640	if (IsLowerCase(ceStart.character)) {
2641	while (pos > `0` && IsLowerCase(CharacterAfter(pos).character))
2642	pos -= CharacterBefore(pos).widthBytes;
2643	if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
2644	pos += CharacterAfter(pos).widthBytes;
2645	} else if (IsUpperCase(ceStart.character)) {
2646	while (pos > `0` && IsUpperCase(CharacterAfter(pos).character))
2647	pos -= CharacterBefore(pos).widthBytes;
2648	if (!IsUpperCase(CharacterAfter(pos).character))
2649	pos += CharacterAfter(pos).widthBytes;
2650	} else if (IsADigit(ceStart.character)) {
2651	while (pos > `0` && IsADigit(CharacterAfter(pos).character))
2652	pos -= CharacterBefore(pos).widthBytes;
2653	if (!IsADigit(CharacterAfter(pos).character))
2654	pos += CharacterAfter(pos).widthBytes;
2655	} else if (IsPunctuation(ceStart.character)) {
2656	while (pos > `0` && IsPunctuation(CharacterAfter(pos).character))
2657	pos -= CharacterBefore(pos).widthBytes;
2658	if (!IsPunctuation(CharacterAfter(pos).character))
2659	pos += CharacterAfter(pos).widthBytes;
2660	} else if (IsASpace(ceStart.character)) {
2661	while (pos > `0` && IsASpace(CharacterAfter(pos).character))
2662	pos -= CharacterBefore(pos).widthBytes;
2663	if (!IsASpace(CharacterAfter(pos).character))
2664	pos += CharacterAfter(pos).widthBytes;
2665	} else if (!IsASCII(ceStart.character)) {
2666	while (pos > `0` && !IsASCII(CharacterAfter(pos).character))
2667	pos -= CharacterBefore(pos).widthBytes;
2668	if (IsASCII(CharacterAfter(pos).character))
2669	pos += CharacterAfter(pos).widthBytes;
2670	} else {
2671	pos += CharacterAfter(pos).widthBytes;
2672	}
2673	}
2674	}
2675	return pos;
2676	}
2677
2678	Sci::Position Document::WordPartRight(Sci::Position pos) const {
2679	CharacterExtracted ceStart = CharacterAfter(pos);
2680	const Sci::Position length = LengthNoExcept();
2681	if (IsWordPartSeparator(ceStart.character)) {
2682	while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
2683	pos += CharacterAfter(pos).widthBytes;
2684	ceStart = CharacterAfter(pos);
2685	}
2686	if (!IsASCII(ceStart.character)) {
2687	while (pos < length && !IsASCII(CharacterAfter(pos).character))
2688	pos += CharacterAfter(pos).widthBytes;
2689	} else if (IsLowerCase(ceStart.character)) {
2690	while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2691	pos += CharacterAfter(pos).widthBytes;
2692	} else if (IsUpperCase(ceStart.character)) {
2693	if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
2694	pos += CharacterAfter(pos).widthBytes;
2695	while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2696	pos += CharacterAfter(pos).widthBytes;
2697	} else {
2698	while (pos < length && IsUpperCase(CharacterAfter(pos).character))
2699	pos += CharacterAfter(pos).widthBytes;
2700	}
2701	if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
2702	pos -= CharacterBefore(pos).widthBytes;
2703	} else if (IsADigit(ceStart.character)) {
2704	while (pos < length && IsADigit(CharacterAfter(pos).character))
2705	pos += CharacterAfter(pos).widthBytes;
2706	} else if (IsPunctuation(ceStart.character)) {
2707	while (pos < length && IsPunctuation(CharacterAfter(pos).character))
2708	pos += CharacterAfter(pos).widthBytes;
2709	} else if (IsASpace(ceStart.character)) {
2710	while (pos < length && IsASpace(CharacterAfter(pos).character))
2711	pos += CharacterAfter(pos).widthBytes;
2712	} else {
2713	pos += CharacterAfter(pos).widthBytes;
2714	}
2715	return pos;
2716	}
2717
2718	Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
2719	const char sStart = cb.StyleAt(pos);
2720	if (delta < `0`) {
2721	while (pos > `0` && (cb.StyleAt(pos) == sStart) && (!singleLine \|\| !IsEOLCharacter(cb.CharAt(pos))))
2722	pos--;
2723	pos++;
2724	} else {
2725	while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine \|\| !IsEOLCharacter(cb.CharAt(pos))))
2726	pos++;
2727	}
2728	return pos;
2729	}
2730
// Return the partner of a brace character: () [] {} <> map to each other.
// Any other character, including NUL, yields '\0'.
static char BraceOpposite(char ch) noexcept {
	// Partners sit next to each other, so flipping the lowest index bit
	// moves between the two members of a pair.
	constexpr char bracePairs[] = "()[]{}<>";
	for (unsigned int i = 0; bracePairs[i] != '\0'; i++) {
		if (bracePairs[i] == ch) {
			return bracePairs[i ^ 1U];
		}
	}
	return '\0';
}
2753
2754	// TODO: should be able to extend styled region to find matching brace
2755	Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /maxReStyle/, Sci::Position startPos, bool useStartPos) noexcept {
2756	const char chBrace = CharAt(position);
2757	const char chSeek = BraceOpposite(chBrace);
2758	if (chSeek == `'\0'`)
2759	return - `1`;
2760	const int styBrace = StyleIndexAt(position);
2761	int direction = -`1`;
2762	if (chBrace == `'('` \|\| chBrace == `'['` \|\| chBrace == `'{'` \|\| chBrace == `'<'`)
2763	direction = `1`;
2764	int depth = `1`;
2765	position = useStartPos ? startPos : NextPosition(position, direction);
2766	while ((position >= `0`) && (position < LengthNoExcept())) {
2767	const char chAtPos = CharAt(position);
2768	const int styAtPos = StyleIndexAt(position);
2769	if ((position > GetEndStyled()) \|\| (styAtPos == styBrace)) {
2770	if (chAtPos == chBrace)
2771	depth++;
2772	if (chAtPos == chSeek)
2773	depth--;
2774	if (depth == `0`)
2775	return position;
2776	}
2777	const Sci::Position positionBeforeMove = position;
2778	position = NextPosition(position, direction);
2779	if (position == positionBeforeMove)
2780	break;
2781	}
2782	return - `1`;
2783	}
2784
2785	/**
2786	* Implementation of RegexSearchBase for the default built-in regular expression engine
2787	*/
2788	class BuiltinRegex : public RegexSearchBase {
2789	public:
2790	explicit BuiltinRegex(CharClassify *charClassTable) : search (charClassTable) {}
2791
2792	Sci::Position FindText(Document doc, Sci::Position minPos, Sci::Position maxPos, const* char *s,
2793	bool caseSensitive, bool word, bool wordStart, FindOption flags,
2794	Sci::Position *length) override;
2795
2796	const char SubstituteByPosition(Document doc, const char text, Sci::Position length) override;
2797
2798	private:
2799	RESearch search;
2800	std::string substituted;
2801	};
2802
2803	namespace {
2804
2805	/**
2806	* RESearchRange keeps track of search range.
2807	*/
2808	class RESearchRange {
2809	public:
2810	const Document *doc;
2811	int increment;
2812	Sci::Position startPos;
2813	Sci::Position endPos;
2814	Sci::Line lineRangeStart;
2815	Sci::Line lineRangeEnd;
2816	Sci::Line lineRangeBreak;
2817	RESearchRange(const Document doc_, Sci::Position minPos, Sci::Position maxPos) noexcept* : doc(doc_) {
2818	increment = (minPos <= maxPos) ? `1` : -`1`;
2819
2820	// Range endpoints should not be inside DBCS characters or between a CR and LF,
2821	// but just in case, move them.
2822	startPos = doc->MovePositionOutsideChar(minPos, `1`, true);
2823	endPos = doc->MovePositionOutsideChar(maxPos, `1`, true);
2824
2825	lineRangeStart = doc->SciLineFromPosition(startPos);
2826	lineRangeEnd = doc->SciLineFromPosition(endPos);
2827	lineRangeBreak = lineRangeEnd + increment;
2828	}
2829	Range LineRange(Sci::Line line) const {
2830	Range range(doc->LineStart(line), doc->LineEnd(line));
2831	if (increment == `1`) {
2832	if (line == lineRangeStart)
2833	range.start = startPos;
2834	if (line == lineRangeEnd)
2835	range.end = endPos;
2836	} else {
2837	if (line == lineRangeEnd)
2838	range.start = endPos;
2839	if (line == lineRangeStart)
2840	range.end = startPos;
2841	}
2842	return range;
2843	}
2844	};
2845
2846	// Define a way for the Regular Expression code to access the document
2847	class DocumentIndexer : public CharacterIndexer {
2848	Document *pdoc;
2849	Sci::Position end;
2850	public:
2851	DocumentIndexer(Document pdoc_, Sci::Position end_) noexcept* :
2852	pdoc(pdoc_), end(end_) {
2853	}
2854
2855	DocumentIndexer(const DocumentIndexer &) = delete;
2856	DocumentIndexer(DocumentIndexer &&) = delete;
2857	DocumentIndexer &operator=(const DocumentIndexer &) = delete;
2858	DocumentIndexer &operator=(DocumentIndexer &&) = delete;
2859
2860	~DocumentIndexer() override = default;
2861
2862	char CharAt(Sci::Position index) const noexcept override {
2863	if (index < `0` \|\| index >= end)
2864	return `0`;
2865	else
2866	return pdoc->CharAt(index);
2867	}
2868	};
2869
2870	#ifndef NO_CXX11_REGEX
2871
2872	class ByteIterator {
2873	public:
2874	using iterator_category = std::bidirectional_iterator_tag;
2875	using value_type = char;
2876	using difference_type = ptrdiff_t;
2877	using pointer = char*;
2878	using reference = char&;
2879
2880	const Document *doc;
2881	Sci::Position position;
2882
2883	explicit ByteIterator(const Document doc_=nullptr, Sci::Position position_=`0`) noexcept* :
2884	doc(doc_), position(position_) {
2885	}
2886	char operator() const* noexcept {
2887	return doc->CharAt(position);
2888	}
2889	ByteIterator &operator++() noexcept {
2890	position++;
2891	return *this;
2892	}
2893	ByteIterator operator++(int) noexcept {
2894	ByteIterator retVal(*this);
2895	position++;
2896	return retVal;
2897	}
2898	ByteIterator &operator--() noexcept {
2899	position--;
2900	return *this;
2901	}
2902	bool operator==(const ByteIterator &other) const noexcept {
2903	return doc == other.doc && position == other.position;
2904	}
2905	bool operator!=(const ByteIterator &other) const noexcept {
2906	return doc != other.doc \|\| position != other.position;
2907	}
2908	Sci::Position Pos() const noexcept {
2909	return position;
2910	}
2911	Sci::Position PosRoundUp() const noexcept {
2912	return position;
2913	}
2914	};
2915
2916	// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2917	// Would be better to use sizeof(wchar_t) or similar to differentiate
2918	// but easier for now to hard-code platforms.
2919	// C++11 has char16_t and char32_t but neither Clang nor Visual C++
2920	// appear to allow specializing basic_regex over these.
2921
2922	#ifdef _WIN32
2923	#define WCHAR_T_IS_16 1
2924	#else
2925	#define WCHAR_T_IS_16 0
2926	#endif
2927
2928	#if WCHAR_T_IS_16
2929
2930	// On Windows, report non-BMP characters as 2 separate surrogates as that
2931	// matches wregex since it is based on wchar_t.
2932	class UTF8Iterator {
2933	// These 3 fields determine the iterator position and are used for comparisons
2934	const Document *doc;
2935	Sci::Position position;
2936	size_t characterIndex;
2937	// Remaining fields are derived from the determining fields so are excluded in comparisons
2938	unsigned int lenBytes;
2939	size_t lenCharacters;
2940	wchar_t buffered[`2`];
2941	public:
2942	using iterator_category = std::bidirectional_iterator_tag;
2943	using value_type = wchar_t;
2944	using difference_type = ptrdiff_t;
2945	using pointer = wchar_t*;
2946	using reference = wchar_t&;
2947
2948	explicit UTF8Iterator(const Document doc_=nullptr, Sci::Position position_=`0`) noexcept* :
2949	doc(doc_), position(position_), characterIndex(`0`), lenBytes(`0`), lenCharacters(`0`), buffered{} {
2950	buffered[`0`] = `0`;
2951	buffered[`1`] = `0`;
2952	if (doc) {
2953	ReadCharacter();
2954	}
2955	}
2956	wchar_t operator() const* noexcept {
2957	assert(lenCharacters != `0`);
2958	return buffered[characterIndex];
2959	}
2960	UTF8Iterator &operator++() noexcept {
2961	if ((characterIndex + `1`) < (lenCharacters)) {
2962	characterIndex++;
2963	} else {
2964	position += lenBytes;
2965	ReadCharacter();
2966	characterIndex = `0`;
2967	}
2968	return *this;
2969	}
2970	UTF8Iterator operator++(int) noexcept {
2971	UTF8Iterator retVal(*this);
2972	if ((characterIndex + `1`) < (lenCharacters)) {
2973	characterIndex++;
2974	} else {
2975	position += lenBytes;
2976	ReadCharacter();
2977	characterIndex = `0`;
2978	}
2979	return retVal;
2980	}
2981	UTF8Iterator &operator--() noexcept {
2982	if (characterIndex) {
2983	characterIndex--;
2984	} else {
2985	position = doc->NextPosition(position, -`1`);
2986	ReadCharacter();
2987	characterIndex = lenCharacters - `1`;
2988	}
2989	return *this;
2990	}
2991	bool operator==(const UTF8Iterator &other) const noexcept {
2992	// Only test the determining fields, not the character widths and values derived from this
2993	return doc == other.doc &&
2994	position == other.position &&
2995	characterIndex == other.characterIndex;
2996	}
2997	bool operator!=(const UTF8Iterator &other) const noexcept {
2998	// Only test the determining fields, not the character widths and values derived from this
2999	return doc != other.doc \|\|
3000	position != other.position \|\|
3001	characterIndex != other.characterIndex;
3002	}
3003	Sci::Position Pos() const noexcept {
3004	return position;
3005	}
3006	Sci::Position PosRoundUp() const noexcept {
3007	if (characterIndex)
3008	return position + lenBytes; // Force to end of character
3009	else
3010	return position;
3011	}
3012	private:
3013	void ReadCharacter() noexcept {
3014	const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3015	lenBytes = charExtracted.widthBytes;
3016	if (charExtracted.character == unicodeReplacementChar) {
3017	lenCharacters = `1`;
3018	buffered[`0`] = static_cast<wchar_t>(charExtracted.character);
3019	} else {
3020	lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
3021	}
3022	}
3023	};
3024
3025	#else
3026
3027	// On Unix, report non-BMP characters as single characters
3028
3029	class UTF8Iterator {
3030	const Document *doc;
3031	Sci::Position position;
3032	public:
3033	using iterator_category = std::bidirectional_iterator_tag;
3034	using value_type = wchar_t;
3035	using difference_type = ptrdiff_t;
3036	using pointer = wchar_t*;
3037	using reference = wchar_t&;
3038
3039	explicit UTF8Iterator(const Document doc_=nullptr, Sci::Position position_=`0`) noexcept* :
3040	doc(doc_), position(position_) {
3041	}
3042	wchar_t operator() const* noexcept {
3043	const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3044	return charExtracted.character;
3045	}
3046	UTF8Iterator &operator++() noexcept {
3047	position = doc->NextPosition(position, `1`);
3048	return *this;
3049	}
3050	UTF8Iterator operator++(int) noexcept {
3051	UTF8Iterator retVal(*this);
3052	position = doc->NextPosition(position, `1`);
3053	return retVal;
3054	}
3055	UTF8Iterator &operator--() noexcept {
3056	position = doc->NextPosition(position, -`1`);
3057	return *this;
3058	}
3059	bool operator==(const UTF8Iterator &other) const noexcept {
3060	return doc == other.doc && position == other.position;
3061	}
3062	bool operator!=(const UTF8Iterator &other) const noexcept {
3063	return doc != other.doc \|\| position != other.position;
3064	}
3065	Sci::Position Pos() const noexcept {
3066	return position;
3067	}
3068	Sci::Position PosRoundUp() const noexcept {
3069	return position;
3070	}
3071	};
3072
3073	#endif
3074
3075	std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) {
3076	std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
3077	if (!doc->IsLineStartPosition(startPos))
3078	flagsMatch \|= std::regex_constants::match_not_bol;
3079	if (!doc->IsLineEndPosition(endPos))
3080	flagsMatch \|= std::regex_constants::match_not_eol;
3081	return flagsMatch;
3082	}
3083
3084	template<typename Iterator, typename Regex>
3085	bool MatchOnLines(const Document doc, const* Regex &regexp, const RESearchRange &resr, RESearch &search) {
3086	std::match_results<Iterator> match;
3087
3088	// MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3089	// CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3090	// The std::regex::multiline option was added to C++17 to improve behaviour but
3091	// has not been implemented by compiler runtimes with MSVC always in multiline
3092	// mode and libc++ and libstdc++ always in single-line mode.
3093	// If multiline regex worked well then the line by line iteration could be removed
3094	// for the forwards case and replaced with the following 4 lines:
3095	#ifdef REGEX_MULTILINE
3096	Iterator itStart(doc, resr.startPos);
3097	Iterator itEnd(doc, resr.endPos);
3098	const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos);
3099	const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3100	#else
3101	// Line by line.
3102	bool matched = false;
3103	for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3104	const Range lineRange = resr.LineRange(line);
3105	Iterator itStart(doc, lineRange.start);
3106	Iterator itEnd(doc, lineRange.end);
3107	std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
3108	matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3109	// Check for the last match on this line.
3110	if (matched) {
3111	if (resr.increment == -`1`) {
3112	while (matched) {
3113	Iterator itNext(doc, match[`0`].second.PosRoundUp());
3114	flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
3115	std::match_results<Iterator> matchNext;
3116	matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
3117	if (matched) {
3118	if (match[`0`].first == match[`0`].second) {
3119	// Empty match means failure so exit
3120	return false;
3121	}
3122	match = matchNext;
3123	}
3124	}
3125	matched = true;
3126	}
3127	break;
3128	}
3129	}
3130	#endif
3131	if (matched) {
3132	for (size_t co = `0`; co < match.size() && co < RESearch::MAXTAG; co++) {
3133	search.bopat[co] = match[co].first.Pos();
3134	search.eopat[co] = match[co].second.PosRoundUp();
3135	const Sci::Position lenMatch = search.eopat[co] - search.bopat[co];
3136	search.pat[co].resize(lenMatch);
3137	for (Sci::Position iPos = `0`; iPos < lenMatch; iPos++) {
3138	search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
3139	}
3140	}
3141	}
3142	return matched;
3143	}
3144
3145	Sci::Position Cxx11RegexFindText(const Document doc, Sci::Position minPos, Sci::Position maxPos, const* char *s,
3146	bool caseSensitive, Sci::Position *length, RESearch &search) {
3147	const RESearchRange resr(doc, minPos, maxPos);
3148	try {
3149	//ElapsedPeriod ep;
3150	std::regex::flag_type flagsRe = std::regex::ECMAScript;
3151	// Flags that appear to have no effect:
3152	// \| std::regex::collate \| std::regex::extended;
3153	if (!caseSensitive)
3154	flagsRe = flagsRe \| std::regex::icase;
3155
3156	// Clear the RESearch so can fill in matches
3157	search.Clear();
3158
3159	bool matched = false;
3160	if (CpUtf8 == doc->dbcsCodePage) {
3161	const std::wstring ws = WStringFromUTF8(s);
3162	std::wregex regexp;
3163	regexp.assign(ws, flagsRe);
3164	matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3165
3166	} else {
3167	std::regex regexp;
3168	regexp.assign(s, flagsRe);
3169	matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3170	}
3171
3172	Sci::Position posMatch = -`1`;
3173	if (matched) {
3174	posMatch = search.bopat[`0`];
3175	*length = search.eopat[`0`] - search.bopat[`0`];
3176	}
3177	// Example - search in doc/ScintillaHistory.html for
3178	// [[:upper:]]eta[[:space:]]
3179	// On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3180	//const double durSearch = ep.Duration(true);
3181	//Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3182	return posMatch;
3183	} catch (std::regex_error &) {
3184	// Failed to create regular expression
3185	throw RegexError ();
3186	} catch (...) {
3187	// Failed in some other way
3188	return -`1`;
3189	}
3190	}
3191
3192	#endif
3193
3194	}
3195
3196	Sci::Position BuiltinRegex::FindText(Document doc, Sci::Position minPos, Sci::Position maxPos, const* char *s,
3197	bool caseSensitive, bool, bool, FindOption flags,
3198	Sci::Position *length) {
3199
3200	#ifndef NO_CXX11_REGEX
3201	if (FlagSet(flags, FindOption::Cxx11RegEx)) {
3202	return Cxx11RegexFindText(doc, minPos, maxPos, s,
3203	caseSensitive, length, search);
3204	}
3205	#endif
3206
3207	const RESearchRange resr(doc, minPos, maxPos);
3208
3209	const bool posix = FlagSet(flags, FindOption::Posix);
3210
3211	const char errmsg = search.Compile(s, length, caseSensitive, posix);
3212	if (errmsg) {
3213	return -`1`;
3214	}
3215	// Find a variable in a property file: \$($[A-Za-z0-9_.]+$)
3216	// Replace first '.' with '-' in each property file variable reference:
3217	// Search: \$($[A-Za-z0-9_-]+$\.$[A-Za-z0-9_.]+$)
3218	// Replace: $(\1-\2)
3219	Sci::Position pos = -`1`;
3220	Sci::Position lenRet = `0`;
3221	const bool searchforLineStart = s[`0`] == `'^'`;
3222	const char searchEnd = s[*length - `1`];
3223	const char searchEndPrev = (length > `1`) ? s[length - `2`] : `'\0'`;
3224	const bool searchforLineEnd = (searchEnd == `'$'`) && (searchEndPrev != `'\\'`);
3225	for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3226	Sci::Position startOfLine = doc->LineStart(line);
3227	Sci::Position endOfLine = doc->LineEnd(line);
3228	if (resr.increment == `1`) {
3229	if (line == resr.lineRangeStart) {
3230	if ((resr.startPos != startOfLine) && searchforLineStart)
3231	continue; // Can't match start of line if start position after start of line
3232	startOfLine = resr.startPos;
3233	}
3234	if (line == resr.lineRangeEnd) {
3235	if ((resr.endPos != endOfLine) && searchforLineEnd)
3236	continue; // Can't match end of line if end position before end of line
3237	endOfLine = resr.endPos;
3238	}
3239	} else {
3240	if (line == resr.lineRangeEnd) {
3241	if ((resr.endPos != startOfLine) && searchforLineStart)
3242	continue; // Can't match start of line if end position after start of line
3243	startOfLine = resr.endPos;
3244	}
3245	if (line == resr.lineRangeStart) {
3246	if ((resr.startPos != endOfLine) && searchforLineEnd)
3247	continue; // Can't match end of line if start position before end of line
3248	endOfLine = resr.startPos;
3249	}
3250	}
3251
3252	const DocumentIndexer di(doc, endOfLine);
3253	int success = search.Execute(di, startOfLine, endOfLine);
3254	if (success) {
3255	pos = search.bopat[`0`];
3256	// Ensure only whole characters selected
3257	search.eopat[`0`] = doc->MovePositionOutsideChar(search.eopat[`0`], `1`, false);
3258	lenRet = search.eopat[`0`] - search.bopat[`0`];
3259	// There can be only one start of a line, so no need to look for last match in line
3260	if ((resr.increment == -`1`) && !searchforLineStart) {
3261	// Check for the last match on this line.
3262	int repetitions = `1000`; // Break out of infinite loop
3263	while (success && (search.eopat[`0`] <= endOfLine) && (repetitions--)) {
3264	success = search.Execute(di, pos+`1`, endOfLine);
3265	if (success) {
3266	if (search.eopat[`0`] <= minPos) {
3267	pos = search.bopat[`0`];
3268	lenRet = search.eopat[`0`] - search.bopat[`0`];
3269	} else {
3270	success = `0`;
3271	}
3272	}
3273	}
3274	}
3275	break;
3276	}
3277	}
3278	*length = lenRet;
3279	return pos;
3280	}
3281
3282	const char BuiltinRegex::SubstituteByPosition(Document doc, const char text, Sci::Position length) {
3283	substituted.clear();
3284	const DocumentIndexer di(doc, doc->Length());
3285	search.GrabMatches(di);
3286	for (Sci::Position j = `0`; j < *length; j++) {
3287	if (text[j] == `'\\'`) {
3288	if (text[j + `1`] >= `'0'` && text[j + `1`] <= `'9'`) {
3289	const unsigned int patNum = text[j + `1`] - `'0'`;
3290	const Sci::Position len = search.eopat[patNum] - search.bopat[patNum];
3291	if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
3292	substituted.append(search.pat[patNum].c_str(), len);
3293	j++;
3294	} else {
3295	j++;
3296	switch (text[j]) {
3297	case `'a'`:
3298	substituted.push_back(`'\a'`);
3299	break;
3300	case `'b'`:
3301	substituted.push_back(`'\b'`);
3302	break;
3303	case `'f'`:
3304	substituted.push_back(`'\f'`);
3305	break;
3306	case `'n'`:
3307	substituted.push_back(`'\n'`);
3308	break;
3309	case `'r'`:
3310	substituted.push_back(`'\r'`);
3311	break;
3312	case `'t'`:
3313	substituted.push_back(`'\t'`);
3314	break;
3315	case `'v'`:
3316	substituted.push_back(`'\v'`);
3317	break;
3318	case `'\\'`:
3319	substituted.push_back(`'\\'`);
3320	break;
3321	default:
3322	substituted.push_back(`'\\'`);
3323	j--;
3324	}
3325	}
3326	} else {
3327	substituted.push_back(text[j]);
3328	}
3329	}
3330	*length = substituted.length();
3331	return substituted.c_str();
3332	}
3333
3334	#ifndef SCI_OWNREGEX
3335
3336	RegexSearchBase Scintilla::Internal::CreateRegexSearch(CharClassify charClassTable) {
3337	return new BuiltinRegex (charClassTable);
3338	}
3339
3340	#endif
3341

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/scintilla/src/Document.cxx