| 1 | // Scintilla source code edit control |
| 2 | /** @file Document.cxx |
| 3 | ** Text document that handles notifications, DBCS, styling, words and end of line. |
| 4 | **/ |
| 5 | // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org> |
| 6 | // The License.txt file describes the conditions under which this software may be distributed. |
| 7 | |
| 8 | #include <cstddef> |
| 9 | #include <cstdlib> |
| 10 | #include <cassert> |
| 11 | #include <cstring> |
| 12 | #include <cstdio> |
| 13 | #include <cmath> |
| 14 | |
| 15 | #include <stdexcept> |
| 16 | #include <string> |
| 17 | #include <string_view> |
| 18 | #include <vector> |
| 19 | #include <forward_list> |
| 20 | #include <optional> |
| 21 | #include <algorithm> |
| 22 | #include <memory> |
| 23 | #include <chrono> |
| 24 | |
| 25 | #ifndef NO_CXX11_REGEX |
| 26 | #include <regex> |
| 27 | #endif |
| 28 | |
| 29 | #include "ScintillaTypes.h" |
| 30 | #include "ILoader.h" |
| 31 | #include "ILexer.h" |
| 32 | |
| 33 | #include "Debugging.h" |
| 34 | |
| 35 | #include "CharacterType.h" |
| 36 | #include "CharacterCategoryMap.h" |
| 37 | #include "Position.h" |
| 38 | #include "SplitVector.h" |
| 39 | #include "Partitioning.h" |
| 40 | #include "RunStyles.h" |
| 41 | #include "CellBuffer.h" |
| 42 | #include "PerLine.h" |
| 43 | #include "CharClassify.h" |
| 44 | #include "Decoration.h" |
| 45 | #include "CaseFolder.h" |
| 46 | #include "Document.h" |
| 47 | #include "RESearch.h" |
| 48 | #include "UniConversion.h" |
| 49 | #include "ElapsedPeriod.h" |
| 50 | |
| 51 | using namespace Scintilla; |
| 52 | using namespace Scintilla::Internal; |
| 53 | |
| 54 | LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) { |
| 55 | } |
| 56 | |
// No custom teardown is written here: the compiler-generated destructor is used.
LexInterface::~LexInterface() noexcept = default;
| 58 | |
| 59 | void LexInterface::SetInstance(ILexer5 *instance_) { |
| 60 | instance.reset(instance_); |
| 61 | pdoc->LexerChanged(); |
| 62 | } |
| 63 | |
| 64 | void LexInterface::Colourise(Sci::Position start, Sci::Position end) { |
| 65 | if (pdoc && instance && !performingStyle) { |
| 66 | // Protect against reentrance, which may occur, for example, when |
| 67 | // fold points are discovered while performing styling and the folding |
| 68 | // code looks for child lines which may trigger styling. |
| 69 | performingStyle = true; |
| 70 | |
| 71 | const Sci::Position lengthDoc = pdoc->Length(); |
| 72 | if (end == -1) |
| 73 | end = lengthDoc; |
| 74 | const Sci::Position len = end - start; |
| 75 | |
| 76 | PLATFORM_ASSERT(len >= 0); |
| 77 | PLATFORM_ASSERT(start + len <= lengthDoc); |
| 78 | |
| 79 | int styleStart = 0; |
| 80 | if (start > 0) |
| 81 | styleStart = pdoc->StyleAt(start - 1); |
| 82 | |
| 83 | if (len > 0) { |
| 84 | instance->Lex(start, len, styleStart, pdoc); |
| 85 | instance->Fold(start, len, styleStart, pdoc); |
| 86 | } |
| 87 | |
| 88 | performingStyle = false; |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | LineEndType LexInterface::LineEndTypesSupported() { |
| 93 | if (instance) { |
| 94 | return static_cast<LineEndType>(instance->LineEndTypesSupported()); |
| 95 | } |
| 96 | return LineEndType::Default; |
| 97 | } |
| 98 | |
| 99 | bool LexInterface::UseContainerLexing() const noexcept { |
| 100 | return !instance; |
| 101 | } |
| 102 | |
| 103 | ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : |
| 104 | duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { |
| 105 | } |
| 106 | |
| 107 | void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { |
| 108 | // Only adjust for multiple actions to avoid instability |
| 109 | if (numberActions < 8) |
| 110 | return; |
| 111 | |
| 112 | // Alpha value for exponential smoothing. |
| 113 | // Most recent value contributes 25% to smoothed value. |
| 114 | constexpr double alpha = 0.25; |
| 115 | |
| 116 | const double durationOne = durationOfActions / numberActions; |
| 117 | duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, |
| 118 | minDuration, maxDuration); |
| 119 | } |
| 120 | |
// Current smoothed duration estimate for a single action.
double ActionDuration::Duration() const noexcept {
	return duration;
}
| 124 | |
| 125 | size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { |
| 126 | return std::lround(secondsAllowed / Duration()); |
| 127 | } |
| 128 | |
// Construct an empty document.
// options: the cell buffer is created with styles unless
// DocumentOption::StylesNone is set, and is passed the state of
// DocumentOption::TextLarge as its second argument.
// The styling duration estimate starts at 1e-6 and is bounded to [1e-7, 1e-5].
Document::Document(DocumentOption options) :
	cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
	durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
	refCount = 0;
	// Default line ending follows the platform being compiled for.
#ifdef _WIN32
	eolMode = EndOfLine::CrLf;
#else
	eolMode = EndOfLine::Lf;
#endif
	dbcsCodePage = CpUtf8;
	lineEndBitSet = LineEndType::Default;
	endStyled = 0;
	styleClock = 0;
	// Nesting counters all start at zero.
	enteredModification = 0;
	enteredStyling = 0;
	enteredReadOnlyCount = 0;
	insertionSet = false;
	// Tab and indentation defaults.
	tabInChars = 8;
	indentInChars = 0;
	actualIndentInChars = 8;
	useTabs = true;
	tabIndents = true;
	backspaceUnindents = false;

	matchesValid = false;

	// One per-line store for each slot; the margin, annotation and
	// EOL annotation slots all use LineAnnotation.
	perLineData[ldMarkers] = std::make_unique<LineMarkers>();
	perLineData[ldLevels] = std::make_unique<LineLevels>();
	perLineData[ldState] = std::make_unique<LineState>();
	perLineData[ldMargin] = std::make_unique<LineAnnotation>();
	perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
	perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();

	decorations = DecorationListCreate(IsLarge());

	// Register this document with the cell buffer so that it receives the
	// per-line callbacks, and tell it whether the text is UTF-8.
	cb.SetPerLine(this);
	cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
}
| 167 | |
| 168 | Document::~Document() { |
| 169 | for (const WatcherWithUserData &watcher : watchers) { |
| 170 | watcher.watcher->NotifyDeleted(this, watcher.userData); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | // Increase reference count and return its previous value. |
| 175 | int Document::AddRef() { |
| 176 | return refCount++; |
| 177 | } |
| 178 | |
| 179 | // Decrease reference count and return its previous value. |
| 180 | // Delete the document if reference count reaches zero. |
| 181 | int SCI_METHOD Document::Release() { |
| 182 | const int curRefCount = --refCount; |
| 183 | if (curRefCount == 0) |
| 184 | delete this; |
| 185 | return curRefCount; |
| 186 | } |
| 187 | |
| 188 | void Document::Init() { |
| 189 | for (const std::unique_ptr<PerLine> &pl : perLineData) { |
| 190 | if (pl) |
| 191 | pl->Init(); |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | void Document::InsertLine(Sci::Line line) { |
| 196 | for (const std::unique_ptr<PerLine> &pl : perLineData) { |
| 197 | if (pl) |
| 198 | pl->InsertLine(line); |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | void Document::InsertLines(Sci::Line line, Sci::Line lines) { |
| 203 | for (const auto &pl : perLineData) { |
| 204 | if (pl) |
| 205 | pl->InsertLines(line, lines); |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | void Document::RemoveLine(Sci::Line line) { |
| 210 | for (const std::unique_ptr<PerLine> &pl : perLineData) { |
| 211 | if (pl) |
| 212 | pl->RemoveLine(line); |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | LineMarkers *Document::Markers() const noexcept { |
| 217 | return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get()); |
| 218 | } |
| 219 | |
| 220 | LineLevels *Document::Levels() const noexcept { |
| 221 | return dynamic_cast<LineLevels *>(perLineData[ldLevels].get()); |
| 222 | } |
| 223 | |
| 224 | LineState *Document::States() const noexcept { |
| 225 | return dynamic_cast<LineState *>(perLineData[ldState].get()); |
| 226 | } |
| 227 | |
| 228 | LineAnnotation *Document::Margins() const noexcept { |
| 229 | return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get()); |
| 230 | } |
| 231 | |
| 232 | LineAnnotation *Document::Annotations() const noexcept { |
| 233 | return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get()); |
| 234 | } |
| 235 | |
| 236 | LineAnnotation *Document::EOLAnnotations() const noexcept { |
| 237 | return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get()); |
| 238 | } |
| 239 | |
| 240 | LineEndType Document::LineEndTypesSupported() const { |
| 241 | if ((CpUtf8 == dbcsCodePage) && pli) |
| 242 | return pli->LineEndTypesSupported(); |
| 243 | else |
| 244 | return LineEndType::Default; |
| 245 | } |
| 246 | |
| 247 | bool Document::SetDBCSCodePage(int dbcsCodePage_) { |
| 248 | if (dbcsCodePage != dbcsCodePage_) { |
| 249 | dbcsCodePage = dbcsCodePage_; |
| 250 | SetCaseFolder(nullptr); |
| 251 | cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); |
| 252 | cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); |
| 253 | ModifiedAt(0); // Need to restyle whole document |
| 254 | return true; |
| 255 | } else { |
| 256 | return false; |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) { |
| 261 | if (lineEndBitSet != lineEndBitSet_) { |
| 262 | lineEndBitSet = lineEndBitSet_; |
| 263 | const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); |
| 264 | if (lineEndBitSetActive != cb.GetLineEndTypes()) { |
| 265 | ModifiedAt(0); |
| 266 | cb.SetLineEndTypes(lineEndBitSetActive); |
| 267 | return true; |
| 268 | } else { |
| 269 | return false; |
| 270 | } |
| 271 | } else { |
| 272 | return false; |
| 273 | } |
| 274 | } |
| 275 | |
// Mark the current state as the save point in the cell buffer and
// notify that the document is now at a save point.
void Document::SetSavePoint() {
	cb.SetSavePoint();
	NotifySavePoint(true);
}
| 280 | |
// Undo every step recorded since tentative mode was started, sending the
// same before/after modification notifications as an ordinary undo, then
// commit the tentative state in the cell buffer.
// Does nothing when no tentative session is active or when already inside
// a modification.
void Document::TentativeUndo() {
	if (!TentativeActive())
		return;
	CheckReadOnly();
	// enteredModification guards against re-entrance from notification handlers.
	if (enteredModification == 0) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.TentativeSteps();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// "Before" notification: undoing a removal re-inserts text,
				// undoing anything else except a container action deletes text.
				if (action.at == ActionType::remove) {
					NotifyModified(DocModification(
						ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
				} else if (action.at == ActionType::container) {
					DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
						ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
				}
				cb.PerformUndoStep();
				if (action.at != ActionType::container) {
					ModifiedAt(action.position);
				}

				// "After" notification describing what the undo step did.
				ModificationFlags modFlags = ModificationFlags::Undo;
				// With undo, an insertion action becomes a deletion notification
				if (action.at == ActionType::remove) {
					modFlags |= ModificationFlags::InsertText;
				} else if (action.at == ActionType::insert) {
					modFlags |= ModificationFlags::DeleteText;
				}
				if (steps > 1)
					modFlags |= ModificationFlags::MultiStepUndoRedo;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step also reports whether any step changed the line count.
				if (step == steps - 1) {
					modFlags |= ModificationFlags::LastStepInUndoRedo;
					if (multiLine)
						modFlags |= ModificationFlags::MultilineUndoRedo;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
					linesAdded, action.data.get()));
			}

			// Only notify about the save point when its state changed.
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);

			cb.TentativeCommit();
		}
		enteredModification--;
	}
}
| 341 | |
| 342 | int Document::GetMark(Sci::Line line) const noexcept { |
| 343 | return Markers()->MarkValue(line); |
| 344 | } |
| 345 | |
| 346 | Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { |
| 347 | return Markers()->MarkerNext(lineStart, mask); |
| 348 | } |
| 349 | |
| 350 | int Document::AddMark(Sci::Line line, int markerNum) { |
| 351 | if (line >= 0 && line <= LinesTotal()) { |
| 352 | const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); |
| 353 | const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); |
| 354 | NotifyModified(mh); |
| 355 | return prev; |
| 356 | } else { |
| 357 | return -1; |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | void Document::AddMarkSet(Sci::Line line, int valueSet) { |
| 362 | if (line < 0 || line > LinesTotal()) { |
| 363 | return; |
| 364 | } |
| 365 | unsigned int m = valueSet; |
| 366 | for (int i = 0; m; i++, m >>= 1) { |
| 367 | if (m & 1) |
| 368 | Markers()->AddMark(line, i, LinesTotal()); |
| 369 | } |
| 370 | const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); |
| 371 | NotifyModified(mh); |
| 372 | } |
| 373 | |
| 374 | void Document::DeleteMark(Sci::Line line, int markerNum) { |
| 375 | Markers()->DeleteMark(line, markerNum, false); |
| 376 | const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); |
| 377 | NotifyModified(mh); |
| 378 | } |
| 379 | |
| 380 | void Document::DeleteMarkFromHandle(int markerHandle) { |
| 381 | Markers()->DeleteMarkFromHandle(markerHandle); |
| 382 | DocModification mh(ModificationFlags::ChangeMarker); |
| 383 | mh.line = -1; |
| 384 | NotifyModified(mh); |
| 385 | } |
| 386 | |
| 387 | void Document::DeleteAllMarks(int markerNum) { |
| 388 | bool someChanges = false; |
| 389 | for (Sci::Line line = 0; line < LinesTotal(); line++) { |
| 390 | if (Markers()->DeleteMark(line, markerNum, true)) |
| 391 | someChanges = true; |
| 392 | } |
| 393 | if (someChanges) { |
| 394 | DocModification mh(ModificationFlags::ChangeMarker); |
| 395 | mh.line = -1; |
| 396 | NotifyModified(mh); |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { |
| 401 | return Markers()->LineFromHandle(markerHandle); |
| 402 | } |
| 403 | |
| 404 | int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { |
| 405 | return Markers()->NumberFromLine(line, which); |
| 406 | } |
| 407 | |
| 408 | int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { |
| 409 | return Markers()->HandleFromLine(line, which); |
| 410 | } |
| 411 | |
// Position of the start of a line, as reported by the cell buffer.
Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
	return cb.LineStart(line);
}
| 415 | |
| 416 | bool Document::IsLineStartPosition(Sci::Position position) const { |
| 417 | return LineStart(LineFromPosition(position)) == position; |
| 418 | } |
| 419 | |
| 420 | Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { |
| 421 | if (line >= LinesTotal() - 1) { |
| 422 | return LineStart(line + 1); |
| 423 | } else { |
| 424 | Sci::Position position = LineStart(line + 1); |
| 425 | if (LineEndType::Unicode == cb.GetLineEndTypes()) { |
| 426 | const unsigned char bytes[] = { |
| 427 | cb.UCharAt(position-3), |
| 428 | cb.UCharAt(position-2), |
| 429 | cb.UCharAt(position-1), |
| 430 | }; |
| 431 | if (UTF8IsSeparator(bytes)) { |
| 432 | return position - UTF8SeparatorLength; |
| 433 | } |
| 434 | if (UTF8IsNEL(bytes+1)) { |
| 435 | return position - UTF8NELLength; |
| 436 | } |
| 437 | } |
| 438 | position--; // Back over CR or LF |
| 439 | // When line terminator is CR+LF, may need to go back one more |
| 440 | if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { |
| 441 | position--; |
| 442 | } |
| 443 | return position; |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | void SCI_METHOD Document::SetErrorStatus(int status) { |
| 448 | // Tell the watchers an error has occurred. |
| 449 | for (const WatcherWithUserData &watcher : watchers) { |
| 450 | watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast<Status>(status)); |
| 451 | } |
| 452 | } |
| 453 | |
// Line containing the given position, as reported by the cell buffer.
Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
	return cb.LineFromPosition(pos);
}
| 457 | |
// Same lookup as LineFromPosition but typed as Sci::Line and noexcept.
Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
	// Avoids casting in callers for this very common function
	return cb.LineFromPosition(pos);
}
| 462 | |
| 463 | Sci::Position Document::LineEndPosition(Sci::Position position) const { |
| 464 | return LineEnd(LineFromPosition(position)); |
| 465 | } |
| 466 | |
| 467 | bool Document::IsLineEndPosition(Sci::Position position) const { |
| 468 | return LineEnd(LineFromPosition(position)) == position; |
| 469 | } |
| 470 | |
| 471 | bool Document::IsPositionInLineEnd(Sci::Position position) const { |
| 472 | return position >= LineEnd(LineFromPosition(position)); |
| 473 | } |
| 474 | |
| 475 | Sci::Position Document::VCHomePosition(Sci::Position position) const { |
| 476 | const Sci::Line line = SciLineFromPosition(position); |
| 477 | const Sci::Position startPosition = LineStart(line); |
| 478 | const Sci::Position endLine = LineEnd(line); |
| 479 | Sci::Position startText = startPosition; |
| 480 | while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t')) |
| 481 | startText++; |
| 482 | if (position == startText) |
| 483 | return startPosition; |
| 484 | else |
| 485 | return startText; |
| 486 | } |
| 487 | |
// Start of a line measured in the requested character index, as reported
// by the cell buffer.
Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept {
	return cb.IndexLineStart(line, lineCharacterIndex);
}
| 491 | |
// Line containing a position that is measured in the requested character
// index, as reported by the cell buffer.
Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept {
	return cb.LineFromPositionIndex(pos, lineCharacterIndex);
}
| 495 | |
| 496 | Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { |
| 497 | const Sci::Position posAfter = cb.LineStart(line) + length; |
| 498 | if (posAfter >= LengthNoExcept()) { |
| 499 | return LinesTotal(); |
| 500 | } |
| 501 | const Sci::Line lineAfter = SciLineFromPosition(posAfter); |
| 502 | if (lineAfter > line) { |
| 503 | return lineAfter; |
| 504 | } else { |
| 505 | // Want to make some progress so return next line |
| 506 | return lineAfter + 1; |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { |
| 511 | const int prev = Levels()->SetLevel(line, level, LinesTotal()); |
| 512 | if (prev != level) { |
| 513 | DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker, |
| 514 | LineStart(line), 0, 0, nullptr, line); |
| 515 | mh.foldLevelNow = static_cast<FoldLevel>(level); |
| 516 | mh.foldLevelPrev = static_cast<FoldLevel>(prev); |
| 517 | NotifyModified(mh); |
| 518 | } |
| 519 | return prev; |
| 520 | } |
| 521 | |
// Fold level of a line as a plain int, read from the levels store.
int SCI_METHOD Document::GetLevel(Sci_Position line) const {
	return Levels()->GetLevel(line);
}
| 525 | |
| 526 | FoldLevel Document::GetFoldLevel(Sci_Position line) const { |
| 527 | return static_cast<FoldLevel>(Levels()->GetLevel(line)); |
| 528 | } |
| 529 | |
// Clear all fold levels held in the levels store.
void Document::ClearLevels() {
	Levels()->ClearLevels();
}
| 533 | |
| 534 | static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept { |
| 535 | if (LevelIsWhitespace(levelTry)) |
| 536 | return true; |
| 537 | else |
| 538 | return LevelNumber(levelStart) < LevelNumber(levelTry); |
| 539 | } |
| 540 | |
// Find the last line that is subordinate to lineParent in the fold structure.
// level: fold level to treat as the parent's; when empty the parent's own
// level is looked up.
// lastLine: -1 for no limit. Otherwise the scan stops once it has reached
// that line (capped to the last document line) and is on a non-whitespace line.
// Styling is extended as the scan advances so that fold levels are available.
Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional<FoldLevel> level, Sci::Line lastLine) {
	const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent));
	const Sci::Line maxLine = LinesTotal();
	const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
	Sci::Line lineMaxSubord = lineParent;
	while (lineMaxSubord < maxLine - 1) {
		// Make sure the next line has been styled before reading its fold level.
		EnsureStyledTo(LineStart(lineMaxSubord + 2));
		if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1)))
			break;
		if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord)))
			break;
		lineMaxSubord++;
	}
	if (lineMaxSubord > lineParent) {
		if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) {
			// Have chewed up some whitespace that belongs to a parent so seek back
			if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) {
				lineMaxSubord--;
			}
		}
	}
	return lineMaxSubord;
}
| 564 | |
| 565 | Sci::Line Document::GetFoldParent(Sci::Line line) const { |
| 566 | const FoldLevel level = LevelNumberPart(GetFoldLevel(line)); |
| 567 | Sci::Line lineLook = line - 1; |
| 568 | while ((lineLook > 0) && ( |
| 569 | (!LevelIsHeader(GetFoldLevel(lineLook))) || |
| 570 | (LevelNumberPart(GetFoldLevel(lineLook)) >= level)) |
| 571 | ) { |
| 572 | lineLook--; |
| 573 | } |
| 574 | if (LevelIsHeader(GetFoldLevel(lineLook)) && |
| 575 | (LevelNumberPart(GetFoldLevel(lineLook)) < level)) { |
| 576 | return lineLook; |
| 577 | } else { |
| 578 | return -1; |
| 579 | } |
| 580 | } |
| 581 | |
// Work out the fold block around `line` for highlighting and store the result
// in highlightDelimiter: the first and last line of the block, plus the
// nearest lines before and after `line` recorded as firstChangeableLineBefore
// and firstChangeableLineAfter.
// lastLine bounds how far GetLastChild is allowed to scan.
// highlightDelimiter is cleared when no enclosing fold block is found.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
	const FoldLevel level = GetFoldLevel(line);
	const Sci::Line lookLastLine = std::max(line, lastLine) + 1;

	// Step upwards past whitespace lines and past headers whose level number
	// is not lower than that of the line following them.
	Sci::Line lookLine = line;
	FoldLevel lookLineLevel = level;
	FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel);
	while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) ||
		(LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) {
		lookLineLevel = GetFoldLevel(--lookLine);
		lookLineLevelNum = LevelNumberPart(lookLineLevel);
	}

	// The block starts at the line reached if it is a header, otherwise at its fold parent.
	Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine);
	if (beginFoldBlock == -1) {
		highlightDelimiter.Clear();
		return;
	}

	Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine);
	Sci::Line firstChangeableLineBefore = -1;
	if (endFoldBlock < line) {
		// The block found ends before `line`, so look further up for a header
		// whose last child is exactly `line`.
		lookLine = beginFoldBlock - 1;
		lookLineLevel = GetFoldLevel(lookLine);
		lookLineLevelNum = LevelNumberPart(lookLineLevel);
		while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) {
			if (LevelIsHeader(lookLineLevel)) {
				if (GetLastChild(lookLine, {}, lookLastLine) == line) {
					beginFoldBlock = lookLine;
					endFoldBlock = line;
					firstChangeableLineBefore = line - 1;
				}
			}
			// Stop at a base-level line that follows a more deeply nested line.
			if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum))
				break;
			lookLineLevel = GetFoldLevel(--lookLine);
			lookLineLevelNum = LevelNumberPart(lookLineLevel);
		}
	}
	if (firstChangeableLineBefore == -1) {
		// Scan upwards inside the block for a whitespace line or a line with a
		// higher level number than `line`.
		for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
			lookLine >= beginFoldBlock;
			lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
			if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) {
				firstChangeableLineBefore = lookLine;
				break;
			}
		}
	}
	// Nothing found inside the block: use the line just before it.
	if (firstChangeableLineBefore == -1)
		firstChangeableLineBefore = beginFoldBlock - 1;

	// Scan downwards inside the block for a header whose level number is lower
	// than that of the line following it.
	Sci::Line firstChangeableLineAfter = -1;
	for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
		lookLine <= endFoldBlock;
		lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
		if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) {
			firstChangeableLineAfter = lookLine;
			break;
		}
	}
	// Nothing found inside the block: use the line just after it.
	if (firstChangeableLineAfter == -1)
		firstChangeableLineAfter = endFoldBlock + 1;

	highlightDelimiter.beginFoldBlock = beginFoldBlock;
	highlightDelimiter.endFoldBlock = endFoldBlock;
	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
| 651 | |
| 652 | Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { |
| 653 | return std::clamp<Sci::Position>(pos, 0, LengthNoExcept()); |
| 654 | } |
| 655 | |
| 656 | bool Document::IsCrLf(Sci::Position pos) const noexcept { |
| 657 | if (pos < 0) |
| 658 | return false; |
| 659 | if (pos >= (LengthNoExcept() - 1)) |
| 660 | return false; |
| 661 | return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); |
| 662 | } |
| 663 | |
| 664 | int Document::LenChar(Sci::Position pos) const noexcept { |
| 665 | if (pos < 0 || pos >= LengthNoExcept()) { |
| 666 | // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. |
| 667 | return 1; |
| 668 | } else if (IsCrLf(pos)) { |
| 669 | return 2; |
| 670 | } |
| 671 | |
| 672 | const unsigned char leadByte = cb.UCharAt(pos); |
| 673 | if (!dbcsCodePage || UTF8IsAscii(leadByte)) { |
| 674 | // Common case: ASCII character |
| 675 | return 1; |
| 676 | } |
| 677 | if (CpUtf8 == dbcsCodePage) { |
| 678 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
| 679 | unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; |
| 680 | for (int b = 1; b < widthCharBytes; b++) { |
| 681 | charBytes[b] = cb.UCharAt(pos + b); |
| 682 | } |
| 683 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
| 684 | if (utf8status & UTF8MaskInvalid) { |
| 685 | // Treat as invalid and use up just one byte |
| 686 | return 1; |
| 687 | } else { |
| 688 | return utf8status & UTF8MaskWidth; |
| 689 | } |
| 690 | } else { |
| 691 | if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) { |
| 692 | return 2; |
| 693 | } else { |
| 694 | return 1; |
| 695 | } |
| 696 | } |
| 697 | } |
| 698 | |
// Check whether pos lies inside a valid multi-byte UTF-8 character.
// On success returns true with start set to the position of the lead byte
// and end set to the position just past the character.
// On failure returns false; start has still been written but end is untouched.
bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
	// Step back over preceding trail bytes, at most UTF8MaxBytes of them.
	Sci::Position trail = pos;
	while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
		trail--;
	// The candidate lead byte is the byte before the run of trail bytes.
	start = (trail > 0) ? trail-1 : trail;

	const unsigned char leadByte = cb.UCharAt(start);
	const int widthCharBytes = UTF8BytesOfLead[leadByte];
	if (widthCharBytes == 1) {
		// Candidate byte only announces a single-byte character.
		return false;
	} else {
		const int trailBytes = widthCharBytes - 1;
		const Sci::Position len = pos - start;
		if (len > trailBytes)
			// pos too far from lead
			return false;
		// Copy the character's bytes, stopping at the end of the document, and classify.
		unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
		for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
			charBytes[b] = cb.CharAt(start+b);
		const int utf8status = UTF8Classify(charBytes, widthCharBytes);
		if (utf8status & UTF8MaskInvalid)
			return false;
		end = start + widthCharBytes;
		return true;
	}
}
| 725 | |
// Normalise a position so that it is not part way through a multi-byte character.
// This can occur in two situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
// When displaying DBCS text such as Japanese.
// If moving, move the position in the indicated direction.
// pos: position to normalise; values outside the document are clamped to its ends.
// moveDir: a value greater than 0 moves to the end of the character, otherwise to its start.
// checkLineEnd: when true, a position between the CR and LF of a pair is also moved.
Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
	// If out of range, just return minimum/maximum value.
	if (pos <= 0)
		return 0;
	if (pos >= LengthNoExcept())
		return LengthNoExcept();

	// PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
	// pos sits between CR and LF when the pair starts at pos - 1.
	if (checkLineEnd && IsCrLf(pos - 1)) {
		if (moveDir > 0)
			return pos + 1;
		else
			return pos - 1;
	}

	if (dbcsCodePage) {
		if (CpUtf8 == dbcsCodePage) {
			const unsigned char ch = cb.UCharAt(pos);
			// If ch is not a trail byte then pos is valid intercharacter position
			if (UTF8IsTrailByte(ch)) {
				Sci::Position startUTF = pos;
				Sci::Position endUTF = pos;
				if (InGoodUTF8(pos, startUTF, endUTF)) {
					// ch is a trail byte within a UTF-8 character
					if (moveDir > 0)
						pos = endUTF;
					else
						pos = startUTF;
				}
				// Else invalid UTF-8 so return position of isolated trail byte
			}
		} else {
			// Anchor DBCS calculations at start of line because start of line can
			// not be a DBCS trail byte.
			const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
			if (pos == posStartLine)
				return pos;

			// Step back until a non-lead-byte is found.
			Sci::Position posCheck = pos;
			while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
				posCheck--;

			// Check from known start of character.
			while (posCheck < pos) {
				const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;
				if (posCheck + mbsize == pos) {
					// pos falls exactly on a character boundary.
					return pos;
				} else if (posCheck + mbsize > pos) {
					// pos is inside the character starting at posCheck.
					if (moveDir > 0) {
						return posCheck + mbsize;
					} else {
						return posCheck;
					}
				}
				posCheck += mbsize;
			}
		}
	}

	return pos;
}
| 794 | |
| 795 | // NextPosition moves between valid positions - it can not handle a position in the middle of a |
| 796 | // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. |
| 797 | // A \r\n pair is treated as two characters. |
| 798 | Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { |
| 799 | // If out of range, just return minimum/maximum value. |
| 800 | const int increment = (moveDir > 0) ? 1 : -1; |
| 801 | if (pos + increment <= 0) |
| 802 | return 0; |
| 803 | if (pos + increment >= cb.Length()) |
| 804 | return cb.Length(); |
| 805 | |
| 806 | if (dbcsCodePage) { |
| 807 | if (CpUtf8 == dbcsCodePage) { |
| 808 | if (increment == 1) { |
| 809 | // Simple forward movement case so can avoid some checks |
| 810 | const unsigned char leadByte = cb.UCharAt(pos); |
| 811 | if (UTF8IsAscii(leadByte)) { |
| 812 | // Single byte character or invalid |
| 813 | pos++; |
| 814 | } else { |
| 815 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
| 816 | unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; |
| 817 | for (int b=1; b<widthCharBytes; b++) |
| 818 | charBytes[b] = cb.CharAt(pos+b); |
| 819 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
| 820 | if (utf8status & UTF8MaskInvalid) |
| 821 | pos++; |
| 822 | else |
| 823 | pos += utf8status & UTF8MaskWidth; |
| 824 | } |
| 825 | } else { |
| 826 | // Examine byte before position |
| 827 | pos--; |
| 828 | const unsigned char ch = cb.UCharAt(pos); |
| 829 | // If ch is not a trail byte then pos is valid intercharacter position |
| 830 | if (UTF8IsTrailByte(ch)) { |
| 831 | // If ch is a trail byte in a valid UTF-8 character then return start of character |
| 832 | Sci::Position startUTF = pos; |
| 833 | Sci::Position endUTF = pos; |
| 834 | if (InGoodUTF8(pos, startUTF, endUTF)) { |
| 835 | pos = startUTF; |
| 836 | } |
| 837 | // Else invalid UTF-8 so return position of isolated trail byte |
| 838 | } |
| 839 | } |
| 840 | } else { |
| 841 | if (moveDir > 0) { |
| 842 | const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1; |
| 843 | pos += mbsize; |
| 844 | if (pos > cb.Length()) |
| 845 | pos = cb.Length(); |
| 846 | } else { |
| 847 | // Anchor DBCS calculations at start of line because start of line can |
| 848 | // not be a DBCS trail byte. |
| 849 | const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); |
| 850 | // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx |
| 851 | // http://msdn.microsoft.com/en-us/library/cc194790.aspx |
| 852 | if ((pos - 1) <= posStartLine) { |
| 853 | return pos - 1; |
| 854 | } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { |
| 855 | // Should actually be trail byte |
| 856 | if (IsDBCSDualByteAt(pos - 2)) { |
| 857 | return pos - 2; |
| 858 | } else { |
| 859 | // Invalid byte pair so treat as one byte wide |
| 860 | return pos - 1; |
| 861 | } |
| 862 | } else { |
| 863 | // Otherwise, step back until a non-lead-byte is found. |
| 864 | Sci::Position posTemp = pos - 1; |
| 865 | while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) |
| 866 | ; |
| 867 | // Now posTemp+1 must point to the beginning of a character, |
| 868 | // so figure out whether we went back an even or an odd |
| 869 | // number of bytes and go back 1 or 2 bytes, respectively. |
| 870 | const Sci::Position widthLast = ((pos - posTemp) & 1) + 1; |
| 871 | if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) { |
| 872 | return pos - widthLast; |
| 873 | } |
| 874 | // Byte before pos may be valid character or may be an invalid second byte |
| 875 | return pos - 1; |
| 876 | } |
| 877 | } |
| 878 | } |
| 879 | } else { |
| 880 | pos += increment; |
| 881 | } |
| 882 | |
| 883 | return pos; |
| 884 | } |
| 885 | |
| 886 | bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { |
| 887 | // Returns true if pos changed |
| 888 | Sci::Position posNext = NextPosition(pos, moveDir); |
| 889 | if (posNext == pos) { |
| 890 | return false; |
| 891 | } else { |
| 892 | pos = posNext; |
| 893 | return true; |
| 894 | } |
| 895 | } |
| 896 | |
| 897 | Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { |
| 898 | if (position >= LengthNoExcept()) { |
| 899 | return CharacterExtracted(unicodeReplacementChar, 0); |
| 900 | } |
| 901 | const unsigned char leadByte = cb.UCharAt(position); |
| 902 | if (!dbcsCodePage || UTF8IsAscii(leadByte)) { |
| 903 | // Common case: ASCII character |
| 904 | return CharacterExtracted(leadByte, 1); |
| 905 | } |
| 906 | if (CpUtf8 == dbcsCodePage) { |
| 907 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
| 908 | unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; |
| 909 | for (int b = 1; b<widthCharBytes; b++) |
| 910 | charBytes[b] = cb.UCharAt(position + b); |
| 911 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
| 912 | if (utf8status & UTF8MaskInvalid) { |
| 913 | // Treat as invalid and use up just one byte |
| 914 | return CharacterExtracted(unicodeReplacementChar, 1); |
| 915 | } else { |
| 916 | return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); |
| 917 | } |
| 918 | } else { |
| 919 | if (IsDBCSLeadByteNoExcept(leadByte)) { |
| 920 | const unsigned char trailByte = cb.UCharAt(position + 1); |
| 921 | if (IsDBCSTrailByteNoExcept(trailByte)) { |
| 922 | return CharacterExtracted::DBCS(leadByte, trailByte); |
| 923 | } |
| 924 | } |
| 925 | return CharacterExtracted(leadByte, 1); |
| 926 | } |
| 927 | } |
| 928 | |
| 929 | Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept { |
| 930 | if (position <= 0) { |
| 931 | return CharacterExtracted(unicodeReplacementChar, 0); |
| 932 | } |
| 933 | const unsigned char previousByte = cb.UCharAt(position - 1); |
| 934 | if (0 == dbcsCodePage) { |
| 935 | return CharacterExtracted(previousByte, 1); |
| 936 | } |
| 937 | if (CpUtf8 == dbcsCodePage) { |
| 938 | if (UTF8IsAscii(previousByte)) { |
| 939 | return CharacterExtracted(previousByte, 1); |
| 940 | } |
| 941 | position--; |
| 942 | // If previousByte is not a trail byte then its invalid |
| 943 | if (UTF8IsTrailByte(previousByte)) { |
| 944 | // If previousByte is a trail byte in a valid UTF-8 character then find start of character |
| 945 | Sci::Position startUTF = position; |
| 946 | Sci::Position endUTF = position; |
| 947 | if (InGoodUTF8(position, startUTF, endUTF)) { |
| 948 | const Sci::Position widthCharBytes = endUTF - startUTF; |
| 949 | unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 }; |
| 950 | for (Sci::Position b = 0; b<widthCharBytes; b++) |
| 951 | charBytes[b] = cb.UCharAt(startUTF + b); |
| 952 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
| 953 | if (utf8status & UTF8MaskInvalid) { |
| 954 | // Treat as invalid and use up just one byte |
| 955 | return CharacterExtracted(unicodeReplacementChar, 1); |
| 956 | } else { |
| 957 | return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); |
| 958 | } |
| 959 | } |
| 960 | // Else invalid UTF-8 so return position of isolated trail byte |
| 961 | } |
| 962 | return CharacterExtracted(unicodeReplacementChar, 1); |
| 963 | } else { |
| 964 | // Moving backwards in DBCS is complex so use NextPosition |
| 965 | const Sci::Position posStartCharacter = NextPosition(position, -1); |
| 966 | return CharacterAfter(posStartCharacter); |
| 967 | } |
| 968 | } |
| 969 | |
| 970 | // Return -1 on out-of-bounds |
| 971 | Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const { |
| 972 | Sci::Position pos = positionStart; |
| 973 | if (dbcsCodePage) { |
| 974 | const int increment = (characterOffset > 0) ? 1 : -1; |
| 975 | while (characterOffset != 0) { |
| 976 | const Sci::Position posNext = NextPosition(pos, increment); |
| 977 | if (posNext == pos) |
| 978 | return Sci::invalidPosition; |
| 979 | pos = posNext; |
| 980 | characterOffset -= increment; |
| 981 | } |
| 982 | } else { |
| 983 | pos = positionStart + characterOffset; |
| 984 | if ((pos < 0) || (pos > Length())) |
| 985 | return Sci::invalidPosition; |
| 986 | } |
| 987 | return pos; |
| 988 | } |
| 989 | |
| 990 | Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { |
| 991 | Sci::Position pos = positionStart; |
| 992 | if (dbcsCodePage) { |
| 993 | const int increment = (characterOffset > 0) ? 1 : -1; |
| 994 | while (characterOffset != 0) { |
| 995 | const Sci::Position posNext = NextPosition(pos, increment); |
| 996 | if (posNext == pos) |
| 997 | return Sci::invalidPosition; |
| 998 | if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. |
| 999 | characterOffset -= increment; |
| 1000 | pos = posNext; |
| 1001 | characterOffset -= increment; |
| 1002 | } |
| 1003 | } else { |
| 1004 | pos = positionStart + characterOffset; |
| 1005 | if ((pos < 0) || (pos > LengthNoExcept())) |
| 1006 | return Sci::invalidPosition; |
| 1007 | } |
| 1008 | return pos; |
| 1009 | } |
| 1010 | |
| 1011 | int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { |
| 1012 | int bytesInCharacter = 1; |
| 1013 | const unsigned char leadByte = cb.UCharAt(position); |
| 1014 | int character = leadByte; |
| 1015 | if (dbcsCodePage && !UTF8IsAscii(leadByte)) { |
| 1016 | if (CpUtf8 == dbcsCodePage) { |
| 1017 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
| 1018 | unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; |
| 1019 | for (int b=1; b<widthCharBytes; b++) |
| 1020 | charBytes[b] = cb.UCharAt(position+b); |
| 1021 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
| 1022 | if (utf8status & UTF8MaskInvalid) { |
| 1023 | // Report as singleton surrogate values which are invalid Unicode |
| 1024 | character = 0xDC80 + leadByte; |
| 1025 | } else { |
| 1026 | bytesInCharacter = utf8status & UTF8MaskWidth; |
| 1027 | character = UnicodeFromUTF8(charBytes); |
| 1028 | } |
| 1029 | } else { |
| 1030 | if (IsDBCSLeadByteNoExcept(leadByte)) { |
| 1031 | const unsigned char trailByte = cb.UCharAt(position + 1); |
| 1032 | if (IsDBCSTrailByteNoExcept(trailByte)) { |
| 1033 | bytesInCharacter = 2; |
| 1034 | character = (leadByte << 8) | trailByte; |
| 1035 | } |
| 1036 | } |
| 1037 | } |
| 1038 | } |
| 1039 | if (pWidth) { |
| 1040 | *pWidth = bytesInCharacter; |
| 1041 | } |
| 1042 | return character; |
| 1043 | } |
| 1044 | |
// Return the code page number stored in dbcsCodePage.
// Elsewhere in this file 0 is treated as a single byte encoding and CpUtf8 as UTF-8.
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}
| 1048 | |
// Report whether ch is a lead byte in the current DBCS code page.
// Forwards to IsDBCSLeadByteNoExcept, which holds the byte ranges.
bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
	// Used by lexers so must match IDocument method exactly
	return IsDBCSLeadByteNoExcept(ch);
}
| 1053 | |
| 1054 | bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept { |
| 1055 | // Used inside core Scintilla |
| 1056 | // Byte ranges found in Wikipedia articles with relevant search strings in each case |
| 1057 | const unsigned char uch = ch; |
| 1058 | switch (dbcsCodePage) { |
| 1059 | case 932: |
| 1060 | // Shift_jis |
| 1061 | return ((uch >= 0x81) && (uch <= 0x9F)) || |
| 1062 | ((uch >= 0xE0) && (uch <= 0xFC)); |
| 1063 | // Lead bytes F0 to FC may be a Microsoft addition. |
| 1064 | case 936: |
| 1065 | // GBK |
| 1066 | return (uch >= 0x81) && (uch <= 0xFE); |
| 1067 | case 949: |
| 1068 | // Korean Wansung KS C-5601-1987 |
| 1069 | return (uch >= 0x81) && (uch <= 0xFE); |
| 1070 | case 950: |
| 1071 | // Big5 |
| 1072 | return (uch >= 0x81) && (uch <= 0xFE); |
| 1073 | case 1361: |
| 1074 | // Korean Johab KS C-5601-1992 |
| 1075 | return |
| 1076 | ((uch >= 0x84) && (uch <= 0xD3)) || |
| 1077 | ((uch >= 0xD8) && (uch <= 0xDE)) || |
| 1078 | ((uch >= 0xE0) && (uch <= 0xF9)); |
| 1079 | } |
| 1080 | return false; |
| 1081 | } |
| 1082 | |
| 1083 | bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept { |
| 1084 | const unsigned char trail = ch; |
| 1085 | switch (dbcsCodePage) { |
| 1086 | case 932: |
| 1087 | // Shift_jis |
| 1088 | return (trail != 0x7F) && |
| 1089 | ((trail >= 0x40) && (trail <= 0xFC)); |
| 1090 | case 936: |
| 1091 | // GBK |
| 1092 | return (trail != 0x7F) && |
| 1093 | ((trail >= 0x40) && (trail <= 0xFE)); |
| 1094 | case 949: |
| 1095 | // Korean Wansung KS C-5601-1987 |
| 1096 | return |
| 1097 | ((trail >= 0x41) && (trail <= 0x5A)) || |
| 1098 | ((trail >= 0x61) && (trail <= 0x7A)) || |
| 1099 | ((trail >= 0x81) && (trail <= 0xFE)); |
| 1100 | case 950: |
| 1101 | // Big5 |
| 1102 | return |
| 1103 | ((trail >= 0x40) && (trail <= 0x7E)) || |
| 1104 | ((trail >= 0xA1) && (trail <= 0xFE)); |
| 1105 | case 1361: |
| 1106 | // Korean Johab KS C-5601-1992 |
| 1107 | return |
| 1108 | ((trail >= 0x31) && (trail <= 0x7E)) || |
| 1109 | ((trail >= 0x81) && (trail <= 0xFE)); |
| 1110 | } |
| 1111 | return false; |
| 1112 | } |
| 1113 | |
| 1114 | int Document::DBCSDrawBytes(std::string_view text) const noexcept { |
| 1115 | if (text.length() <= 1) { |
| 1116 | return static_cast<int>(text.length()); |
| 1117 | } |
| 1118 | if (IsDBCSLeadByteNoExcept(text[0])) { |
| 1119 | return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1; |
| 1120 | } else { |
| 1121 | return 1; |
| 1122 | } |
| 1123 | } |
| 1124 | |
| 1125 | bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { |
| 1126 | return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) |
| 1127 | && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); |
| 1128 | } |
| 1129 | |
| 1130 | // Need to break text into segments near end but taking into account the |
| 1131 | // encoding to not break inside a UTF-8 or DBCS character and also trying |
| 1132 | // to avoid breaking inside a pair of combining characters, or inside |
| 1133 | // ligatures. |
| 1134 | // TODO: implement grapheme cluster boundaries, |
| 1135 | // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries. |
| 1136 | // |
| 1137 | // The segment length must always be long enough (more than 4 bytes) |
| 1138 | // so that there will be at least one whole character to make a segment. |
| 1139 | // For UTF-8, text must consist only of valid whole characters. |
| 1140 | // In preference order from best to worst: |
| 1141 | // 1) Break before or after spaces or controls |
| 1142 | // 2) Break at word and punctuation boundary for better kerning and ligature support |
| 1143 | // 3) Break after whole character, this may break combining characters |
| 1144 | |
| 1145 | size_t Document::SafeSegment(std::string_view text) const noexcept { |
| 1146 | // check space first as most written language use spaces. |
| 1147 | for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) { |
| 1148 | if (IsBreakSpace(*it)) { |
| 1149 | return it - text.begin(); |
| 1150 | } |
| 1151 | } |
| 1152 | |
| 1153 | if (!dbcsCodePage || dbcsCodePage == CpUtf8) { |
| 1154 | // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary. |
| 1155 | std::string_view::iterator it = text.end() - 1; |
| 1156 | const bool punctuation = IsPunctuation(*it); |
| 1157 | do { |
| 1158 | --it; |
| 1159 | if (punctuation != IsPunctuation(*it)) { |
| 1160 | return it - text.begin() + 1; |
| 1161 | } |
| 1162 | } while (it != text.begin()); |
| 1163 | |
| 1164 | it = text.end() - 1; |
| 1165 | if (dbcsCodePage) { |
| 1166 | // for UTF-8 go back to the start of last character. |
| 1167 | for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) { |
| 1168 | --it; |
| 1169 | } |
| 1170 | } |
| 1171 | return it - text.begin(); |
| 1172 | } |
| 1173 | |
| 1174 | { |
| 1175 | // forward iterate for DBCS to find word and punctuation boundary. |
| 1176 | size_t lastPunctuationBreak = 0; |
| 1177 | size_t lastEncodingAllowedBreak = 0; |
| 1178 | CharacterClass ccPrev = CharacterClass::space; |
| 1179 | for (size_t j = 0; j < text.length();) { |
| 1180 | const unsigned char ch = text[j]; |
| 1181 | lastEncodingAllowedBreak = j++; |
| 1182 | |
| 1183 | CharacterClass cc = CharacterClass::word; |
| 1184 | if (UTF8IsAscii(ch)) { |
| 1185 | if (IsPunctuation(ch)) { |
| 1186 | cc = CharacterClass::punctuation; |
| 1187 | } |
| 1188 | } else { |
| 1189 | j += IsDBCSLeadByteNoExcept(ch); |
| 1190 | } |
| 1191 | if (cc != ccPrev) { |
| 1192 | ccPrev = cc; |
| 1193 | lastPunctuationBreak = lastEncodingAllowedBreak; |
| 1194 | } |
| 1195 | } |
| 1196 | return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak; |
| 1197 | } |
| 1198 | } |
| 1199 | |
| 1200 | EncodingFamily Document::CodePageFamily() const noexcept { |
| 1201 | if (CpUtf8 == dbcsCodePage) |
| 1202 | return EncodingFamily::unicode; |
| 1203 | else if (dbcsCodePage) |
| 1204 | return EncodingFamily::dbcs; |
| 1205 | else |
| 1206 | return EncodingFamily::eightBit; |
| 1207 | } |
| 1208 | |
| 1209 | void Document::ModifiedAt(Sci::Position pos) noexcept { |
| 1210 | if (endStyled > pos) |
| 1211 | endStyled = pos; |
| 1212 | } |
| 1213 | |
| 1214 | void Document::CheckReadOnly() { |
| 1215 | if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { |
| 1216 | enteredReadOnlyCount++; |
| 1217 | NotifyModifyAttempt(); |
| 1218 | enteredReadOnlyCount--; |
| 1219 | } |
| 1220 | } |
| 1221 | |
// The document is only modified through the gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
// SetStyleAt does not change the persistent state of a document.
| 1224 | |
| 1225 | bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { |
| 1226 | if (pos < 0) |
| 1227 | return false; |
| 1228 | if (len <= 0) |
| 1229 | return false; |
| 1230 | if ((pos + len) > LengthNoExcept()) |
| 1231 | return false; |
| 1232 | CheckReadOnly(); |
| 1233 | if (enteredModification != 0) { |
| 1234 | return false; |
| 1235 | } else { |
| 1236 | enteredModification++; |
| 1237 | if (!cb.IsReadOnly()) { |
| 1238 | NotifyModified( |
| 1239 | DocModification( |
| 1240 | ModificationFlags::BeforeDelete | ModificationFlags::User, |
| 1241 | pos, len, |
| 1242 | 0, nullptr)); |
| 1243 | const Sci::Line prevLinesTotal = LinesTotal(); |
| 1244 | const bool startSavePoint = cb.IsSavePoint(); |
| 1245 | bool startSequence = false; |
| 1246 | const char *text = cb.DeleteChars(pos, len, startSequence); |
| 1247 | if (startSavePoint && cb.IsCollectingUndo()) |
| 1248 | NotifySavePoint(false); |
| 1249 | if ((pos < LengthNoExcept()) || (pos == 0)) |
| 1250 | ModifiedAt(pos); |
| 1251 | else |
| 1252 | ModifiedAt(pos-1); |
| 1253 | NotifyModified( |
| 1254 | DocModification( |
| 1255 | ModificationFlags::DeleteText | ModificationFlags::User | |
| 1256 | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), |
| 1257 | pos, len, |
| 1258 | LinesTotal() - prevLinesTotal, text)); |
| 1259 | } |
| 1260 | enteredModification--; |
| 1261 | } |
| 1262 | return !cb.IsReadOnly(); |
| 1263 | } |
| 1264 | |
| 1265 | /** |
| 1266 | * Insert a string with a length. |
| 1267 | */ |
| 1268 | Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { |
| 1269 | if (insertLength <= 0) { |
| 1270 | return 0; |
| 1271 | } |
| 1272 | CheckReadOnly(); // Application may change read only state here |
| 1273 | if (cb.IsReadOnly()) { |
| 1274 | return 0; |
| 1275 | } |
| 1276 | if (enteredModification != 0) { |
| 1277 | return 0; |
| 1278 | } |
| 1279 | enteredModification++; |
| 1280 | insertionSet = false; |
| 1281 | insertion.clear(); |
| 1282 | NotifyModified( |
| 1283 | DocModification( |
| 1284 | ModificationFlags::InsertCheck, |
| 1285 | position, insertLength, |
| 1286 | 0, s)); |
| 1287 | if (insertionSet) { |
| 1288 | s = insertion.c_str(); |
| 1289 | insertLength = insertion.length(); |
| 1290 | } |
| 1291 | NotifyModified( |
| 1292 | DocModification( |
| 1293 | ModificationFlags::BeforeInsert | ModificationFlags::User, |
| 1294 | position, insertLength, |
| 1295 | 0, s)); |
| 1296 | const Sci::Line prevLinesTotal = LinesTotal(); |
| 1297 | const bool startSavePoint = cb.IsSavePoint(); |
| 1298 | bool startSequence = false; |
| 1299 | const char *text = cb.InsertString(position, s, insertLength, startSequence); |
| 1300 | if (startSavePoint && cb.IsCollectingUndo()) |
| 1301 | NotifySavePoint(false); |
| 1302 | ModifiedAt(position); |
| 1303 | NotifyModified( |
| 1304 | DocModification( |
| 1305 | ModificationFlags::InsertText | ModificationFlags::User | |
| 1306 | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), |
| 1307 | position, insertLength, |
| 1308 | LinesTotal() - prevLinesTotal, text)); |
| 1309 | if (insertionSet) { // Free memory as could be large |
| 1310 | std::string().swap(insertion); |
| 1311 | } |
| 1312 | enteredModification--; |
| 1313 | return insertLength; |
| 1314 | } |
| 1315 | |
| 1316 | void Document::ChangeInsertion(const char *s, Sci::Position length) { |
| 1317 | insertionSet = true; |
| 1318 | insertion.assign(s, length); |
| 1319 | } |
| 1320 | |
| 1321 | int SCI_METHOD Document::AddData(const char *data, Sci_Position length) { |
| 1322 | try { |
| 1323 | const Sci::Position position = Length(); |
| 1324 | InsertString(position, data, length); |
| 1325 | } catch (std::bad_alloc &) { |
| 1326 | return static_cast<int>(Status::BadAlloc); |
| 1327 | } catch (...) { |
| 1328 | return static_cast<int>(Status::Failure); |
| 1329 | } |
| 1330 | return static_cast<int>(Status::Ok); |
| 1331 | } |
| 1332 | |
// Return this Document as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
	return this;
}
| 1336 | |
| 1337 | Sci::Position Document::Undo() { |
| 1338 | Sci::Position newPos = -1; |
| 1339 | CheckReadOnly(); |
| 1340 | if ((enteredModification == 0) && (cb.IsCollectingUndo())) { |
| 1341 | enteredModification++; |
| 1342 | if (!cb.IsReadOnly()) { |
| 1343 | const bool startSavePoint = cb.IsSavePoint(); |
| 1344 | bool multiLine = false; |
| 1345 | const int steps = cb.StartUndo(); |
| 1346 | //Platform::DebugPrintf("Steps=%d\n", steps); |
| 1347 | Sci::Position coalescedRemovePos = -1; |
| 1348 | Sci::Position coalescedRemoveLen = 0; |
| 1349 | Sci::Position prevRemoveActionPos = -1; |
| 1350 | Sci::Position prevRemoveActionLen = 0; |
| 1351 | for (int step = 0; step < steps; step++) { |
| 1352 | const Sci::Line prevLinesTotal = LinesTotal(); |
| 1353 | const Action &action = cb.GetUndoStep(); |
| 1354 | if (action.at == ActionType::remove) { |
| 1355 | NotifyModified(DocModification( |
| 1356 | ModificationFlags::BeforeInsert | ModificationFlags::Undo, action)); |
| 1357 | } else if (action.at == ActionType::container) { |
| 1358 | DocModification dm(ModificationFlags::Container | ModificationFlags::Undo); |
| 1359 | dm.token = action.position; |
| 1360 | NotifyModified(dm); |
| 1361 | if (!action.mayCoalesce) { |
| 1362 | coalescedRemovePos = -1; |
| 1363 | coalescedRemoveLen = 0; |
| 1364 | prevRemoveActionPos = -1; |
| 1365 | prevRemoveActionLen = 0; |
| 1366 | } |
| 1367 | } else { |
| 1368 | NotifyModified(DocModification( |
| 1369 | ModificationFlags::BeforeDelete | ModificationFlags::Undo, action)); |
| 1370 | } |
| 1371 | cb.PerformUndoStep(); |
| 1372 | if (action.at != ActionType::container) { |
| 1373 | ModifiedAt(action.position); |
| 1374 | newPos = action.position; |
| 1375 | } |
| 1376 | |
| 1377 | ModificationFlags modFlags = ModificationFlags::Undo; |
| 1378 | // With undo, an insertion action becomes a deletion notification |
| 1379 | if (action.at == ActionType::remove) { |
| 1380 | newPos += action.lenData; |
| 1381 | modFlags |= ModificationFlags::InsertText; |
| 1382 | if ((coalescedRemoveLen > 0) && |
| 1383 | (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) { |
| 1384 | coalescedRemoveLen += action.lenData; |
| 1385 | newPos = coalescedRemovePos + coalescedRemoveLen; |
| 1386 | } else { |
| 1387 | coalescedRemovePos = action.position; |
| 1388 | coalescedRemoveLen = action.lenData; |
| 1389 | } |
| 1390 | prevRemoveActionPos = action.position; |
| 1391 | prevRemoveActionLen = action.lenData; |
| 1392 | } else if (action.at == ActionType::insert) { |
| 1393 | modFlags |= ModificationFlags::DeleteText; |
| 1394 | coalescedRemovePos = -1; |
| 1395 | coalescedRemoveLen = 0; |
| 1396 | prevRemoveActionPos = -1; |
| 1397 | prevRemoveActionLen = 0; |
| 1398 | } |
| 1399 | if (steps > 1) |
| 1400 | modFlags |= ModificationFlags::MultiStepUndoRedo; |
| 1401 | const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; |
| 1402 | if (linesAdded != 0) |
| 1403 | multiLine = true; |
| 1404 | if (step == steps - 1) { |
| 1405 | modFlags |= ModificationFlags::LastStepInUndoRedo; |
| 1406 | if (multiLine) |
| 1407 | modFlags |= ModificationFlags::MultilineUndoRedo; |
| 1408 | } |
| 1409 | NotifyModified(DocModification(modFlags, action.position, action.lenData, |
| 1410 | linesAdded, action.data.get())); |
| 1411 | } |
| 1412 | |
| 1413 | const bool endSavePoint = cb.IsSavePoint(); |
| 1414 | if (startSavePoint != endSavePoint) |
| 1415 | NotifySavePoint(endSavePoint); |
| 1416 | } |
| 1417 | enteredModification--; |
| 1418 | } |
| 1419 | return newPos; |
| 1420 | } |
| 1421 | |
| 1422 | Sci::Position Document::Redo() { |
| 1423 | Sci::Position newPos = -1; |
| 1424 | CheckReadOnly(); |
| 1425 | if ((enteredModification == 0) && (cb.IsCollectingUndo())) { |
| 1426 | enteredModification++; |
| 1427 | if (!cb.IsReadOnly()) { |
| 1428 | const bool startSavePoint = cb.IsSavePoint(); |
| 1429 | bool multiLine = false; |
| 1430 | const int steps = cb.StartRedo(); |
| 1431 | for (int step = 0; step < steps; step++) { |
| 1432 | const Sci::Line prevLinesTotal = LinesTotal(); |
| 1433 | const Action &action = cb.GetRedoStep(); |
| 1434 | if (action.at == ActionType::insert) { |
| 1435 | NotifyModified(DocModification( |
| 1436 | ModificationFlags::BeforeInsert | ModificationFlags::Redo, action)); |
| 1437 | } else if (action.at == ActionType::container) { |
| 1438 | DocModification dm(ModificationFlags::Container | ModificationFlags::Redo); |
| 1439 | dm.token = action.position; |
| 1440 | NotifyModified(dm); |
| 1441 | } else { |
| 1442 | NotifyModified(DocModification( |
| 1443 | ModificationFlags::BeforeDelete | ModificationFlags::Redo, action)); |
| 1444 | } |
| 1445 | cb.PerformRedoStep(); |
| 1446 | if (action.at != ActionType::container) { |
| 1447 | ModifiedAt(action.position); |
| 1448 | newPos = action.position; |
| 1449 | } |
| 1450 | |
| 1451 | ModificationFlags modFlags = ModificationFlags::Redo; |
| 1452 | if (action.at == ActionType::insert) { |
| 1453 | newPos += action.lenData; |
| 1454 | modFlags |= ModificationFlags::InsertText; |
| 1455 | } else if (action.at == ActionType::remove) { |
| 1456 | modFlags |= ModificationFlags::DeleteText; |
| 1457 | } |
| 1458 | if (steps > 1) |
| 1459 | modFlags |= ModificationFlags::MultiStepUndoRedo; |
| 1460 | const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; |
| 1461 | if (linesAdded != 0) |
| 1462 | multiLine = true; |
| 1463 | if (step == steps - 1) { |
| 1464 | modFlags |= ModificationFlags::LastStepInUndoRedo; |
| 1465 | if (multiLine) |
| 1466 | modFlags |= ModificationFlags::MultilineUndoRedo; |
| 1467 | } |
| 1468 | NotifyModified( |
| 1469 | DocModification(modFlags, action.position, action.lenData, |
| 1470 | linesAdded, action.data.get())); |
| 1471 | } |
| 1472 | |
| 1473 | const bool endSavePoint = cb.IsSavePoint(); |
| 1474 | if (startSavePoint != endSavePoint) |
| 1475 | NotifySavePoint(endSavePoint); |
| 1476 | } |
| 1477 | enteredModification--; |
| 1478 | } |
| 1479 | return newPos; |
| 1480 | } |
| 1481 | |
| 1482 | void Document::DelChar(Sci::Position pos) { |
| 1483 | DeleteChars(pos, LenChar(pos)); |
| 1484 | } |
| 1485 | |
| 1486 | void Document::DelCharBack(Sci::Position pos) { |
| 1487 | if (pos <= 0) { |
| 1488 | return; |
| 1489 | } else if (IsCrLf(pos - 2)) { |
| 1490 | DeleteChars(pos - 2, 2); |
| 1491 | } else if (dbcsCodePage) { |
| 1492 | const Sci::Position startChar = NextPosition(pos, -1); |
| 1493 | DeleteChars(startChar, pos - startChar); |
| 1494 | } else { |
| 1495 | DeleteChars(pos - 1, 1); |
| 1496 | } |
| 1497 | } |
| 1498 | |
| 1499 | static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept { |
| 1500 | return ((pos / tabSize) + 1) * tabSize; |
| 1501 | } |
| 1502 | |
| 1503 | static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) { |
| 1504 | std::string indentation; |
| 1505 | if (!insertSpaces) { |
| 1506 | while (indent >= tabSize) { |
| 1507 | indentation += '\t'; |
| 1508 | indent -= tabSize; |
| 1509 | } |
| 1510 | } |
| 1511 | while (indent > 0) { |
| 1512 | indentation += ' '; |
| 1513 | indent--; |
| 1514 | } |
| 1515 | return indentation; |
| 1516 | } |
| 1517 | |
| 1518 | int SCI_METHOD Document::GetLineIndentation(Sci_Position line) { |
| 1519 | int indent = 0; |
| 1520 | if ((line >= 0) && (line < LinesTotal())) { |
| 1521 | const Sci::Position lineStart = LineStart(line); |
| 1522 | const Sci::Position length = Length(); |
| 1523 | for (Sci::Position i = lineStart; i < length; i++) { |
| 1524 | const char ch = cb.CharAt(i); |
| 1525 | if (ch == ' ') |
| 1526 | indent++; |
| 1527 | else if (ch == '\t') |
| 1528 | indent = static_cast<int>(NextTab(indent, tabInChars)); |
| 1529 | else |
| 1530 | return indent; |
| 1531 | } |
| 1532 | } |
| 1533 | return indent; |
| 1534 | } |
| 1535 | |
| 1536 | Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) { |
| 1537 | const int indentOfLine = GetLineIndentation(line); |
| 1538 | if (indent < 0) |
| 1539 | indent = 0; |
| 1540 | if (indent != indentOfLine) { |
| 1541 | std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs); |
| 1542 | const Sci::Position thisLineStart = LineStart(line); |
| 1543 | const Sci::Position indentPos = GetLineIndentPosition(line); |
| 1544 | UndoGroup ug(this); |
| 1545 | DeleteChars(thisLineStart, indentPos - thisLineStart); |
| 1546 | return thisLineStart + InsertString(thisLineStart, linebuf.c_str(), |
| 1547 | linebuf.length()); |
| 1548 | } else { |
| 1549 | return GetLineIndentPosition(line); |
| 1550 | } |
| 1551 | } |
| 1552 | |
| 1553 | Sci::Position Document::GetLineIndentPosition(Sci::Line line) const { |
| 1554 | if (line < 0) |
| 1555 | return 0; |
| 1556 | Sci::Position pos = LineStart(line); |
| 1557 | const Sci::Position length = Length(); |
| 1558 | while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) { |
| 1559 | pos++; |
| 1560 | } |
| 1561 | return pos; |
| 1562 | } |
| 1563 | |
| 1564 | Sci::Position Document::GetColumn(Sci::Position pos) { |
| 1565 | Sci::Position column = 0; |
| 1566 | const Sci::Line line = SciLineFromPosition(pos); |
| 1567 | if ((line >= 0) && (line < LinesTotal())) { |
| 1568 | for (Sci::Position i = LineStart(line); i < pos;) { |
| 1569 | const char ch = cb.CharAt(i); |
| 1570 | if (ch == '\t') { |
| 1571 | column = NextTab(column, tabInChars); |
| 1572 | i++; |
| 1573 | } else if (ch == '\r') { |
| 1574 | return column; |
| 1575 | } else if (ch == '\n') { |
| 1576 | return column; |
| 1577 | } else if (i >= Length()) { |
| 1578 | return column; |
| 1579 | } else { |
| 1580 | column++; |
| 1581 | i = NextPosition(i, 1); |
| 1582 | } |
| 1583 | } |
| 1584 | } |
| 1585 | return column; |
| 1586 | } |
| 1587 | |
| 1588 | Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept { |
| 1589 | startPos = MovePositionOutsideChar(startPos, 1, false); |
| 1590 | endPos = MovePositionOutsideChar(endPos, -1, false); |
| 1591 | Sci::Position count = 0; |
| 1592 | Sci::Position i = startPos; |
| 1593 | while (i < endPos) { |
| 1594 | count++; |
| 1595 | i = NextPosition(i, 1); |
| 1596 | } |
| 1597 | return count; |
| 1598 | } |
| 1599 | |
| 1600 | Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept { |
| 1601 | startPos = MovePositionOutsideChar(startPos, 1, false); |
| 1602 | endPos = MovePositionOutsideChar(endPos, -1, false); |
| 1603 | Sci::Position count = 0; |
| 1604 | Sci::Position i = startPos; |
| 1605 | while (i < endPos) { |
| 1606 | count++; |
| 1607 | const Sci::Position next = NextPosition(i, 1); |
| 1608 | if ((next - i) > 3) |
| 1609 | count++; |
| 1610 | i = next; |
| 1611 | } |
| 1612 | return count; |
| 1613 | } |
| 1614 | |
| 1615 | Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) { |
| 1616 | Sci::Position position = LineStart(line); |
| 1617 | if ((line >= 0) && (line < LinesTotal())) { |
| 1618 | Sci::Position columnCurrent = 0; |
| 1619 | while ((columnCurrent < column) && (position < Length())) { |
| 1620 | const char ch = cb.CharAt(position); |
| 1621 | if (ch == '\t') { |
| 1622 | columnCurrent = NextTab(columnCurrent, tabInChars); |
| 1623 | if (columnCurrent > column) |
| 1624 | return position; |
| 1625 | position++; |
| 1626 | } else if (ch == '\r') { |
| 1627 | return position; |
| 1628 | } else if (ch == '\n') { |
| 1629 | return position; |
| 1630 | } else { |
| 1631 | columnCurrent++; |
| 1632 | position = NextPosition(position, 1); |
| 1633 | } |
| 1634 | } |
| 1635 | } |
| 1636 | return position; |
| 1637 | } |
| 1638 | |
| 1639 | void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) { |
| 1640 | // Dedent - suck white space off the front of the line to dedent by equivalent of a tab |
| 1641 | for (Sci::Line line = lineBottom; line >= lineTop; line--) { |
| 1642 | const Sci::Position indentOfLine = GetLineIndentation(line); |
| 1643 | if (forwards) { |
| 1644 | if (LineStart(line) < LineEnd(line)) { |
| 1645 | SetLineIndentation(line, indentOfLine + IndentSize()); |
| 1646 | } |
| 1647 | } else { |
| 1648 | SetLineIndentation(line, indentOfLine - IndentSize()); |
| 1649 | } |
| 1650 | } |
| 1651 | } |
| 1652 | |
| 1653 | // Convert line endings for a piece of text to a particular mode. |
| 1654 | // Stop at len or when a NUL is found. |
| 1655 | std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) { |
| 1656 | std::string dest; |
| 1657 | for (size_t i = 0; (i < len) && (s[i]); i++) { |
| 1658 | if (s[i] == '\n' || s[i] == '\r') { |
| 1659 | if (eolModeWanted == EndOfLine::Cr) { |
| 1660 | dest.push_back('\r'); |
| 1661 | } else if (eolModeWanted == EndOfLine::Lf) { |
| 1662 | dest.push_back('\n'); |
| 1663 | } else { // eolModeWanted == EndOfLine::CrLf |
| 1664 | dest.push_back('\r'); |
| 1665 | dest.push_back('\n'); |
| 1666 | } |
| 1667 | if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) { |
| 1668 | i++; |
| 1669 | } |
| 1670 | } else { |
| 1671 | dest.push_back(s[i]); |
| 1672 | } |
| 1673 | } |
| 1674 | return dest; |
| 1675 | } |
| 1676 | |
| 1677 | void Document::ConvertLineEnds(EndOfLine eolModeSet) { |
| 1678 | UndoGroup ug(this); |
| 1679 | |
| 1680 | for (Sci::Position pos = 0; pos < Length(); pos++) { |
| 1681 | if (cb.CharAt(pos) == '\r') { |
| 1682 | if (cb.CharAt(pos + 1) == '\n') { |
| 1683 | // CRLF |
| 1684 | if (eolModeSet == EndOfLine::Cr) { |
| 1685 | DeleteChars(pos + 1, 1); // Delete the LF |
| 1686 | } else if (eolModeSet == EndOfLine::Lf) { |
| 1687 | DeleteChars(pos, 1); // Delete the CR |
| 1688 | } else { |
| 1689 | pos++; |
| 1690 | } |
| 1691 | } else { |
| 1692 | // CR |
| 1693 | if (eolModeSet == EndOfLine::CrLf) { |
| 1694 | pos += InsertString(pos + 1, "\n" , 1); // Insert LF |
| 1695 | } else if (eolModeSet == EndOfLine::Lf) { |
| 1696 | pos += InsertString(pos, "\n" , 1); // Insert LF |
| 1697 | DeleteChars(pos, 1); // Delete CR |
| 1698 | pos--; |
| 1699 | } |
| 1700 | } |
| 1701 | } else if (cb.CharAt(pos) == '\n') { |
| 1702 | // LF |
| 1703 | if (eolModeSet == EndOfLine::CrLf) { |
| 1704 | pos += InsertString(pos, "\r" , 1); // Insert CR |
| 1705 | } else if (eolModeSet == EndOfLine::Cr) { |
| 1706 | pos += InsertString(pos, "\r" , 1); // Insert CR |
| 1707 | DeleteChars(pos, 1); // Delete LF |
| 1708 | pos--; |
| 1709 | } |
| 1710 | } |
| 1711 | } |
| 1712 | |
| 1713 | } |
| 1714 | |
| 1715 | DocumentOption Document::Options() const noexcept { |
| 1716 | return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) | |
| 1717 | (cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone); |
| 1718 | } |
| 1719 | |
| 1720 | bool Document::IsWhiteLine(Sci::Line line) const { |
| 1721 | Sci::Position currentChar = LineStart(line); |
| 1722 | const Sci::Position endLine = LineEnd(line); |
| 1723 | while (currentChar < endLine) { |
| 1724 | if (!IsSpaceOrTab(cb.CharAt(currentChar))) { |
| 1725 | return false; |
| 1726 | } |
| 1727 | ++currentChar; |
| 1728 | } |
| 1729 | return true; |
| 1730 | } |
| 1731 | |
| 1732 | Sci::Position Document::ParaUp(Sci::Position pos) const { |
| 1733 | Sci::Line line = SciLineFromPosition(pos); |
| 1734 | line--; |
| 1735 | while (line >= 0 && IsWhiteLine(line)) { // skip empty lines |
| 1736 | line--; |
| 1737 | } |
| 1738 | while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines |
| 1739 | line--; |
| 1740 | } |
| 1741 | line++; |
| 1742 | return LineStart(line); |
| 1743 | } |
| 1744 | |
| 1745 | Sci::Position Document::ParaDown(Sci::Position pos) const { |
| 1746 | Sci::Line line = SciLineFromPosition(pos); |
| 1747 | while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines |
| 1748 | line++; |
| 1749 | } |
| 1750 | while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines |
| 1751 | line++; |
| 1752 | } |
| 1753 | if (line < LinesTotal()) |
| 1754 | return LineStart(line); |
| 1755 | else // end of a document |
| 1756 | return LineEnd(line-1); |
| 1757 | } |
| 1758 | |
| 1759 | CharacterClass Document::WordCharacterClass(unsigned int ch) const { |
| 1760 | if (dbcsCodePage && (ch >= 0x80)) { |
| 1761 | if (CpUtf8 == dbcsCodePage) { |
| 1762 | // Use hard coded Unicode class |
| 1763 | const CharacterCategory cc = charMap.CategoryFor(ch); |
| 1764 | switch (cc) { |
| 1765 | |
| 1766 | // Separator, Line/Paragraph |
| 1767 | case ccZl: |
| 1768 | case ccZp: |
| 1769 | return CharacterClass::newLine; |
| 1770 | |
| 1771 | // Separator, Space |
| 1772 | case ccZs: |
| 1773 | // Other |
| 1774 | case ccCc: |
| 1775 | case ccCf: |
| 1776 | case ccCs: |
| 1777 | case ccCo: |
| 1778 | case ccCn: |
| 1779 | return CharacterClass::space; |
| 1780 | |
| 1781 | // Letter |
| 1782 | case ccLu: |
| 1783 | case ccLl: |
| 1784 | case ccLt: |
| 1785 | case ccLm: |
| 1786 | case ccLo: |
| 1787 | // Number |
| 1788 | case ccNd: |
| 1789 | case ccNl: |
| 1790 | case ccNo: |
| 1791 | // Mark - includes combining diacritics |
| 1792 | case ccMn: |
| 1793 | case ccMc: |
| 1794 | case ccMe: |
| 1795 | return CharacterClass::word; |
| 1796 | |
| 1797 | // Punctuation |
| 1798 | case ccPc: |
| 1799 | case ccPd: |
| 1800 | case ccPs: |
| 1801 | case ccPe: |
| 1802 | case ccPi: |
| 1803 | case ccPf: |
| 1804 | case ccPo: |
| 1805 | // Symbol |
| 1806 | case ccSm: |
| 1807 | case ccSc: |
| 1808 | case ccSk: |
| 1809 | case ccSo: |
| 1810 | return CharacterClass::punctuation; |
| 1811 | |
| 1812 | } |
| 1813 | } else { |
| 1814 | // Asian DBCS |
| 1815 | return CharacterClass::word; |
| 1816 | } |
| 1817 | } |
| 1818 | return charClass.GetClass(static_cast<unsigned char>(ch)); |
| 1819 | } |
| 1820 | |
| 1821 | /** |
| 1822 | * Used by commands that want to select whole words. |
| 1823 | * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. |
| 1824 | */ |
| 1825 | Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const { |
| 1826 | CharacterClass ccStart = CharacterClass::word; |
| 1827 | if (delta < 0) { |
| 1828 | if (!onlyWordCharacters) { |
| 1829 | const CharacterExtracted ce = CharacterBefore(pos); |
| 1830 | ccStart = WordCharacterClass(ce.character); |
| 1831 | } |
| 1832 | while (pos > 0) { |
| 1833 | const CharacterExtracted ce = CharacterBefore(pos); |
| 1834 | if (WordCharacterClass(ce.character) != ccStart) |
| 1835 | break; |
| 1836 | pos -= ce.widthBytes; |
| 1837 | } |
| 1838 | } else { |
| 1839 | if (!onlyWordCharacters && pos < LengthNoExcept()) { |
| 1840 | const CharacterExtracted ce = CharacterAfter(pos); |
| 1841 | ccStart = WordCharacterClass(ce.character); |
| 1842 | } |
| 1843 | while (pos < LengthNoExcept()) { |
| 1844 | const CharacterExtracted ce = CharacterAfter(pos); |
| 1845 | if (WordCharacterClass(ce.character) != ccStart) |
| 1846 | break; |
| 1847 | pos += ce.widthBytes; |
| 1848 | } |
| 1849 | } |
| 1850 | return MovePositionOutsideChar(pos, delta, true); |
| 1851 | } |
| 1852 | |
| 1853 | /** |
| 1854 | * Find the start of the next word in either a forward (delta >= 0) or backwards direction |
| 1855 | * (delta < 0). |
| 1856 | * This is looking for a transition between character classes although there is also some |
| 1857 | * additional movement to transit white space. |
| 1858 | * Used by cursor movement by word commands. |
| 1859 | */ |
| 1860 | Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const { |
| 1861 | if (delta < 0) { |
| 1862 | while (pos > 0) { |
| 1863 | const CharacterExtracted ce = CharacterBefore(pos); |
| 1864 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
| 1865 | break; |
| 1866 | pos -= ce.widthBytes; |
| 1867 | } |
| 1868 | if (pos > 0) { |
| 1869 | CharacterExtracted ce = CharacterBefore(pos); |
| 1870 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
| 1871 | while (pos > 0) { |
| 1872 | ce = CharacterBefore(pos); |
| 1873 | if (WordCharacterClass(ce.character) != ccStart) |
| 1874 | break; |
| 1875 | pos -= ce.widthBytes; |
| 1876 | } |
| 1877 | } |
| 1878 | } else { |
| 1879 | CharacterExtracted ce = CharacterAfter(pos); |
| 1880 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
| 1881 | while (pos < LengthNoExcept()) { |
| 1882 | ce = CharacterAfter(pos); |
| 1883 | if (WordCharacterClass(ce.character) != ccStart) |
| 1884 | break; |
| 1885 | pos += ce.widthBytes; |
| 1886 | } |
| 1887 | while (pos < LengthNoExcept()) { |
| 1888 | ce = CharacterAfter(pos); |
| 1889 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
| 1890 | break; |
| 1891 | pos += ce.widthBytes; |
| 1892 | } |
| 1893 | } |
| 1894 | return pos; |
| 1895 | } |
| 1896 | |
| 1897 | /** |
| 1898 | * Find the end of the next word in either a forward (delta >= 0) or backwards direction |
| 1899 | * (delta < 0). |
| 1900 | * This is looking for a transition between character classes although there is also some |
| 1901 | * additional movement to transit white space. |
| 1902 | * Used by cursor movement by word commands. |
| 1903 | */ |
| 1904 | Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const { |
| 1905 | if (delta < 0) { |
| 1906 | if (pos > 0) { |
| 1907 | CharacterExtracted ce = CharacterBefore(pos); |
| 1908 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
| 1909 | if (ccStart != CharacterClass::space) { |
| 1910 | while (pos > 0) { |
| 1911 | ce = CharacterBefore(pos); |
| 1912 | if (WordCharacterClass(ce.character) != ccStart) |
| 1913 | break; |
| 1914 | pos -= ce.widthBytes; |
| 1915 | } |
| 1916 | } |
| 1917 | while (pos > 0) { |
| 1918 | ce = CharacterBefore(pos); |
| 1919 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
| 1920 | break; |
| 1921 | pos -= ce.widthBytes; |
| 1922 | } |
| 1923 | } |
| 1924 | } else { |
| 1925 | while (pos < LengthNoExcept()) { |
| 1926 | const CharacterExtracted ce = CharacterAfter(pos); |
| 1927 | if (WordCharacterClass(ce.character) != CharacterClass::space) |
| 1928 | break; |
| 1929 | pos += ce.widthBytes; |
| 1930 | } |
| 1931 | if (pos < LengthNoExcept()) { |
| 1932 | CharacterExtracted ce = CharacterAfter(pos); |
| 1933 | const CharacterClass ccStart = WordCharacterClass(ce.character); |
| 1934 | while (pos < LengthNoExcept()) { |
| 1935 | ce = CharacterAfter(pos); |
| 1936 | if (WordCharacterClass(ce.character) != ccStart) |
| 1937 | break; |
| 1938 | pos += ce.widthBytes; |
| 1939 | } |
| 1940 | } |
| 1941 | } |
| 1942 | return pos; |
| 1943 | } |
| 1944 | |
| 1945 | namespace { |
| 1946 | |
| 1947 | constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept { |
| 1948 | return (cc != ccNext) && |
| 1949 | (cc == CharacterClass::word || cc == CharacterClass::punctuation); |
| 1950 | } |
| 1951 | |
| 1952 | } |
| 1953 | |
| 1954 | /** |
| 1955 | * Check that the character at the given position is a word or punctuation character and that |
| 1956 | * the previous character is of a different character class. |
| 1957 | */ |
| 1958 | bool Document::IsWordStartAt(Sci::Position pos) const { |
| 1959 | if (pos >= LengthNoExcept()) |
| 1960 | return false; |
| 1961 | if (pos >= 0) { |
| 1962 | const CharacterExtracted cePos = CharacterAfter(pos); |
| 1963 | // At start of document, treat as if space before so can be word start |
| 1964 | const CharacterExtracted cePrev = (pos > 0) ? |
| 1965 | CharacterBefore(pos) : CharacterExtracted(' ', 1); |
| 1966 | return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character)); |
| 1967 | } |
| 1968 | return true; |
| 1969 | } |
| 1970 | |
| 1971 | /** |
| 1972 | * Check that the character before the given position is a word or punctuation character and that |
| 1973 | * the next character is of a different character class. |
| 1974 | */ |
| 1975 | bool Document::IsWordEndAt(Sci::Position pos) const { |
| 1976 | if (pos <= 0) |
| 1977 | return false; |
| 1978 | if (pos <= LengthNoExcept()) { |
| 1979 | // At end of document, treat as if space after so can be word end |
| 1980 | const CharacterExtracted cePos = (pos < LengthNoExcept()) ? |
| 1981 | CharacterAfter(pos) : CharacterExtracted(' ', 1); |
| 1982 | const CharacterExtracted cePrev = CharacterBefore(pos); |
| 1983 | return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character)); |
| 1984 | } |
| 1985 | return true; |
| 1986 | } |
| 1987 | |
| 1988 | /** |
| 1989 | * Check that the given range is has transitions between character classes at both |
| 1990 | * ends and where the characters on the inside are word or punctuation characters. |
| 1991 | */ |
| 1992 | bool Document::IsWordAt(Sci::Position start, Sci::Position end) const { |
| 1993 | return (start < end) && IsWordStartAt(start) && IsWordEndAt(end); |
| 1994 | } |
| 1995 | |
| 1996 | bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const { |
| 1997 | return (!word && !wordStart) || |
| 1998 | (word && IsWordAt(pos, pos + length)) || |
| 1999 | (wordStart && IsWordStartAt(pos)); |
| 2000 | } |
| 2001 | |
// Report whether a case folder is currently held, that is whether pcf is non-null.
bool Document::HasCaseFolder() const noexcept {
	return pcf != nullptr;
}
| 2005 | |
// Take ownership of the given case folder, replacing whatever pcf held before.
// Passing an empty pointer leaves the document without a case folder.
void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
	pcf = std::move(pcf_);
}
| 2009 | |
| 2010 | Document::CharacterExtracted Document::(Sci::Position position) const noexcept { |
| 2011 | const unsigned char leadByte = cb.UCharAt(position); |
| 2012 | if (UTF8IsAscii(leadByte)) { |
| 2013 | // Common case: ASCII character |
| 2014 | return CharacterExtracted(leadByte, 1); |
| 2015 | } |
| 2016 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
| 2017 | unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; |
| 2018 | for (int b=1; b<widthCharBytes; b++) |
| 2019 | charBytes[b] = cb.UCharAt(position + b); |
| 2020 | const int utf8status = UTF8Classify(charBytes, widthCharBytes); |
| 2021 | if (utf8status & UTF8MaskInvalid) { |
| 2022 | // Treat as invalid and use up just one byte |
| 2023 | return CharacterExtracted(unicodeReplacementChar, 1); |
| 2024 | } else { |
| 2025 | return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); |
| 2026 | } |
| 2027 | } |
| 2028 | |
| 2029 | namespace { |
| 2030 | |
| 2031 | // Equivalent of memchr over the split view |
| 2032 | ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept { |
| 2033 | size_t range1Length = 0; |
| 2034 | if (start < view.length1) { |
| 2035 | range1Length = std::min(length, view.length1 - start); |
| 2036 | const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length)); |
| 2037 | if (match) { |
| 2038 | return match - view.segment1; |
| 2039 | } |
| 2040 | start += range1Length; |
| 2041 | } |
| 2042 | const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length)); |
| 2043 | if (match2) { |
| 2044 | return match2 - view.segment2; |
| 2045 | } |
| 2046 | return -1; |
| 2047 | } |
| 2048 | |
| 2049 | // Equivalent of memcmp over the split view |
| 2050 | // This does not call memcmp as search texts are commonly too short to overcome the |
| 2051 | // call overhead. |
| 2052 | bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept { |
| 2053 | for (size_t i = 0; i < text.length(); i++) { |
| 2054 | if (view.CharAt(i + start) != text[i]) { |
| 2055 | return false; |
| 2056 | } |
| 2057 | } |
| 2058 | return true; |
| 2059 | } |
| 2060 | |
| 2061 | } |
| 2062 | |
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos Position where the search starts.
 * @param maxPos Position where the search ends.
 * @param search Bytes to look for; *length of them are used.
 * @param flags FindOption bits: MatchCase, WholeWord, WordStart and RegExp are examined here.
 * @param length On entry the number of bytes in search. The case-insensitive UTF-8 and DBCS
 *        branches overwrite it with the byte length of the matched document text.
 * @return minPos when *length <= 0, the position of the match when found, otherwise -1.
 *         With RegExp set the result is whatever the regex object's FindText returns.
 *
 * NOTE(review): the case-insensitive branches dereference pcf without a check, so they
 * assume a case folder has been set - confirm that callers ensure this.
 */
Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
	FindOption flags, Sci::Position *length) {
	if (*length <= 0)
		return minPos;
	const bool caseSensitive = FlagSet(flags, FindOption::MatchCase);
	const bool word = FlagSet(flags, FindOption::WholeWord);
	const bool wordStart = FlagSet(flags, FindOption::WordStart);
	const bool regExp = FlagSet(flags, FindOption::RegExp);
	if (regExp) {
		// The regex searcher is created on first use and then kept
		if (!regex)
			regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
		return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
	} else {

		const bool forward = minPos <= maxPos;
		const int increment = forward ? 1 : -1;

		// Range endpoints should not be inside DBCS characters, but just in case, move them.
		const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
		const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);

		// Compute actual search ranges needed
		const Sci::Position lengthFind = *length;

		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
		// No match may extend beyond limitPos, the higher of the two range ends
		const Sci::Position limitPos = std::max(startPos, endPos);
		Sci::Position pos = startPos;
		if (!forward) {
			// Back all of a character
			pos = NextPosition(pos, increment);
		}
		const SplitView cbView = cb.AllView();
		if (caseSensitive) {
			// Forward searches stop early enough that the whole search text still fits before endPos
			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			const unsigned char charStartSearch = search[0];
			if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
				// This is a fast case where there is no need to test byte values to iterate
				// so becomes the equivalent of a memchr+memcmp loop.
				// UTF-8 search will not be self-synchronizing when starts with trail byte
				const std::string_view suffix(search + 1, lengthFind - 1);
				while (pos < endSearch) {
					// Jump to the next occurrence of the first search byte
					pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
					if (pos < 0) {
						break;
					}
					if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
						return pos;
					}
					pos++;
				}
			} else {
				// Byte comparison at each candidate position, stepping by whole characters
				// when a DBCS code page is active
				while (forward ? (pos < endSearch) : (pos >= endSearch)) {
					const unsigned char leadByte = cbView.CharAt(pos);
					if (leadByte == charStartSearch) {
						bool found = (pos + lengthFind) <= limitPos;
						// SplitMatch could be called here but it is slower with g++ -O2
						for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
							found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
						}
						if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
							return pos;
						}
					}
					if (forward && UTF8IsAscii(leadByte)) {
						pos++;
					} else {
						if (dbcsCodePage) {
							if (!NextCharacter(pos, increment)) {
								break;
							}
						} else {
							pos += increment;
						}
					}
				}
			}
		} else if (CpUtf8 == dbcsCodePage) {
			// Case-insensitive UTF-8: fold the search text once, then fold the document
			// one character at a time and compare against the folded search text
			constexpr size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
			const size_t lenSearch =
				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int widthFirstCharacter = 0;
				Sci::Position posIndexDocument = pos;
				size_t indexSearch = 0;
				bool characterMatches = true;
				for (;;) {
					const unsigned char leadByte = cbView.CharAt(posIndexDocument);
					char bytes[UTF8MaxBytes + 1];
					int widthChar = 1;
					if (!UTF8IsAscii(leadByte)) {
						const int widthCharBytes = UTF8BytesOfLead[leadByte];
						bytes[0] = leadByte;
						for (int b=1; b<widthCharBytes; b++) {
							bytes[b] = cbView.CharAt(posIndexDocument+b);
						}
						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
					}
					// Remember the width of the first character so a forward search can step over it
					if (!widthFirstCharacter) {
						widthFirstCharacter = widthChar;
					}
					if ((posIndexDocument + widthChar) > limitPos) {
						break;
					}
					size_t lenFlat = 1;
					if (widthChar == 1) {
						characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
					} else {
						char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
						lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
						// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
						assert((indexSearch + lenFlat) <= searchThing.size());
						// Does folded match the buffer
						characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					}
					if (!characterMatches) {
						break;
					}
					posIndexDocument += widthChar;
					indexSearch += lenFlat;
					if (indexSearch >= lenSearch) {
						break;
					}
				}
				if (characterMatches && (indexSearch == lenSearch)) {
					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
						// The matched document text may differ in length from the search text
						*length = posIndexDocument - pos;
						return pos;
					}
				}
				if (forward) {
					pos += widthFirstCharacter;
				} else {
					if (!NextCharacter(pos, increment)) {
						break;
					}
				}
			}
		} else if (dbcsCodePage) {
			// Case-insensitive DBCS: same approach as UTF-8 with characters of 1 or 2 bytes
			constexpr size_t maxBytesCharacter = 2;
			constexpr size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
			const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int widthFirstCharacter = 0;
				Sci::Position indexDocument = 0;
				size_t indexSearch = 0;
				bool characterMatches = true;
				while (((pos + indexDocument) < limitPos) &&
					(indexSearch < lenSearch)) {
					const unsigned char leadByte = cbView.CharAt(pos + indexDocument);
					const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? 2 : 1;
					if (!widthFirstCharacter) {
						widthFirstCharacter = widthChar;
					}
					if ((pos + indexDocument + widthChar) > limitPos) {
						break;
					}
					size_t lenFlat = 1;
					if (widthChar == 1) {
						characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
					} else {
						char bytes[maxBytesCharacter + 1];
						bytes[0] = leadByte;
						bytes[1] = cbView.CharAt(pos + indexDocument + 1);
						char folded[maxBytesCharacter * maxFoldingExpansion + 1];
						lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
						// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
						assert((indexSearch + lenFlat) <= searchThing.size());
						// Does folded match the buffer
						characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					}
					if (!characterMatches) {
						break;
					}
					indexDocument += widthChar;
					indexSearch += lenFlat;
				}
				if (characterMatches && (indexSearch == lenSearch)) {
					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
						*length = indexDocument;
						return pos;
					}
				}
				if (forward) {
					pos += widthFirstCharacter;
				} else {
					if (!NextCharacter(pos, increment)) {
						break;
					}
				}
			}
		} else {
			// Case-insensitive single byte: folded text has the same length as the search text
			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			std::vector<char> searchThing(lengthFind + 1);
			pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
				bool found = (pos + lengthFind) <= limitPos;
				for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
					const char ch = cbView.CharAt(pos + indexSearch);
					const char chTest = searchThing[indexSearch];
					if (UTF8IsAscii(ch)) {
						found = chTest == MakeLowerCase(ch);
					} else {
						// Bytes outside ASCII are folded through the case folder
						char folded[2];
						pcf->Fold(folded, sizeof(folded), &ch, 1);
						found = folded[0] == chTest;
					}
				}
				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
					return pos;
				}
				pos += increment;
			}
		}
	}
	//Platform::DebugPrintf("Not found\n");
	return -1;
}
| 2287 | |
| 2288 | const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) { |
| 2289 | if (regex) |
| 2290 | return regex->SubstituteByPosition(this, text, length); |
| 2291 | else |
| 2292 | return nullptr; |
| 2293 | } |
| 2294 | |
// Return the line character index type reported by cb.
LineCharacterIndexType Document::LineCharacterIndex() const noexcept {
	return cb.LineCharacterIndex();
}
| 2298 | |
// Forward a request to allocate the given line character index to cb.
void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
	return cb.AllocateLineCharacterIndex(lineCharacterIndex);
}
| 2302 | |
// Forward a request to release the given line character index to cb.
void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
	return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
}
| 2306 | |
// Return the number of lines in the document as reported by cb.Lines().
Sci::Line Document::LinesTotal() const noexcept {
	return cb.Lines();
}
| 2310 | |
// Forward a request to allocate room for the given number of lines to cb.
void Document::AllocateLines(Sci::Line lines) {
	cb.AllocateLines(lines);
}
| 2314 | |
// Reset charClass to its default character classes; includeWordClass is passed through unchanged.
void Document::SetDefaultCharClasses(bool includeWordClass) {
	charClass.SetDefaultCharClasses(includeWordClass);
}
| 2318 | |
// Assign newCharClass to the characters listed in chars by forwarding to charClass.
void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) {
	charClass.SetCharClasses(chars, newCharClass);
}
| 2322 | |
// Query charClass for the characters of characterClass, passing buffer through,
// and return the value charClass reports.
int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const {
	return charClass.GetCharsOfClass(characterClass, buffer);
}
| 2326 | |
// Ask charMap to optimize for the given count of characters.
void Document::SetCharacterCategoryOptimization(int countCharacters) {
	charMap.Optimize(countCharacters);
}
| 2330 | |
// Return the current size reported by charMap.
int Document::CharacterCategoryOptimization() const noexcept {
	return charMap.Size();
}
| 2334 | |
// Set endStyled to position so that following SetStyleFor / SetStyles calls
// apply styles starting from there.
void SCI_METHOD Document::StartStyling(Sci_Position position) {
	endStyled = position;
}
| 2338 | |
| 2339 | bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) { |
| 2340 | if (enteredStyling != 0) { |
| 2341 | return false; |
| 2342 | } else { |
| 2343 | enteredStyling++; |
| 2344 | const Sci::Position prevEndStyled = endStyled; |
| 2345 | if (cb.SetStyleFor(endStyled, length, style)) { |
| 2346 | const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User, |
| 2347 | prevEndStyled, length); |
| 2348 | NotifyModified(mh); |
| 2349 | } |
| 2350 | endStyled += length; |
| 2351 | enteredStyling--; |
| 2352 | return true; |
| 2353 | } |
| 2354 | } |
| 2355 | |
| 2356 | bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) { |
| 2357 | if (enteredStyling != 0) { |
| 2358 | return false; |
| 2359 | } else { |
| 2360 | enteredStyling++; |
| 2361 | bool didChange = false; |
| 2362 | Sci::Position startMod = 0; |
| 2363 | Sci::Position endMod = 0; |
| 2364 | for (int iPos = 0; iPos < length; iPos++, endStyled++) { |
| 2365 | PLATFORM_ASSERT(endStyled < Length()); |
| 2366 | if (cb.SetStyleAt(endStyled, styles[iPos])) { |
| 2367 | if (!didChange) { |
| 2368 | startMod = endStyled; |
| 2369 | } |
| 2370 | didChange = true; |
| 2371 | endMod = endStyled; |
| 2372 | } |
| 2373 | } |
| 2374 | if (didChange) { |
| 2375 | const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User, |
| 2376 | startMod, endMod - startMod + 1); |
| 2377 | NotifyModified(mh); |
| 2378 | } |
| 2379 | enteredStyling--; |
| 2380 | return true; |
| 2381 | } |
| 2382 | } |
| 2383 | |
| 2384 | void Document::EnsureStyledTo(Sci::Position pos) { |
| 2385 | if ((enteredStyling == 0) && (pos > GetEndStyled())) { |
| 2386 | IncrementStyleClock(); |
| 2387 | if (pli && !pli->UseContainerLexing()) { |
| 2388 | const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled()); |
| 2389 | const Sci::Position endStyledTo = LineStart(lineEndStyled); |
| 2390 | pli->Colourise(endStyledTo, pos); |
| 2391 | } else { |
| 2392 | // Ask the watchers to style, and stop as soon as one responds. |
| 2393 | for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); |
| 2394 | (pos > GetEndStyled()) && (it != watchers.end()); ++it) { |
| 2395 | it->watcher->NotifyStyleNeeded(this, it->userData, pos); |
| 2396 | } |
| 2397 | } |
| 2398 | } |
| 2399 | } |
| 2400 | |
// Style up to pos and feed the measurement into durationStyleOneByte.
// The sample passed is pos minus the end of styling before the call, paired with
// the duration reported by epStyling.
void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
	// Capture where styling stood before any work is done
	const Sci::Position stylingStart = GetEndStyled();
	// Constructed immediately before the styling call so the period covers that call
	ElapsedPeriod epStyling;
	EnsureStyledTo(pos);
	durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration());
}
| 2407 | |
| 2408 | void Document::LexerChanged() { |
| 2409 | // Tell the watchers the lexer has changed. |
| 2410 | for (const WatcherWithUserData &watcher : watchers) { |
| 2411 | watcher.watcher->NotifyLexerChanged(this, watcher.userData); |
| 2412 | } |
| 2413 | } |
| 2414 | |
// Return a non-owning pointer to the lexer interface held in pli, which may be null.
LexInterface *Document::GetLexInterface() const noexcept {
	return pli.get();
}
| 2418 | |
// Take ownership of the given lexer interface, replacing whatever pli held before.
void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
	pli = std::move(pLexInterface);
}
| 2422 | |
| 2423 | int SCI_METHOD Document::SetLineState(Sci_Position line, int state) { |
| 2424 | const int statePrevious = States()->SetLineState(line, state); |
| 2425 | if (state != statePrevious) { |
| 2426 | const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), 0, 0, nullptr, |
| 2427 | static_cast<Sci::Line>(line)); |
| 2428 | NotifyModified(mh); |
| 2429 | } |
| 2430 | return statePrevious; |
| 2431 | } |
| 2432 | |
// Return the state value that States() holds for the given line.
int SCI_METHOD Document::GetLineState(Sci_Position line) const {
	return States()->GetLineState(line);
}
| 2436 | |
// Return the value of GetMaxLineState() from States().
Sci::Line Document::GetMaxLineState() const noexcept {
	return States()->GetMaxLineState();
}
| 2440 | |
| 2441 | void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) { |
| 2442 | const DocModification mh(ModificationFlags::LexerState, start, |
| 2443 | end-start, 0, nullptr, 0); |
| 2444 | NotifyModified(mh); |
| 2445 | } |
| 2446 | |
| 2447 | StyledText Document::MarginStyledText(Sci::Line line) const noexcept { |
| 2448 | const LineAnnotation *pla = Margins(); |
| 2449 | return StyledText(pla->Length(line), pla->Text(line), |
| 2450 | pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); |
| 2451 | } |
| 2452 | |
| 2453 | void Document::MarginSetText(Sci::Line line, const char *text) { |
| 2454 | Margins()->SetText(line, text); |
| 2455 | const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line), |
| 2456 | 0, 0, nullptr, line); |
| 2457 | NotifyModified(mh); |
| 2458 | } |
| 2459 | |
| 2460 | void Document::MarginSetStyle(Sci::Line line, int style) { |
| 2461 | Margins()->SetStyle(line, style); |
| 2462 | NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line), |
| 2463 | 0, 0, nullptr, line)); |
| 2464 | } |
| 2465 | |
| 2466 | void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) { |
| 2467 | Margins()->SetStyles(line, styles); |
| 2468 | NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line), |
| 2469 | 0, 0, nullptr, line)); |
| 2470 | } |
| 2471 | |
| 2472 | void Document::MarginClearAll() { |
| 2473 | const Sci::Line maxEditorLine = LinesTotal(); |
| 2474 | for (Sci::Line l=0; l<maxEditorLine; l++) |
| 2475 | MarginSetText(l, nullptr); |
| 2476 | // Free remaining data |
| 2477 | Margins()->ClearAll(); |
| 2478 | } |
| 2479 | |
| 2480 | StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept { |
| 2481 | const LineAnnotation *pla = Annotations(); |
| 2482 | return StyledText(pla->Length(line), pla->Text(line), |
| 2483 | pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); |
| 2484 | } |
| 2485 | |
| 2486 | void Document::AnnotationSetText(Sci::Line line, const char *text) { |
| 2487 | if (line >= 0 && line < LinesTotal()) { |
| 2488 | const Sci::Line linesBefore = AnnotationLines(line); |
| 2489 | Annotations()->SetText(line, text); |
| 2490 | const int linesAfter = AnnotationLines(line); |
| 2491 | DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line), |
| 2492 | 0, 0, nullptr, line); |
| 2493 | mh.annotationLinesAdded = linesAfter - linesBefore; |
| 2494 | NotifyModified(mh); |
| 2495 | } |
| 2496 | } |
| 2497 | |
| 2498 | void Document::AnnotationSetStyle(Sci::Line line, int style) { |
| 2499 | if (line >= 0 && line < LinesTotal()) { |
| 2500 | Annotations()->SetStyle(line, style); |
| 2501 | const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line), |
| 2502 | 0, 0, nullptr, line); |
| 2503 | NotifyModified(mh); |
| 2504 | } |
| 2505 | } |
| 2506 | |
| 2507 | void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) { |
| 2508 | if (line >= 0 && line < LinesTotal()) { |
| 2509 | Annotations()->SetStyles(line, styles); |
| 2510 | } |
| 2511 | } |
| 2512 | |
| 2513 | int Document::AnnotationLines(Sci::Line line) const noexcept { |
| 2514 | return Annotations()->Lines(line); |
| 2515 | } |
| 2516 | |
| 2517 | void Document::AnnotationClearAll() { |
| 2518 | const Sci::Line maxEditorLine = LinesTotal(); |
| 2519 | for (Sci::Line l=0; l<maxEditorLine; l++) |
| 2520 | AnnotationSetText(l, nullptr); |
| 2521 | // Free remaining data |
| 2522 | Annotations()->ClearAll(); |
| 2523 | } |
| 2524 | |
| 2525 | StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept { |
| 2526 | const LineAnnotation *pla = EOLAnnotations(); |
| 2527 | return StyledText(pla->Length(line), pla->Text(line), |
| 2528 | pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); |
| 2529 | } |
| 2530 | |
| 2531 | void Document::EOLAnnotationSetText(Sci::Line line, const char *text) { |
| 2532 | if (line >= 0 && line < LinesTotal()) { |
| 2533 | EOLAnnotations()->SetText(line, text); |
| 2534 | const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line), |
| 2535 | 0, 0, nullptr, line); |
| 2536 | NotifyModified(mh); |
| 2537 | } |
| 2538 | } |
| 2539 | |
| 2540 | void Document::EOLAnnotationSetStyle(Sci::Line line, int style) { |
| 2541 | if (line >= 0 && line < LinesTotal()) { |
| 2542 | EOLAnnotations()->SetStyle(line, style); |
| 2543 | const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line), |
| 2544 | 0, 0, nullptr, line); |
| 2545 | NotifyModified(mh); |
| 2546 | } |
| 2547 | } |
| 2548 | |
| 2549 | void Document::EOLAnnotationClearAll() { |
| 2550 | const Sci::Line maxEditorLine = LinesTotal(); |
| 2551 | for (Sci::Line l=0; l<maxEditorLine; l++) |
| 2552 | EOLAnnotationSetText(l, nullptr); |
| 2553 | // Free remaining data |
| 2554 | EOLAnnotations()->ClearAll(); |
| 2555 | } |
| 2556 | |
| 2557 | void Document::IncrementStyleClock() noexcept { |
| 2558 | styleClock = (styleClock + 1) % 0x100000; |
| 2559 | } |
| 2560 | |
// Select the indicator on the decoration list by forwarding the indicator
// number to decorations->SetCurrentIndicator().
void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
	decorations->SetCurrentIndicator(indicator);
}
| 2564 | |
| 2565 | void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) { |
| 2566 | const FillResult<Sci::Position> fr = decorations->FillRange( |
| 2567 | position, value, fillLength); |
| 2568 | if (fr.changed) { |
| 2569 | const DocModification mh(ModificationFlags::ChangeIndicator | ModificationFlags::User, |
| 2570 | fr.position, fr.fillLength); |
| 2571 | NotifyModified(mh); |
| 2572 | } |
| 2573 | } |
| 2574 | |
| 2575 | bool Document::AddWatcher(DocWatcher *watcher, void *userData) { |
| 2576 | const WatcherWithUserData wwud(watcher, userData); |
| 2577 | std::vector<WatcherWithUserData>::iterator it = |
| 2578 | std::find(watchers.begin(), watchers.end(), wwud); |
| 2579 | if (it != watchers.end()) |
| 2580 | return false; |
| 2581 | watchers.push_back(wwud); |
| 2582 | return true; |
| 2583 | } |
| 2584 | |
| 2585 | bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) noexcept { |
| 2586 | try { |
| 2587 | // This can never fail as WatcherWithUserData constructor and == are noexcept |
| 2588 | // but std::find is not noexcept. |
| 2589 | std::vector<WatcherWithUserData>::iterator it = |
| 2590 | std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData)); |
| 2591 | if (it != watchers.end()) { |
| 2592 | watchers.erase(it); |
| 2593 | return true; |
| 2594 | } |
| 2595 | } catch (...) { |
| 2596 | // Ignore any exception |
| 2597 | } |
| 2598 | return false; |
| 2599 | } |
| 2600 | |
| 2601 | void Document::NotifyModifyAttempt() { |
| 2602 | for (const WatcherWithUserData &watcher : watchers) { |
| 2603 | watcher.watcher->NotifyModifyAttempt(this, watcher.userData); |
| 2604 | } |
| 2605 | } |
| 2606 | |
| 2607 | void Document::NotifySavePoint(bool atSavePoint) { |
| 2608 | for (const WatcherWithUserData &watcher : watchers) { |
| 2609 | watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint); |
| 2610 | } |
| 2611 | } |
| 2612 | |
| 2613 | void Document::NotifyModified(DocModification mh) { |
| 2614 | if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) { |
| 2615 | decorations->InsertSpace(mh.position, mh.length); |
| 2616 | } else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) { |
| 2617 | decorations->DeleteRange(mh.position, mh.length); |
| 2618 | } |
| 2619 | for (const WatcherWithUserData &watcher : watchers) { |
| 2620 | watcher.watcher->NotifyModified(this, mh, watcher.userData); |
| 2621 | } |
| 2622 | } |
| 2623 | |
| 2624 | bool Document::IsWordPartSeparator(unsigned int ch) const { |
| 2625 | return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch); |
| 2626 | } |
| 2627 | |
// Move left from pos to the start of the preceding word part and return
// that position. Returns pos unchanged when pos is not greater than 0.
// Characters are stepped over whole, using the byte width reported by
// CharacterBefore / CharacterAfter.
Sci::Position Document::WordPartLeft(Sci::Position pos) const {
	if (pos > 0) {
		// Step onto the character immediately before the starting position.
		pos -= CharacterBefore(pos).widthBytes;
		CharacterExtracted ceStart = CharacterAfter(pos);
		if (IsWordPartSeparator(ceStart.character)) {
			// Skip backwards over a run of word part separators.
			while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
				pos -= CharacterBefore(pos).widthBytes;
			}
		}
		if (pos > 0) {
			// ceStart decides which class of run is scanned; scanning begins
			// one character further left.
			ceStart = CharacterAfter(pos);
			pos -= CharacterBefore(pos).widthBytes;
			// In each branch the loop walks left while characters stay in the
			// class, then steps forward again if it stopped on a character
			// that does not belong to the part.
			if (IsLowerCase(ceStart.character)) {
				while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
					pos -= CharacterBefore(pos).widthBytes;
				// An upper case character directly before the lower case run is
				// kept as part of the word part.
				if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
					pos += CharacterAfter(pos).widthBytes;
			} else if (IsUpperCase(ceStart.character)) {
				while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
					pos -= CharacterBefore(pos).widthBytes;
				if (!IsUpperCase(CharacterAfter(pos).character))
					pos += CharacterAfter(pos).widthBytes;
			} else if (IsADigit(ceStart.character)) {
				while (pos > 0 && IsADigit(CharacterAfter(pos).character))
					pos -= CharacterBefore(pos).widthBytes;
				if (!IsADigit(CharacterAfter(pos).character))
					pos += CharacterAfter(pos).widthBytes;
			} else if (IsPunctuation(ceStart.character)) {
				while (pos > 0 && IsPunctuation(CharacterAfter(pos).character))
					pos -= CharacterBefore(pos).widthBytes;
				if (!IsPunctuation(CharacterAfter(pos).character))
					pos += CharacterAfter(pos).widthBytes;
			} else if (IsASpace(ceStart.character)) {
				while (pos > 0 && IsASpace(CharacterAfter(pos).character))
					pos -= CharacterBefore(pos).widthBytes;
				if (!IsASpace(CharacterAfter(pos).character))
					pos += CharacterAfter(pos).widthBytes;
			} else if (!IsASCII(ceStart.character)) {
				// A run of non-ASCII characters is treated as one word part.
				while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
					pos -= CharacterBefore(pos).widthBytes;
				if (IsASCII(CharacterAfter(pos).character))
					pos += CharacterAfter(pos).widthBytes;
			} else {
				// No recognised class: undo the extra step left.
				pos += CharacterAfter(pos).widthBytes;
			}
		}
	}
	return pos;
}
| 2677 | |
// Move right from pos to the end of the word part that starts at pos and
// return that position. Scanning never continues past the document length.
// Characters are stepped over whole, using the byte width reported by
// CharacterAfter / CharacterBefore.
Sci::Position Document::WordPartRight(Sci::Position pos) const {
	CharacterExtracted ceStart = CharacterAfter(pos);
	const Sci::Position length = LengthNoExcept();
	if (IsWordPartSeparator(ceStart.character)) {
		// Skip a leading run of word part separators and classify the
		// character that follows it instead.
		while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
			pos += CharacterAfter(pos).widthBytes;
		ceStart = CharacterAfter(pos);
	}
	if (!IsASCII(ceStart.character)) {
		// A run of non-ASCII characters is treated as one word part.
		while (pos < length && !IsASCII(CharacterAfter(pos).character))
			pos += CharacterAfter(pos).widthBytes;
	} else if (IsLowerCase(ceStart.character)) {
		while (pos < length && IsLowerCase(CharacterAfter(pos).character))
			pos += CharacterAfter(pos).widthBytes;
	} else if (IsUpperCase(ceStart.character)) {
		if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
			// One upper case character followed by lower case: take the
			// upper case character and the lower case run after it.
			pos += CharacterAfter(pos).widthBytes;
			while (pos < length && IsLowerCase(CharacterAfter(pos).character))
				pos += CharacterAfter(pos).widthBytes;
		} else {
			// Otherwise take the whole run of upper case characters.
			while (pos < length && IsUpperCase(CharacterAfter(pos).character))
				pos += CharacterAfter(pos).widthBytes;
		}
		// If the part ends on an upper case character that is followed by
		// lower case, give that upper case character back to the next part.
		if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
			pos -= CharacterBefore(pos).widthBytes;
	} else if (IsADigit(ceStart.character)) {
		while (pos < length && IsADigit(CharacterAfter(pos).character))
			pos += CharacterAfter(pos).widthBytes;
	} else if (IsPunctuation(ceStart.character)) {
		while (pos < length && IsPunctuation(CharacterAfter(pos).character))
			pos += CharacterAfter(pos).widthBytes;
	} else if (IsASpace(ceStart.character)) {
		while (pos < length && IsASpace(CharacterAfter(pos).character))
			pos += CharacterAfter(pos).widthBytes;
	} else {
		// No recognised class: move over just this one character.
		pos += CharacterAfter(pos).widthBytes;
	}
	return pos;
}
| 2717 | |
| 2718 | Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept { |
| 2719 | const char sStart = cb.StyleAt(pos); |
| 2720 | if (delta < 0) { |
| 2721 | while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos)))) |
| 2722 | pos--; |
| 2723 | pos++; |
| 2724 | } else { |
| 2725 | while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos)))) |
| 2726 | pos++; |
| 2727 | } |
| 2728 | return pos; |
| 2729 | } |
| 2730 | |
// Return the brace that pairs with ch for (), [], {} and <>,
// or '\0' when ch is not one of those eight characters.
static char BraceOpposite(char ch) noexcept {
	// Braces are stored as adjacent open/close pairs, so the partner of the
	// entry at index i is the entry at index i ^ 1.
	constexpr char pairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
	constexpr size_t count = sizeof(pairs) / sizeof(pairs[0]);
	for (size_t i = 0; i < count; i++) {
		if (pairs[i] == ch) {
			return pairs[i ^ 1];
		}
	}
	return '\0';
}
| 2753 | |
| 2754 | // TODO: should be able to extend styled region to find matching brace |
| 2755 | Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept { |
| 2756 | const char chBrace = CharAt(position); |
| 2757 | const char chSeek = BraceOpposite(chBrace); |
| 2758 | if (chSeek == '\0') |
| 2759 | return - 1; |
| 2760 | const int styBrace = StyleIndexAt(position); |
| 2761 | int direction = -1; |
| 2762 | if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<') |
| 2763 | direction = 1; |
| 2764 | int depth = 1; |
| 2765 | position = useStartPos ? startPos : NextPosition(position, direction); |
| 2766 | while ((position >= 0) && (position < LengthNoExcept())) { |
| 2767 | const char chAtPos = CharAt(position); |
| 2768 | const int styAtPos = StyleIndexAt(position); |
| 2769 | if ((position > GetEndStyled()) || (styAtPos == styBrace)) { |
| 2770 | if (chAtPos == chBrace) |
| 2771 | depth++; |
| 2772 | if (chAtPos == chSeek) |
| 2773 | depth--; |
| 2774 | if (depth == 0) |
| 2775 | return position; |
| 2776 | } |
| 2777 | const Sci::Position positionBeforeMove = position; |
| 2778 | position = NextPosition(position, direction); |
| 2779 | if (position == positionBeforeMove) |
| 2780 | break; |
| 2781 | } |
| 2782 | return - 1; |
| 2783 | } |
| 2784 | |
/**
 * Implementation of RegexSearchBase for the default built-in regular expression engine
 */
class BuiltinRegex : public RegexSearchBase {
public:
	// The character class table is handed to the RESearch member.
	explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}

	// Search doc between minPos and maxPos for the pattern s.
	// Returns the position of the match or -1; *length receives the match length.
	Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
		bool caseSensitive, bool word, bool wordStart, FindOption flags,
		Sci::Position *length) override;

	// NOTE(review): presumably expands references to the last match within
	// text and reports the result length through *length - confirm against
	// the definition, which is not visible here.
	const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;

private:
	// Engine that also stores the positions and text of matched groups.
	RESearch search;
	// NOTE(review): presumably the buffer backing the pointer returned by
	// SubstituteByPosition - confirm.
	std::string substituted;
};
| 2802 | |
| 2803 | namespace { |
| 2804 | |
| 2805 | /** |
| 2806 | * RESearchRange keeps track of search range. |
| 2807 | */ |
| 2808 | class RESearchRange { |
| 2809 | public: |
| 2810 | const Document *doc; |
| 2811 | int increment; |
| 2812 | Sci::Position startPos; |
| 2813 | Sci::Position endPos; |
| 2814 | Sci::Line lineRangeStart; |
| 2815 | Sci::Line lineRangeEnd; |
| 2816 | Sci::Line lineRangeBreak; |
| 2817 | RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) { |
| 2818 | increment = (minPos <= maxPos) ? 1 : -1; |
| 2819 | |
| 2820 | // Range endpoints should not be inside DBCS characters or between a CR and LF, |
| 2821 | // but just in case, move them. |
| 2822 | startPos = doc->MovePositionOutsideChar(minPos, 1, true); |
| 2823 | endPos = doc->MovePositionOutsideChar(maxPos, 1, true); |
| 2824 | |
| 2825 | lineRangeStart = doc->SciLineFromPosition(startPos); |
| 2826 | lineRangeEnd = doc->SciLineFromPosition(endPos); |
| 2827 | lineRangeBreak = lineRangeEnd + increment; |
| 2828 | } |
| 2829 | Range LineRange(Sci::Line line) const { |
| 2830 | Range range(doc->LineStart(line), doc->LineEnd(line)); |
| 2831 | if (increment == 1) { |
| 2832 | if (line == lineRangeStart) |
| 2833 | range.start = startPos; |
| 2834 | if (line == lineRangeEnd) |
| 2835 | range.end = endPos; |
| 2836 | } else { |
| 2837 | if (line == lineRangeEnd) |
| 2838 | range.start = endPos; |
| 2839 | if (line == lineRangeStart) |
| 2840 | range.end = startPos; |
| 2841 | } |
| 2842 | return range; |
| 2843 | } |
| 2844 | }; |
| 2845 | |
| 2846 | // Define a way for the Regular Expression code to access the document |
| 2847 | class DocumentIndexer : public CharacterIndexer { |
| 2848 | Document *pdoc; |
| 2849 | Sci::Position end; |
| 2850 | public: |
| 2851 | DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept : |
| 2852 | pdoc(pdoc_), end(end_) { |
| 2853 | } |
| 2854 | |
| 2855 | DocumentIndexer(const DocumentIndexer &) = delete; |
| 2856 | DocumentIndexer(DocumentIndexer &&) = delete; |
| 2857 | DocumentIndexer &operator=(const DocumentIndexer &) = delete; |
| 2858 | DocumentIndexer &operator=(DocumentIndexer &&) = delete; |
| 2859 | |
| 2860 | ~DocumentIndexer() override = default; |
| 2861 | |
| 2862 | char CharAt(Sci::Position index) const noexcept override { |
| 2863 | if (index < 0 || index >= end) |
| 2864 | return 0; |
| 2865 | else |
| 2866 | return pdoc->CharAt(index); |
| 2867 | } |
| 2868 | }; |
| 2869 | |
| 2870 | #ifndef NO_CXX11_REGEX |
| 2871 | |
| 2872 | class ByteIterator { |
| 2873 | public: |
| 2874 | using iterator_category = std::bidirectional_iterator_tag; |
| 2875 | using value_type = char; |
| 2876 | using difference_type = ptrdiff_t; |
| 2877 | using pointer = char*; |
| 2878 | using reference = char&; |
| 2879 | |
| 2880 | const Document *doc; |
| 2881 | Sci::Position position; |
| 2882 | |
| 2883 | explicit ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : |
| 2884 | doc(doc_), position(position_) { |
| 2885 | } |
| 2886 | char operator*() const noexcept { |
| 2887 | return doc->CharAt(position); |
| 2888 | } |
| 2889 | ByteIterator &operator++() noexcept { |
| 2890 | position++; |
| 2891 | return *this; |
| 2892 | } |
| 2893 | ByteIterator operator++(int) noexcept { |
| 2894 | ByteIterator retVal(*this); |
| 2895 | position++; |
| 2896 | return retVal; |
| 2897 | } |
| 2898 | ByteIterator &operator--() noexcept { |
| 2899 | position--; |
| 2900 | return *this; |
| 2901 | } |
| 2902 | bool operator==(const ByteIterator &other) const noexcept { |
| 2903 | return doc == other.doc && position == other.position; |
| 2904 | } |
| 2905 | bool operator!=(const ByteIterator &other) const noexcept { |
| 2906 | return doc != other.doc || position != other.position; |
| 2907 | } |
| 2908 | Sci::Position Pos() const noexcept { |
| 2909 | return position; |
| 2910 | } |
| 2911 | Sci::Position PosRoundUp() const noexcept { |
| 2912 | return position; |
| 2913 | } |
| 2914 | }; |
| 2915 | |
| 2916 | // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide. |
| 2917 | // Would be better to use sizeof(wchar_t) or similar to differentiate |
| 2918 | // but easier for now to hard-code platforms. |
| 2919 | // C++11 has char16_t and char32_t but neither Clang nor Visual C++ |
| 2920 | // appear to allow specializing basic_regex over these. |
| 2921 | |
| 2922 | #ifdef _WIN32 |
| 2923 | #define WCHAR_T_IS_16 1 |
| 2924 | #else |
| 2925 | #define WCHAR_T_IS_16 0 |
| 2926 | #endif |
| 2927 | |
| 2928 | #if WCHAR_T_IS_16 |
| 2929 | |
// On Windows, report non-BMP characters as 2 separate surrogates as that
// matches wregex since it is based on wchar_t.
// The iterator therefore has a byte position in the document plus an index
// (0 or 1) into the wchar_t units buffered for the character at that position.
class UTF8Iterator {
	// These 3 fields determine the iterator position and are used for comparisons
	const Document *doc;
	Sci::Position position;
	size_t characterIndex;
	// Remaining fields are derived from the determining fields so are excluded in comparisons
	unsigned int lenBytes;	// Width in bytes of the character at position
	size_t lenCharacters;	// Number of wchar_t units held in buffered
	wchar_t buffered[2];
public:
	using iterator_category = std::bidirectional_iterator_tag;
	using value_type = wchar_t;
	using difference_type = ptrdiff_t;
	using pointer = wchar_t*;
	using reference = wchar_t&;

	// A character is only read when a document is supplied, so a default
	// constructed iterator has nothing buffered.
	explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
		doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
		buffered[0] = 0;
		buffered[1] = 0;
		if (doc) {
			ReadCharacter();
		}
	}
	// The buffered wchar_t unit selected by characterIndex.
	wchar_t operator*() const noexcept {
		assert(lenCharacters != 0);
		return buffered[characterIndex];
	}
	// Advance to the next buffered unit of the current character, or else
	// to the first unit of the following character.
	UTF8Iterator &operator++() noexcept {
		if ((characterIndex + 1) < (lenCharacters)) {
			characterIndex++;
		} else {
			position += lenBytes;
			ReadCharacter();
			characterIndex = 0;
		}
		return *this;
	}
	// Post-increment: same movement as pre-increment, returning the old value.
	UTF8Iterator operator++(int) noexcept {
		UTF8Iterator retVal(*this);
		if ((characterIndex + 1) < (lenCharacters)) {
			characterIndex++;
		} else {
			position += lenBytes;
			ReadCharacter();
			characterIndex = 0;
		}
		return retVal;
	}
	// Step back to the previous buffered unit, or else to the last unit of
	// the preceding character.
	UTF8Iterator &operator--() noexcept {
		if (characterIndex) {
			characterIndex--;
		} else {
			position = doc->NextPosition(position, -1);
			ReadCharacter();
			// ReadCharacter has refreshed lenCharacters for the new position.
			characterIndex = lenCharacters - 1;
		}
		return *this;
	}
	bool operator==(const UTF8Iterator &other) const noexcept {
		// Only test the determining fields, not the character widths and values derived from this
		return doc == other.doc &&
			position == other.position &&
			characterIndex == other.characterIndex;
	}
	bool operator!=(const UTF8Iterator &other) const noexcept {
		// Only test the determining fields, not the character widths and values derived from this
		return doc != other.doc ||
			position != other.position ||
			characterIndex != other.characterIndex;
	}
	// Byte position of the start of the current character.
	Sci::Position Pos() const noexcept {
		return position;
	}
	// Byte position that never falls inside a character: when the iterator
	// is on a second unit, the position just after the character is returned.
	Sci::Position PosRoundUp() const noexcept {
		if (characterIndex)
			return position + lenBytes;	// Force to end of character
		else
			return position;
	}
private:
	// Extract the character at position and refresh lenBytes, lenCharacters
	// and buffered from it.
	void ReadCharacter() noexcept {
		const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
		lenBytes = charExtracted.widthBytes;
		if (charExtracted.character == unicodeReplacementChar) {
			// The replacement character is stored directly as a single unit.
			lenCharacters = 1;
			buffered[0] = static_cast<wchar_t>(charExtracted.character);
		} else {
			lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
		}
	}
};
| 3024 | |
| 3025 | #else |
| 3026 | |
| 3027 | // On Unix, report non-BMP characters as single characters |
| 3028 | |
| 3029 | class UTF8Iterator { |
| 3030 | const Document *doc; |
| 3031 | Sci::Position position; |
| 3032 | public: |
| 3033 | using iterator_category = std::bidirectional_iterator_tag; |
| 3034 | using value_type = wchar_t; |
| 3035 | using difference_type = ptrdiff_t; |
| 3036 | using pointer = wchar_t*; |
| 3037 | using reference = wchar_t&; |
| 3038 | |
| 3039 | explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : |
| 3040 | doc(doc_), position(position_) { |
| 3041 | } |
| 3042 | wchar_t operator*() const noexcept { |
| 3043 | const Document::CharacterExtracted = doc->ExtractCharacter(position); |
| 3044 | return charExtracted.character; |
| 3045 | } |
| 3046 | UTF8Iterator &operator++() noexcept { |
| 3047 | position = doc->NextPosition(position, 1); |
| 3048 | return *this; |
| 3049 | } |
| 3050 | UTF8Iterator operator++(int) noexcept { |
| 3051 | UTF8Iterator retVal(*this); |
| 3052 | position = doc->NextPosition(position, 1); |
| 3053 | return retVal; |
| 3054 | } |
| 3055 | UTF8Iterator &operator--() noexcept { |
| 3056 | position = doc->NextPosition(position, -1); |
| 3057 | return *this; |
| 3058 | } |
| 3059 | bool operator==(const UTF8Iterator &other) const noexcept { |
| 3060 | return doc == other.doc && position == other.position; |
| 3061 | } |
| 3062 | bool operator!=(const UTF8Iterator &other) const noexcept { |
| 3063 | return doc != other.doc || position != other.position; |
| 3064 | } |
| 3065 | Sci::Position Pos() const noexcept { |
| 3066 | return position; |
| 3067 | } |
| 3068 | Sci::Position PosRoundUp() const noexcept { |
| 3069 | return position; |
| 3070 | } |
| 3071 | }; |
| 3072 | |
| 3073 | #endif |
| 3074 | |
| 3075 | std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) { |
| 3076 | std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default; |
| 3077 | if (!doc->IsLineStartPosition(startPos)) |
| 3078 | flagsMatch |= std::regex_constants::match_not_bol; |
| 3079 | if (!doc->IsLineEndPosition(endPos)) |
| 3080 | flagsMatch |= std::regex_constants::match_not_eol; |
| 3081 | return flagsMatch; |
| 3082 | } |
| 3083 | |
| 3084 | template<typename Iterator, typename Regex> |
| 3085 | bool MatchOnLines(const Document *doc, const Regex ®exp, const RESearchRange &resr, RESearch &search) { |
| 3086 | std::match_results<Iterator> match; |
| 3087 | |
| 3088 | // MSVC and libc++ have problems with ^ and $ matching line ends inside a range. |
| 3089 | // CRLF line ends are also a problem as ^ and $ only treat LF as a line end. |
| 3090 | // The std::regex::multiline option was added to C++17 to improve behaviour but |
| 3091 | // has not been implemented by compiler runtimes with MSVC always in multiline |
| 3092 | // mode and libc++ and libstdc++ always in single-line mode. |
| 3093 | // If multiline regex worked well then the line by line iteration could be removed |
| 3094 | // for the forwards case and replaced with the following 4 lines: |
| 3095 | #ifdef REGEX_MULTILINE |
| 3096 | Iterator itStart(doc, resr.startPos); |
| 3097 | Iterator itEnd(doc, resr.endPos); |
| 3098 | const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); |
| 3099 | const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); |
| 3100 | #else |
| 3101 | // Line by line. |
| 3102 | bool matched = false; |
| 3103 | for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { |
| 3104 | const Range lineRange = resr.LineRange(line); |
| 3105 | Iterator itStart(doc, lineRange.start); |
| 3106 | Iterator itEnd(doc, lineRange.end); |
| 3107 | std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end); |
| 3108 | matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); |
| 3109 | // Check for the last match on this line. |
| 3110 | if (matched) { |
| 3111 | if (resr.increment == -1) { |
| 3112 | while (matched) { |
| 3113 | Iterator itNext(doc, match[0].second.PosRoundUp()); |
| 3114 | flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end); |
| 3115 | std::match_results<Iterator> matchNext; |
| 3116 | matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch); |
| 3117 | if (matched) { |
| 3118 | if (match[0].first == match[0].second) { |
| 3119 | // Empty match means failure so exit |
| 3120 | return false; |
| 3121 | } |
| 3122 | match = matchNext; |
| 3123 | } |
| 3124 | } |
| 3125 | matched = true; |
| 3126 | } |
| 3127 | break; |
| 3128 | } |
| 3129 | } |
| 3130 | #endif |
| 3131 | if (matched) { |
| 3132 | for (size_t co = 0; co < match.size() && co < RESearch::MAXTAG; co++) { |
| 3133 | search.bopat[co] = match[co].first.Pos(); |
| 3134 | search.eopat[co] = match[co].second.PosRoundUp(); |
| 3135 | const Sci::Position lenMatch = search.eopat[co] - search.bopat[co]; |
| 3136 | search.pat[co].resize(lenMatch); |
| 3137 | for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) { |
| 3138 | search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]); |
| 3139 | } |
| 3140 | } |
| 3141 | } |
| 3142 | return matched; |
| 3143 | } |
| 3144 | |
| 3145 | Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, |
| 3146 | bool caseSensitive, Sci::Position *length, RESearch &search) { |
| 3147 | const RESearchRange resr(doc, minPos, maxPos); |
| 3148 | try { |
| 3149 | //ElapsedPeriod ep; |
| 3150 | std::regex::flag_type flagsRe = std::regex::ECMAScript; |
| 3151 | // Flags that appear to have no effect: |
| 3152 | // | std::regex::collate | std::regex::extended; |
| 3153 | if (!caseSensitive) |
| 3154 | flagsRe = flagsRe | std::regex::icase; |
| 3155 | |
| 3156 | // Clear the RESearch so can fill in matches |
| 3157 | search.Clear(); |
| 3158 | |
| 3159 | bool matched = false; |
| 3160 | if (CpUtf8 == doc->dbcsCodePage) { |
| 3161 | const std::wstring ws = WStringFromUTF8(s); |
| 3162 | std::wregex regexp; |
| 3163 | regexp.assign(ws, flagsRe); |
| 3164 | matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search); |
| 3165 | |
| 3166 | } else { |
| 3167 | std::regex regexp; |
| 3168 | regexp.assign(s, flagsRe); |
| 3169 | matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search); |
| 3170 | } |
| 3171 | |
| 3172 | Sci::Position posMatch = -1; |
| 3173 | if (matched) { |
| 3174 | posMatch = search.bopat[0]; |
| 3175 | *length = search.eopat[0] - search.bopat[0]; |
| 3176 | } |
| 3177 | // Example - search in doc/ScintillaHistory.html for |
| 3178 | // [[:upper:]]eta[[:space:]] |
| 3179 | // On MacBook, normally around 1 second but with locale imbued -> 14 seconds. |
| 3180 | //const double durSearch = ep.Duration(true); |
| 3181 | //Platform::DebugPrintf("Search:%9.6g \n", durSearch); |
| 3182 | return posMatch; |
| 3183 | } catch (std::regex_error &) { |
| 3184 | // Failed to create regular expression |
| 3185 | throw RegexError(); |
| 3186 | } catch (...) { |
| 3187 | // Failed in some other way |
| 3188 | return -1; |
| 3189 | } |
| 3190 | } |
| 3191 | |
| 3192 | #endif |
| 3193 | |
| 3194 | } |
| 3195 | |
| 3196 | Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, |
| 3197 | bool caseSensitive, bool, bool, FindOption flags, |
| 3198 | Sci::Position *length) { |
| 3199 | |
| 3200 | #ifndef NO_CXX11_REGEX |
| 3201 | if (FlagSet(flags, FindOption::Cxx11RegEx)) { |
| 3202 | return Cxx11RegexFindText(doc, minPos, maxPos, s, |
| 3203 | caseSensitive, length, search); |
| 3204 | } |
| 3205 | #endif |
| 3206 | |
| 3207 | const RESearchRange resr(doc, minPos, maxPos); |
| 3208 | |
| 3209 | const bool posix = FlagSet(flags, FindOption::Posix); |
| 3210 | |
| 3211 | const char *errmsg = search.Compile(s, *length, caseSensitive, posix); |
| 3212 | if (errmsg) { |
| 3213 | return -1; |
| 3214 | } |
| 3215 | // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) |
| 3216 | // Replace first '.' with '-' in each property file variable reference: |
| 3217 | // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) |
| 3218 | // Replace: $(\1-\2) |
| 3219 | Sci::Position pos = -1; |
| 3220 | Sci::Position lenRet = 0; |
| 3221 | const bool searchforLineStart = s[0] == '^'; |
| 3222 | const char searchEnd = s[*length - 1]; |
| 3223 | const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0'; |
| 3224 | const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\'); |
| 3225 | for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { |
| 3226 | Sci::Position startOfLine = doc->LineStart(line); |
| 3227 | Sci::Position endOfLine = doc->LineEnd(line); |
| 3228 | if (resr.increment == 1) { |
| 3229 | if (line == resr.lineRangeStart) { |
| 3230 | if ((resr.startPos != startOfLine) && searchforLineStart) |
| 3231 | continue; // Can't match start of line if start position after start of line |
| 3232 | startOfLine = resr.startPos; |
| 3233 | } |
| 3234 | if (line == resr.lineRangeEnd) { |
| 3235 | if ((resr.endPos != endOfLine) && searchforLineEnd) |
| 3236 | continue; // Can't match end of line if end position before end of line |
| 3237 | endOfLine = resr.endPos; |
| 3238 | } |
| 3239 | } else { |
| 3240 | if (line == resr.lineRangeEnd) { |
| 3241 | if ((resr.endPos != startOfLine) && searchforLineStart) |
| 3242 | continue; // Can't match start of line if end position after start of line |
| 3243 | startOfLine = resr.endPos; |
| 3244 | } |
| 3245 | if (line == resr.lineRangeStart) { |
| 3246 | if ((resr.startPos != endOfLine) && searchforLineEnd) |
| 3247 | continue; // Can't match end of line if start position before end of line |
| 3248 | endOfLine = resr.startPos; |
| 3249 | } |
| 3250 | } |
| 3251 | |
| 3252 | const DocumentIndexer di(doc, endOfLine); |
| 3253 | int success = search.Execute(di, startOfLine, endOfLine); |
| 3254 | if (success) { |
| 3255 | pos = search.bopat[0]; |
| 3256 | // Ensure only whole characters selected |
| 3257 | search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false); |
| 3258 | lenRet = search.eopat[0] - search.bopat[0]; |
| 3259 | // There can be only one start of a line, so no need to look for last match in line |
| 3260 | if ((resr.increment == -1) && !searchforLineStart) { |
| 3261 | // Check for the last match on this line. |
| 3262 | int repetitions = 1000; // Break out of infinite loop |
| 3263 | while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { |
| 3264 | success = search.Execute(di, pos+1, endOfLine); |
| 3265 | if (success) { |
| 3266 | if (search.eopat[0] <= minPos) { |
| 3267 | pos = search.bopat[0]; |
| 3268 | lenRet = search.eopat[0] - search.bopat[0]; |
| 3269 | } else { |
| 3270 | success = 0; |
| 3271 | } |
| 3272 | } |
| 3273 | } |
| 3274 | } |
| 3275 | break; |
| 3276 | } |
| 3277 | } |
| 3278 | *length = lenRet; |
| 3279 | return pos; |
| 3280 | } |
| 3281 | |
| 3282 | const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) { |
| 3283 | substituted.clear(); |
| 3284 | const DocumentIndexer di(doc, doc->Length()); |
| 3285 | search.GrabMatches(di); |
| 3286 | for (Sci::Position j = 0; j < *length; j++) { |
| 3287 | if (text[j] == '\\') { |
| 3288 | if (text[j + 1] >= '0' && text[j + 1] <= '9') { |
| 3289 | const unsigned int patNum = text[j + 1] - '0'; |
| 3290 | const Sci::Position len = search.eopat[patNum] - search.bopat[patNum]; |
| 3291 | if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur |
| 3292 | substituted.append(search.pat[patNum].c_str(), len); |
| 3293 | j++; |
| 3294 | } else { |
| 3295 | j++; |
| 3296 | switch (text[j]) { |
| 3297 | case 'a': |
| 3298 | substituted.push_back('\a'); |
| 3299 | break; |
| 3300 | case 'b': |
| 3301 | substituted.push_back('\b'); |
| 3302 | break; |
| 3303 | case 'f': |
| 3304 | substituted.push_back('\f'); |
| 3305 | break; |
| 3306 | case 'n': |
| 3307 | substituted.push_back('\n'); |
| 3308 | break; |
| 3309 | case 'r': |
| 3310 | substituted.push_back('\r'); |
| 3311 | break; |
| 3312 | case 't': |
| 3313 | substituted.push_back('\t'); |
| 3314 | break; |
| 3315 | case 'v': |
| 3316 | substituted.push_back('\v'); |
| 3317 | break; |
| 3318 | case '\\': |
| 3319 | substituted.push_back('\\'); |
| 3320 | break; |
| 3321 | default: |
| 3322 | substituted.push_back('\\'); |
| 3323 | j--; |
| 3324 | } |
| 3325 | } |
| 3326 | } else { |
| 3327 | substituted.push_back(text[j]); |
| 3328 | } |
| 3329 | } |
| 3330 | *length = substituted.length(); |
| 3331 | return substituted.c_str(); |
| 3332 | } |
| 3333 | |
| 3334 | #ifndef SCI_OWNREGEX |
| 3335 | |
| 3336 | RegexSearchBase *Scintilla::Internal::CreateRegexSearch(CharClassify *charClassTable) { |
| 3337 | return new BuiltinRegex(charClassTable); |
| 3338 | } |
| 3339 | |
| 3340 | #endif |
| 3341 | |