SourceCode.cpp source code [llvm/clang-tools-extra/clangd/SourceCode.cpp]

//===--- SourceCode.cpp - Manipulating source code as strings ------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	#include "SourceCode.h"
9
10	#include "FuzzyMatch.h"
11	#include "Preamble.h"
12	#include "Protocol.h"
13	#include "support/Context.h"
14	#include "support/Logger.h"
15	#include "clang/Basic/FileEntry.h"
16	#include "clang/Basic/LangOptions.h"
17	#include "clang/Basic/SourceLocation.h"
18	#include "clang/Basic/SourceManager.h"
19	#include "clang/Basic/TokenKinds.h"
20	#include "clang/Driver/Types.h"
21	#include "clang/Format/Format.h"
22	#include "clang/Lex/Lexer.h"
23	#include "clang/Lex/Preprocessor.h"
24	#include "clang/Lex/Token.h"
25	#include "clang/Tooling/Core/Replacement.h"
26	#include "clang/Tooling/Syntax/Tokens.h"
27	#include "llvm/ADT/ArrayRef.h"
28	#include "llvm/ADT/BitVector.h"
29	#include "llvm/ADT/STLExtras.h"
30	#include "llvm/ADT/StringExtras.h"
31	#include "llvm/ADT/StringMap.h"
32	#include "llvm/ADT/StringRef.h"
33	#include "llvm/Support/Compiler.h"
34	#include "llvm/Support/Errc.h"
35	#include "llvm/Support/Error.h"
36	#include "llvm/Support/ErrorHandling.h"
37	#include "llvm/Support/LineIterator.h"
38	#include "llvm/Support/MemoryBuffer.h"
39	#include "llvm/Support/Path.h"
40	#include "llvm/Support/VirtualFileSystem.h"
41	#include "llvm/Support/xxhash.h"
42	#include <algorithm>
43	#include <cstddef>
44	#include <optional>
45	#include <string>
46	#include <vector>
47
48	namespace clang {
49	namespace clangd {
50
51	// Here be dragons. LSP positions use columns measured in UTF-16 code units!
52	// Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
53
54	// Iterates over unicode codepoints in the (UTF-8) string. For each,
55	// invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
56	// Returns true if CB returned true, false if we hit the end of string.
57	//
58	// If the string is not valid UTF-8, we log this error and "decode" the
59	// text in some arbitrary way. This is pretty sad, but this tends to happen deep
60	// within indexing of headers where clang misdetected the encoding, and
61	// propagating the error all the way back up is (probably?) not be worth it.
62	template <typename Callback>
63	static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
64	bool LoggedInvalid = false;
65	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
66	// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
67	for (size_t I = `0`; I < U8.size();) {
68	unsigned char C = static_cast<unsigned char>(U8 [I]);
69	if (LLVM_LIKELY(!(C & `0x80`))) { // ASCII character.
70	if (CB(`1`, `1`))
71	return true;
72	++I;
73	continue;
74	}
75	// This convenient property of UTF-8 holds for all non-ASCII characters.
76	size_t UTF8Length = llvm::countl_one(C);
77	// 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
78	// 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-.*
79	if (LLVM_UNLIKELY(UTF8Length < `2` \|\| UTF8Length > `4`)) {
80	if (!LoggedInvalid) {
81	elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
82	LoggedInvalid = true;
83	}
84	// We can't give a correct result, but avoid returning something wild.
85	// Pretend this is a valid ASCII byte, for lack of better options.
86	// (Too late to get ISO-8859- right, we've skipped some bytes already).*
87	if (CB(`1`, `1`))
88	return true;
89	++I;
90	continue;
91	}
92	I += UTF8Length; // Skip over all trailing bytes.
93	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
94	// Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
95	if (CB(UTF8Length, UTF8Length == `4` ? `2` : `1`))
96	return true;
97	}
98	return false;
99	}
100
101	// Returns the byte offset into the string that is an offset of \p Units in
102	// the specified encoding.
103	// Conceptually, this converts to the encoding, truncates to CodeUnits,
104	// converts back to UTF-8, and returns the length in bytes.
105	static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
106	bool &Valid) {
107	Valid = Units >= `0`;
108	if (Units <= `0`)
109	return `0`;
110	size_t Result = `0`;
111	switch (Enc) {
112	case OffsetEncoding::UTF8:
113	Result = Units;
114	break;
115	case OffsetEncoding::UTF16:
116	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
117	Result += U8Len;
118	Units -= U16Len;
119	return Units <= `0`;
120	});
121	if (Units < `0`) // Offset in the middle of a surrogate pair.
122	Valid = false;
123	break;
124	case OffsetEncoding::UTF32:
125	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
126	Result += U8Len;
127	Units--;
128	return Units <= `0`;
129	});
130	break;
131	case OffsetEncoding::UnsupportedEncoding:
132	llvm_unreachable("unsupported encoding");
133	}
134	// Don't return an out-of-range index if we overran.
135	if (Result > U8.size()) {
136	Valid = false;
137	return U8.size();
138	}
139	return Result;
140	}
141
142	Key<OffsetEncoding> kCurrentOffsetEncoding;
143	static OffsetEncoding lspEncoding() {
144	auto *Enc = Context::current().get(kCurrentOffsetEncoding);
145	return Enc ? *Enc : OffsetEncoding::UTF16;
146	}
147
148	// Like most strings in clangd, the input is UTF-8 encoded.
149	size_t lspLength(llvm::StringRef Code) {
150	size_t Count = `0`;
151	switch (lspEncoding()) {
152	case OffsetEncoding::UTF8:
153	Count = Code.size();
154	break;
155	case OffsetEncoding::UTF16:
156	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
157	Count += U16Len;
158	return false;
159	});
160	break;
161	case OffsetEncoding::UTF32:
162	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
163	++Count;
164	return false;
165	});
166	break;
167	case OffsetEncoding::UnsupportedEncoding:
168	llvm_unreachable("unsupported encoding");
169	}
170	return Count;
171	}
172
173	llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
174	bool AllowColumnsBeyondLineLength) {
175	if (P.line < `0`)
176	return error(llvm::errc::invalid_argument,
177	"Line value can't be negative ({0})", P.line);
178	if (P.character < `0`)
179	return error(llvm::errc::invalid_argument,
180	"Character value can't be negative ({0})", P.character);
181	size_t StartOfLine = `0`;
182	for (int I = `0`; I != P.line; ++I) {
183	size_t NextNL = Code.find(`'\n'`, StartOfLine);
184	if (NextNL == llvm::StringRef::npos)
185	return error(llvm::errc::invalid_argument,
186	"Line value is out of range ({0})", P.line);
187	StartOfLine = NextNL + `1`;
188	}
189	StringRef Line =
190	Code.substr(StartOfLine).take_until([](char C) { return C == `'\n'`; });
191
192	// P.character may be in UTF-16, transcode if necessary.
193	bool Valid;
194	size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
195	if (!Valid && !AllowColumnsBeyondLineLength)
196	return error(llvm::errc::invalid_argument,
197	"{0} offset {1} is invalid for line {2}", lspEncoding(),
198	P.character, P.line);
199	return StartOfLine + ByteInLine;
200	}
201
202	Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
203	Offset = std::min(Code.size(), Offset);
204	llvm::StringRef Before = Code.substr(`0`, Offset);
205	int Lines = Before.count(`'\n'`);
206	size_t PrevNL = Before.rfind(`'\n'`);
207	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? `0` : (PrevNL + `1`);
208	Position Pos;
209	Pos.line = Lines;
210	Pos.character = lspLength(Before.substr(StartOfLine));
211	return Pos;
212	}
213
214	Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
215	// We use the SourceManager's line tables, but its column number is in bytes.
216	FileID FID;
217	unsigned Offset;
218	std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
219	Position P;
220	P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - `1`;
221	bool Invalid = false;
222	llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
223	if (!Invalid) {
224	auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - `1`;
225	auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
226	P.character = lspLength(LineSoFar);
227	}
228	return P;
229	}
230
231	bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) {
232	if (Loc.isFileID())
233	return true;
234	auto Spelling = SM.getDecomposedSpellingLoc(Loc);
235	StringRef SpellingFile = SM.getSLocEntry(Spelling.first).getFile().getName();
236	if (SpellingFile == "<scratch space>")
237	return false;
238	if (SpellingFile == "<built-in>")
239	// __STDC__ etc are considered spelled, but BAR in arg -DFOO=BAR is not.
240	return !SM.isWrittenInCommandLineFile(
241	SM.getComposedLoc(Spelling.first, Spelling.second));
242	return true;
243	}
244
245	bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
246	if (!R.getBegin().isValid() \|\| !R.getEnd().isValid())
247	return false;
248
249	FileID BeginFID;
250	size_t BeginOffset = `0`;
251	std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
252
253	FileID EndFID;
254	size_t EndOffset = `0`;
255	std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
256
257	return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
258	}
259
260	SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) {
261	assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
262	FileID IncludingFile;
263	unsigned Offset;
264	std::tie(IncludingFile, Offset) =
265	SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
266	bool Invalid = false;
267	llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
268	if (Invalid)
269	return SourceLocation ();
270	// Now buf is "...\n#include <foo>\n..."
271	// and Offset points here: ^
272	// Rewind to the preceding # on the line.
273	assert(Offset < Buf.size());
274	for (;; --Offset) {
275	if (Buf [Offset] == `'#'`)
276	return SM.getComposedLoc(IncludingFile, Offset);
277	if (Buf [Offset] == `'\n'` \|\| Offset == `0`) // no hash, what's going on?
278	return SourceLocation ();
279	}
280	}
281
282	static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
283	const LangOptions &LangOpts) {
284	Token TheTok;
285	if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts))
286	return `0`;
287	// FIXME: Here we check whether the token at the location is a greatergreater
288	// (>>) token and consider it as a single greater (>). This is to get it
289	// working for templates but it isn't correct for the right shift operator. We
290	// can avoid this by using half open char ranges in getFileRange() but getting
291	// token ending is not well supported in macroIDs.
292	if (TheTok.is(tok::greatergreater))
293	return `1`;
294	return TheTok.getLength();
295	}
296
297	// Returns location of the last character of the token at a given loc
298	static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc,
299	const SourceManager &SM,
300	const LangOptions &LangOpts) {
301	unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts);
302	return BeginLoc.getLocWithOffset(Len ? Len - `1` : `0`);
303	}
304
305	// Returns location of the starting of the token at a given EndLoc
306	static SourceLocation getLocForTokenBegin(SourceLocation EndLoc,
307	const SourceManager &SM,
308	const LangOptions &LangOpts) {
309	return EndLoc.getLocWithOffset(
310	-(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts));
311	}
312
313	// Converts a char source range to a token range.
314	static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM,
315	const LangOptions &LangOpts) {
316	if (!Range.isTokenRange())
317	Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts));
318	return Range.getAsRange();
319	}
320	// Returns the union of two token ranges.
321	// To find the maximum of the Ends of the ranges, we compare the location of the
322	// last character of the token.
323	static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
324	const SourceManager &SM,
325	const LangOptions &LangOpts) {
326	SourceLocation Begin =
327	SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
328	? R1.getBegin()
329	: R2.getBegin();
330	SourceLocation End =
331	SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts),
332	getLocForTokenEnd(R2.getEnd(), SM, LangOpts))
333	? R2.getEnd()
334	: R1.getEnd();
335	return SourceRange (Begin, End);
336	}
337
338	// Given a range whose endpoints may be in different expansions or files,
339	// tries to find a range within a common file by following up the expansion and
340	// include location in each.
341	static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM,
342	const LangOptions &LangOpts) {
343	// Fast path for most common cases.
344	if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
345	return R;
346	// Record the stack of expansion locations for the beginning, keyed by FileID.
347	llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
348	for (SourceLocation Begin = R.getBegin(); Begin.isValid();
349	Begin = Begin.isFileID()
350	? includeHashLoc(SM.getFileID(Begin), SM)
351	: SM.getImmediateExpansionRange(Begin).getBegin()) {
352	BeginExpansions [SM.getFileID(Begin)] = Begin;
353	}
354	// Move up the stack of expansion locations for the end until we find the
355	// location in BeginExpansions with that has the same file id.
356	for (SourceLocation End = R.getEnd(); End.isValid();
357	End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM)
358	: toTokenRange(SM.getImmediateExpansionRange(End),
359	SM, LangOpts)
360	.getEnd()) {
361	auto It = BeginExpansions.find(SM.getFileID(End));
362	if (It != BeginExpansions.end()) {
363	if (SM.getFileOffset(It ->second) > SM.getFileOffset(End))
364	return SourceLocation ();
365	return {It ->second, End};
366	}
367	}
368	return SourceRange ();
369	}
370
371	// Find an expansion range (not necessarily immediate) the ends of which are in
372	// the same file id.
373	static SourceRange
374	getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM,
375	const LangOptions &LangOpts) {
376	return rangeInCommonFile(
377	toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM,
378	LangOpts);
379	}
380
381	// Returns the file range for a given Location as a Token Range
382	// This is quite similar to getFileLoc in SourceManager as both use
383	// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
384	// However:
385	// - We want to maintain the full range information as we move from one file to
386	// the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
387	// - We want to split '>>' tokens as the lexer parses the '>>' in nested
388	// template instantiations as a '>>' instead of two '>'s.
389	// There is also getExpansionRange but it simply calls
390	// getImmediateExpansionRange on the begin and ends separately which is wrong.
391	static SourceRange getTokenFileRange(SourceLocation Loc,
392	const SourceManager &SM,
393	const LangOptions &LangOpts) {
394	SourceRange FileRange = Loc;
395	while (!FileRange.getBegin().isFileID()) {
396	if (SM.isMacroArgExpansion(FileRange.getBegin())) {
397	FileRange = unionTokenRange(
398	SM.getImmediateSpellingLoc(FileRange.getBegin()),
399	SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
400	assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
401	} else {
402	SourceRange ExpansionRangeForBegin =
403	getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
404	SourceRange ExpansionRangeForEnd =
405	getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
406	if (ExpansionRangeForBegin.isInvalid() \|\|
407	ExpansionRangeForEnd.isInvalid())
408	return SourceRange ();
409	assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
410	ExpansionRangeForEnd.getBegin()) &&
411	"Both Expansion ranges should be in same file.");
412	FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
413	SM, LangOpts);
414	}
415	}
416	return FileRange;
417	}
418
419	bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
420	if (!Loc.isValid())
421	return false;
422	FileID FID = SM.getFileID(SM.getExpansionLoc(Loc));
423	return FID == SM.getMainFileID() \|\| FID == SM.getPreambleFileID();
424	}
425
426	std::optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
427	const LangOptions &LangOpts,
428	SourceRange R) {
429	SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
430	if (!isValidFileRange(SM, R1))
431	return std::nullopt;
432
433	SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
434	if (!isValidFileRange(SM, R2))
435	return std::nullopt;
436
437	SourceRange Result =
438	rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
439	unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
440	// Convert from closed token range to half-open (char) range
441	Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
442	if (!isValidFileRange(SM, Result))
443	return std::nullopt;
444
445	return Result;
446	}
447
448	llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
449	assert(isValidFileRange(SM, R));
450	auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
451	assert(Buf);
452
453	size_t BeginOffset = SM.getFileOffset(R.getBegin());
454	size_t EndOffset = SM.getFileOffset(R.getEnd());
455	return Buf ->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
456	}
457
458	llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
459	Position P) {
460	llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
461	auto Offset =
462	positionToOffset(Code, P, /AllowColumnsBeyondLineLength=/false);
463	if (!Offset)
464	return Offset.takeError();
465	return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
466	}
467
468	Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
469	// Clang is 1-based, LSP uses 0-based indexes.
470	Position Begin = sourceLocToPosition(SM, R.getBegin());
471	Position End = sourceLocToPosition(SM, R.getEnd());
472
473	return {Begin, End};
474	}
475
476	void unionRanges(Range &A, Range B) {
477	if (B.start < A.start)
478	A.start = B.start;
479	if (A.end < B.end)
480	A.end = B.end;
481	}
482
483	std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
484	size_t Offset) {
485	Offset = std::min(Code.size(), Offset);
486	llvm::StringRef Before = Code.substr(`0`, Offset);
487	int Lines = Before.count(`'\n'`);
488	size_t PrevNL = Before.rfind(`'\n'`);
489	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? `0` : (PrevNL + `1`);
490	return {Lines + `1`, Offset - StartOfLine + `1`};
491	}
492
493	std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
494	size_t Pos = QName.rfind("::");
495	if (Pos == llvm::StringRef::npos)
496	return {llvm::StringRef (), QName};
497	return {QName.substr(`0`, Pos + `2`), QName.substr(Pos + `2`)};
498	}
499
500	TextEdit replacementToEdit(llvm::StringRef Code,
501	const tooling::Replacement &R) {
502	Range ReplacementRange = {
503	offsetToPosition(Code, R.getOffset()),
504	offsetToPosition(Code, R.getOffset() + R.getLength())};
505	return {ReplacementRange, std::string (R.getReplacementText())};
506	}
507
508	std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
509	const tooling::Replacements &Repls) {
510	std::vector<TextEdit> Edits;
511	for (const auto &R : Repls)
512	Edits.push_back(replacementToEdit(Code, R));
513	return Edits;
514	}
515
516	std::optional<std::string> getCanonicalPath(const FileEntryRef F,
517	FileManager &FileMgr) {
518	llvm::SmallString<`128`> FilePath = F.getName();
519	if (!llvm::sys::path::is_absolute(FilePath)) {
520	if (auto EC =
521	FileMgr.getVirtualFileSystem().makeAbsolute(
522	FilePath)) {
523	elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
524	EC.message());
525	return std::nullopt;
526	}
527	}
528
529	// Handle the symbolic link path case where the current working directory
530	// (getCurrentWorkingDirectory) is a symlink. We always want to the real
531	// file path (instead of the symlink path) for the C++ symbols.
532	//
533	// Consider the following example:
534	//
535	// src dir: /project/src/foo.h
536	// current working directory (symlink): /tmp/build -> /project/src/
537	//
538	// The file path of Symbol is "/project/src/foo.h" instead of
539	// "/tmp/build/foo.h"
540	if (auto Dir = FileMgr.getOptionalDirectoryRef(
541	llvm::sys::path::parent_path(FilePath))) {
542	llvm::SmallString<`128`> RealPath;
543	llvm::StringRef DirName = FileMgr.getCanonicalName(*Dir);
544	llvm::sys::path::append(RealPath, DirName,
545	llvm::sys::path::filename(FilePath));
546	return RealPath.str().str();
547	}
548
549	return FilePath.str().str();
550	}
551
552	TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
553	const LangOptions &L) {
554	TextEdit Result;
555	Result.range =
556	halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
557	Result.newText = FixIt.CodeToInsert;
558	return Result;
559	}
560
561	FileDigest digest(llvm::StringRef Content) {
562	uint64_t Hash{llvm::xxHash64(Content)};
563	FileDigest Result;
564	for (unsigned I = `0`; I < Result.size(); ++I) {
565	Result [I] = uint8_t(Hash);
566	Hash >>= `8`;
567	}
568	return Result;
569	}
570
571	std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
572	bool Invalid = false;
573	llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
574	if (Invalid)
575	return std::nullopt;
576	return digest(Content);
577	}
578
579	format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
580	llvm::StringRef Content,
581	const ThreadsafeFS &TFS) {
582	auto Style = format::getStyle(format::DefaultFormatStyle, File,
583	format::DefaultFallbackStyle, Content,
584	TFS.view(/CWD=/std::nullopt).get());
585	if (!Style) {
586	log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
587	Style.takeError());
588	return format::getLLVMStyle();
589	}
590	return *Style;
591	}
592
593	llvm::Expected<tooling::Replacements>
594	cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
595	const format::FormatStyle &Style) {
596	auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
597	if (!CleanReplaces)
598	return CleanReplaces;
599	return formatReplacements(Code, std::move(*CleanReplaces), Style);
600	}
601
602	static void
603	lex(llvm::StringRef Code, const LangOptions &LangOpts,
604	llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
605	Action) {
606	// FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
607	std::string NullTerminatedCode = Code.str();
608	SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
609	auto &SM = FileSM.get();
610	for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
611	Action (Tok, SM);
612	}
613
614	llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
615	const format::FormatStyle &Style) {
616	llvm::StringMap<unsigned> Identifiers;
617	auto LangOpt = format::getFormattingLangOpts(Style);
618	lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
619	if (Tok.kind() == tok::identifier)
620	++Identifiers [Tok.text(SM)];
621	// FIXME: Should this function really return keywords too ?
622	else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
623	++Identifiers [Keyword];
624	});
625	return Identifiers;
626	}
627
628	std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
629	llvm::StringRef Content,
630	const LangOptions &LangOpts) {
631	std::vector<Range> Ranges;
632	lex(Content, LangOpts,
633	[&](const syntax::Token &Tok, const SourceManager &SM) {
634	if (Tok.kind() != tok::identifier \|\| Tok.text(SM) != Identifier)
635	return;
636	Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
637	});
638	return Ranges;
639	}
640
641	bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) {
642	// Keywords are initialized in constructor.
643	clang::IdentifierTable KeywordsTable(LangOpts);
644	return KeywordsTable.find(NewName) != KeywordsTable.end();
645	}
646
647	namespace {
648	struct NamespaceEvent {
649	enum {
650	BeginNamespace, // namespace <ns> {. Payload is resolved <ns>.
651	EndNamespace, // } // namespace <ns>. Payload is resolved outer
652	// namespace.
653	UsingDirective // using namespace <ns>. Payload is unresolved <ns>.
654	} Trigger;
655	std::string Payload;
656	Position Pos;
657	};
658	// Scans C++ source code for constructs that change the visible namespaces.
659	void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
660	llvm::function_ref<void(NamespaceEvent)> Callback) {
661
662	// Stack of enclosing namespaces, e.g. {"clang", "clangd"}
663	std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
664	// Stack counts open braces. true if the brace opened a namespace.
665	llvm::BitVector BraceStack;
666
667	enum {
668	Default,
669	Namespace, // just saw 'namespace'
670	NamespaceName, // just saw 'namespace' NSName
671	Using, // just saw 'using'
672	UsingNamespace, // just saw 'using namespace'
673	UsingNamespaceName, // just saw 'using namespace' NSName
674	} State = Default;
675	std::string NSName;
676
677	NamespaceEvent Event;
678	lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) {
679	Event.Pos = sourceLocToPosition(SM, Tok.location());
680	switch (Tok.kind()) {
681	case tok::kw_using:
682	State = State == Default ? Using : Default;
683	break;
684	case tok::kw_namespace:
685	switch (State) {
686	case Using:
687	State = UsingNamespace;
688	break;
689	case Default:
690	State = Namespace;
691	break;
692	default:
693	State = Default;
694	break;
695	}
696	break;
697	case tok::identifier:
698	switch (State) {
699	case UsingNamespace:
700	NSName.clear();
701	[[fallthrough]];
702	case UsingNamespaceName:
703	NSName.append(Tok.text(SM).str());
704	State = UsingNamespaceName;
705	break;
706	case Namespace:
707	NSName.clear();
708	[[fallthrough]];
709	case NamespaceName:
710	NSName.append(Tok.text(SM).str());
711	State = NamespaceName;
712	break;
713	case Using:
714	case Default:
715	State = Default;
716	break;
717	}
718	break;
719	case tok::coloncolon:
720	// This can come at the beginning or in the middle of a namespace
721	// name.
722	switch (State) {
723	case UsingNamespace:
724	NSName.clear();
725	[[fallthrough]];
726	case UsingNamespaceName:
727	NSName.append("::");
728	State = UsingNamespaceName;
729	break;
730	case NamespaceName:
731	NSName.append("::");
732	State = NamespaceName;
733	break;
734	case Namespace: // Not legal here.
735	case Using:
736	case Default:
737	State = Default;
738	break;
739	}
740	break;
741	case tok::l_brace:
742	// Record which { started a namespace, so we know when } ends one.
743	if (State == NamespaceName) {
744	// Parsed: namespace <name> {
745	BraceStack.push_back(true);
746	Enclosing.push_back(NSName);
747	Event.Trigger = NamespaceEvent::BeginNamespace;
748	Event.Payload = llvm::join(Enclosing, "::");
749	Callback (Event);
750	} else {
751	// This case includes anonymous namespaces (State = Namespace).
752	// For our purposes, they're not namespaces and we ignore them.
753	BraceStack.push_back(false);
754	}
755	State = Default;
756	break;
757	case tok::r_brace:
758	// If braces are unmatched, we're going to be confused, but don't
759	// crash.
760	if (!BraceStack.empty()) {
761	if (BraceStack.back()) {
762	// Parsed: } // namespace
763	Enclosing.pop_back();
764	Event.Trigger = NamespaceEvent::EndNamespace;
765	Event.Payload = llvm::join(Enclosing, "::");
766	Callback (Event);
767	}
768	BraceStack.pop_back();
769	}
770	break;
771	case tok::semi:
772	if (State == UsingNamespaceName) {
773	// Parsed: using namespace <name> ;
774	Event.Trigger = NamespaceEvent::UsingDirective;
775	Event.Payload = std::move(NSName);
776	Callback (Event);
777	}
778	State = Default;
779	break;
780	default:
781	State = Default;
782	break;
783	}
784	});
785	}
786
787	// Returns the prefix namespaces of NS: {"" ... NS}.
788	llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
789	llvm::SmallVector<llvm::StringRef> Results;
790	Results.push_back(NS.take_front(`0`));
791	NS.split(Results, "::", /MaxSplit=/-`1`, /KeepEmpty=/false);
792	for (llvm::StringRef &R : Results)
793	R = NS.take_front(R.end() - NS.begin());
794	return Results;
795	}
796
797	// Checks whether \p FileName is a valid spelling of main file.
798	bool isMainFile(llvm::StringRef FileName, const SourceManager &SM) {
799	auto FE = SM.getFileManager().getFile(FileName);
800	return FE && *FE == SM.getFileEntryForID(SM.getMainFileID());
801	}
802
803	} // namespace
804
805	std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
806	const LangOptions &LangOpts) {
807	std::string Current;
808	// Map from namespace to (resolved) namespaces introduced via using directive.
809	llvm::StringMap<llvm::StringSet<>> UsingDirectives;
810
811	parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
812	llvm::StringRef NS = Event.Payload;
813	switch (Event.Trigger) {
814	case NamespaceEvent::BeginNamespace:
815	case NamespaceEvent::EndNamespace:
816	Current = std::move(Event.Payload);
817	break;
818	case NamespaceEvent::UsingDirective:
819	if (NS.consume_front("::"))
820	UsingDirectives [Current].insert(NS);
821	else {
822	for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
823	if (Enclosing.empty())
824	UsingDirectives [Current].insert(NS);
825	else
826	UsingDirectives [Current].insert((Enclosing + "::" + NS).str());
827	}
828	}
829	break;
830	}
831	});
832
833	std::vector<std::string> Found;
834	for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
835	Found.push_back(std::string (Enclosing));
836	auto It = UsingDirectives.find(Enclosing);
837	if (It != UsingDirectives.end())
838	for (const auto &Used : It ->second)
839	Found.push_back(std::string (Used.getKey()));
840	}
841
842	llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
843	if (Current == RHS)
844	return false;
845	if (Current == LHS)
846	return true;
847	return LHS < RHS;
848	});
849	Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
850	return Found;
851	}
852
853	llvm::StringSet<> collectWords(llvm::StringRef Content) {
854	// We assume short words are not significant.
855	// We may want to consider other stopwords, e.g. language keywords.
856	// (A very naive implementation showed no benefit, but lexing might do better)
857	static constexpr int MinWordLength = `4`;
858
859	std::vector<CharRole> Roles(Content.size());
860	calculateRoles(Content, Roles);
861
862	llvm::StringSet<> Result;
863	llvm::SmallString<`256`> Word;
864	auto Flush = [&] {
865	if (Word.size() >= MinWordLength) {
866	for (char &C : Word)
867	C = llvm::toLower(C);
868	Result.insert(Word);
869	}
870	Word.clear();
871	};
872	for (unsigned I = `0`; I < Content.size(); ++I) {
873	switch (Roles [I]) {
874	case Head:
875	Flush ();
876	[[fallthrough]];
877	case Tail:
878	Word.push_back(Content [I]);
879	break;
880	case Unknown:
881	case Separator:
882	Flush ();
883	break;
884	}
885	}
886	Flush ();
887
888	return Result;
889	}
890
891	static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
892	llvm::StringRef After) {
893	// `foo` is an identifier.
894	if (Before.endswith("`") && After.startswith("`"))
895	return true;
896	// In foo::bar, both foo and bar are identifiers.
897	if (Before.endswith("::") \|\| After.startswith("::"))
898	return true;
899	// Doxygen tags like \c foo indicate identifiers.
900	// Don't search too far back.
901	// This duplicates clang's doxygen parser, revisit if it gets complicated.
902	Before = Before.take_back(`100`); // Don't search too far back.
903	auto Pos = Before.find_last_of("\\@");
904	if (Pos != llvm::StringRef::npos) {
905	llvm::StringRef Tag = Before.substr(Pos + `1`).rtrim(`' '`);
906	if (Tag == "p" \|\| Tag == "c" \|\| Tag == "class" \|\| Tag == "tparam" \|\|
907	Tag == "param" \|\| Tag == "param[in]" \|\| Tag == "param[out]" \|\|
908	Tag == "param[in,out]" \|\| Tag == "retval" \|\| Tag == "throw" \|\|
909	Tag == "throws" \|\| Tag == "link")
910	return true;
911	}
912
913	// Word contains underscore.
914	// This handles things like snake_case and MACRO_CASE.
915	if (Word.contains(`'_'`)) {
916	return true;
917	}
918	// Word contains capital letter other than at beginning.
919	// This handles things like lowerCamel and UpperCamel.
920	// The check for also containing a lowercase letter is to rule out
921	// initialisms like "HTTP".
922	bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
923	bool HasUpper = Word.substr(`1`).find_if(clang::isUppercase) != StringRef::npos;
924	if (HasLower && HasUpper) {
925	return true;
926	}
927	// FIXME: consider mid-sentence Capitalization?
928	return false;
929	}
930
931	std::optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
932	const syntax::TokenBuffer &TB,
933	const LangOptions &LangOpts) {
934	const auto &SM = TB.sourceManager();
935	auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
936	for (const auto &T : Touching) {
937	// If the token is an identifier or a keyword, don't use any heuristics.
938	if (tok::isAnyIdentifier(T.kind()) \|\| tok::getKeywordSpelling(T.kind())) {
939	SpelledWord Result;
940	Result.Location = T.location();
941	Result.Text = T.text(SM);
942	Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
943	Result.PartOfSpelledToken = &T;
944	Result.SpelledToken = &T;
945	auto Expanded =
946	TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
947	if (Expanded.size() == `1` && Expanded.front().text(SM) == Result.Text)
948	Result.ExpandedToken = &Expanded.front();
949	return Result;
950	}
951	}
952	FileID File;
953	unsigned Offset;
954	std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
955	bool Invalid = false;
956	llvm::StringRef Code = SM.getBufferData(File, &Invalid);
957	if (Invalid)
958	return std::nullopt;
959	unsigned B = Offset, E = Offset;
960	while (B > `0` && isAsciiIdentifierContinue(Code [B - `1`]))
961	--B;
962	while (E < Code.size() && isAsciiIdentifierContinue(Code [E]))
963	++E;
964	if (B == E)
965	return std::nullopt;
966
967	SpelledWord Result;
968	Result.Location = SM.getComposedLoc(File, B);
969	Result.Text = Code.slice(B, E);
970	Result.LikelyIdentifier =
971	isLikelyIdentifier(Result.Text, Code.substr(`0`, B), Code.substr(E)) &&
972	// should not be a keyword
973	tok::isAnyIdentifier(
974	IdentifierTable (LangOpts).get(Result.Text).getTokenID());
975	for (const auto &T : Touching)
976	if (T.location() <= Result.Location)
977	Result.PartOfSpelledToken = &T;
978	return Result;
979	}
980
981	std::optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
982	Preprocessor &PP) {
983	if (SpelledTok.kind() != tok::identifier)
984	return std::nullopt;
985	SourceLocation Loc = SpelledTok.location();
986	assert(Loc.isFileID());
987	const auto &SM = PP.getSourceManager();
988	IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
989	if (!IdentifierInfo \|\| !IdentifierInfo->hadMacroDefinition())
990	return std::nullopt;
991
992	// We need to take special case to handle #define and #undef.
993	// Preprocessor::getMacroDefinitionAtLoc() only considers a macro
994	// definition to be in scope after* the location of the macro name in a*
995	// #define that introduces it, and before* the location of the macro name*
996	// in an #undef that undefines it. To handle these cases, we check for
997	// the macro being in scope either just after or just before the location
998	// of the token. In getting the location before, we also take care to check
999	// for start-of-file.
1000	FileID FID = SM.getFileID(Loc);
1001	assert(Loc != SM.getLocForEndOfFile(FID));
1002	SourceLocation JustAfterToken = Loc.getLocWithOffset(`1`);
1003	auto *MacroInfo =
1004	PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo();
1005	if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) {
1006	SourceLocation JustBeforeToken = Loc.getLocWithOffset(-`1`);
1007	MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken)
1008	.getMacroInfo();
1009	}
1010	if (!MacroInfo) {
1011	return std::nullopt;
1012	}
1013	return DefinedMacro{
1014	IdentifierInfo->getName(), MacroInfo,
1015	translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)};
1016	}
1017
1018	llvm::Expected<std::string> Edit::apply() const {
1019	return tooling::applyAllReplacements(InitialCode, Replacements);
1020	}
1021
1022	std::vector<TextEdit> Edit::asTextEdits() const {
1023	return replacementsToEdits(InitialCode, Replacements);
1024	}
1025
1026	bool Edit::canApplyTo(llvm::StringRef Code) const {
1027	// Create line iterators, since line numbers are important while applying our
1028	// edit we cannot skip blank lines.
1029	auto LHS = llvm::MemoryBuffer::getMemBuffer(Code);
1030	llvm::line_iterator LHSIt(LHS, /SkipBlanks=/*false);
1031
1032	auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode);
1033	llvm::line_iterator RHSIt(RHS, /SkipBlanks=/*false);
1034
1035	// Compare the InitialCode we prepared the edit for with the Code we received
1036	// line by line to make sure there are no differences.
1037	// FIXME: This check is too conservative now, it should be enough to only
1038	// check lines around the replacements contained inside the Edit.
1039	while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) {
1040	if (LHSIt != RHSIt)
1041	return false;
1042	++LHSIt;
1043	++RHSIt;
1044	}
1045
1046	// After we reach EOF for any of the files we make sure the other one doesn't
1047	// contain any additional content except empty lines, they should not
1048	// interfere with the edit we produced.
1049	while (!LHSIt.is_at_eof()) {
1050	if (!LHSIt ->empty())
1051	return false;
1052	++LHSIt;
1053	}
1054	while (!RHSIt.is_at_eof()) {
1055	if (!RHSIt ->empty())
1056	return false;
1057	++RHSIt;
1058	}
1059	return true;
1060	}
1061
1062	llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) {
1063	if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style))
1064	E.Replacements = std::move(*NewEdits);
1065	else
1066	return NewEdits.takeError();
1067	return llvm::Error::success();
1068	}
1069
1070	// Workaround for editors that have buggy handling of newlines at end of file.
1071	//
1072	// The editor is supposed to expose document contents over LSP as an exact
1073	// string, with whitespace and newlines well-defined. But internally many
1074	// editors treat text as an array of lines, and there can be ambiguity over
1075	// whether the last line ends with a newline or not.
1076	//
1077	// This confusion can lead to incorrect edits being sent. Failing to apply them
1078	// is catastrophic: we're desynced, LSP has no mechanism to get back in sync.
1079	// We apply a heuristic to avoid this state.
1080	//
1081	// If our current view of an N-line file does not* end in a newline, but the*
1082	// editor refers to the start of the next line (an impossible location), then
1083	// we silently add a newline to make this valid.
1084	// We will still validate that the rangeLength is correct, including* the*
1085	// inferred newline.
1086	//
1087	// See https://github.com/neovim/neovim/issues/17085
1088	static void inferFinalNewline(llvm::Expected<size_t> &Err,
1089	std::string &Contents, const Position &Pos) {
1090	if (Err)
1091	return;
1092	if (!Contents.empty() && Contents.back() == `'\n'`)
1093	return;
1094	if (Pos.character != `0`)
1095	return;
1096	if (Pos.line != llvm::count(Contents, `'\n'`) + `1`)
1097	return;
1098	log("Editor sent invalid change coordinates, inferring newline at EOF");
1099	Contents.push_back(`'\n'`);
1100	consumeError(Err.takeError());
1101	Err = Contents.size();
1102	}
1103
1104	llvm::Error applyChange(std::string &Contents,
1105	const TextDocumentContentChangeEvent &Change) {
1106	if (!Change.range) {
1107	Contents = Change.text;
1108	return llvm::Error::success();
1109	}
1110
1111	const Position &Start = Change.range ->start;
1112	llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false);
1113	inferFinalNewline(StartIndex, Contents, Start);
1114	if (!StartIndex)
1115	return StartIndex.takeError();
1116
1117	const Position &End = Change.range ->end;
1118	llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false);
1119	inferFinalNewline(EndIndex, Contents, End);
1120	if (!EndIndex)
1121	return EndIndex.takeError();
1122
1123	if (EndIndex < StartIndex)
1124	return error(llvm::errc::invalid_argument,
1125	"Range's end position ({0}) is before start position ({1})",
1126	End, Start);
1127
1128	// Since the range length between two LSP positions is dependent on the
1129	// contents of the buffer we compute the range length between the start and
1130	// end position ourselves and compare it to the range length of the LSP
1131	// message to verify the buffers of the client and server are in sync.
1132
1133	// EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
1134	// code units.
1135	ssize_t ComputedRangeLength =
1136	lspLength(Contents.substr(StartIndex, EndIndex - *StartIndex));
1137
1138	if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
1139	return error(llvm::errc::invalid_argument,
1140	"Change's rangeLength ({0}) doesn't match the "
1141	"computed range length ({1}).",
1142	*Change.rangeLength, ComputedRangeLength);
1143
1144	Contents.replace(StartIndex, EndIndex - *StartIndex, Change.text);
1145
1146	return llvm::Error::success();
1147	}
1148
1149	EligibleRegion getEligiblePoints(llvm::StringRef Code,
1150	llvm::StringRef FullyQualifiedName,
1151	const LangOptions &LangOpts) {
1152	EligibleRegion ER;
1153	// Start with global namespace.
1154	std::vector<std::string> Enclosing = {""};
1155	// FIXME: In addition to namespaces try to generate events for function
1156	// definitions as well. One might use a closing parantheses(")" followed by an
1157	// opening brace "{" to trigger the start.
1158	parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
1159	// Using Directives only introduces declarations to current scope, they do
1160	// not change the current namespace, so skip them.
1161	if (Event.Trigger == NamespaceEvent::UsingDirective)
1162	return;
1163	// Do not qualify the global namespace.
1164	if (!Event.Payload.empty())
1165	Event.Payload.append("::");
1166
1167	std::string CurrentNamespace;
1168	if (Event.Trigger == NamespaceEvent::BeginNamespace) {
1169	Enclosing.emplace_back(std::move(Event.Payload));
1170	CurrentNamespace = Enclosing.back();
1171	// parseNameSpaceEvents reports the beginning position of a token; we want
1172	// to insert after '{', so increment by one.
1173	++Event.Pos.character;
1174	} else {
1175	// Event.Payload points to outer namespace when exiting a scope, so use
1176	// the namespace we've last entered instead.
1177	CurrentNamespace = std::move(Enclosing.back());
1178	Enclosing.pop_back();
1179	assert(Enclosing.back() == Event.Payload);
1180	}
1181
1182	// Ignore namespaces that are not a prefix of the target.
1183	if (!FullyQualifiedName.startswith(CurrentNamespace))
1184	return;
1185
1186	// Prefer the namespace that shares the longest prefix with target.
1187	if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) {
1188	ER.EligiblePoints.clear();
1189	ER.EnclosingNamespace = CurrentNamespace;
1190	}
1191	if (CurrentNamespace.size() == ER.EnclosingNamespace.size())
1192	ER.EligiblePoints.emplace_back(std::move(Event.Pos));
1193	});
1194	// If there were no shared namespaces just return EOF.
1195	if (ER.EligiblePoints.empty()) {
1196	assert(ER.EnclosingNamespace.empty());
1197	ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size()));
1198	}
1199	return ER;
1200	}
1201
1202	bool isHeaderFile(llvm::StringRef FileName,
1203	std::optional<LangOptions> LangOpts) {
1204	// Respect the langOpts, for non-file-extension cases, e.g. standard library
1205	// files.
1206	if (LangOpts && LangOpts ->IsHeaderFile)
1207	return true;
1208	namespace types = clang::driver::types;
1209	auto Lang = types::lookupTypeForExtension(
1210	llvm::sys::path::extension(FileName).substr(`1`));
1211	return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang);
1212	}
1213
1214	bool isProtoFile(SourceLocation Loc, const SourceManager &SM) {
1215	auto FileName = SM.getFilename(Loc);
1216	if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
1217	return false;
1218	auto FID = SM.getFileID(Loc);
1219	// All proto generated headers should start with this line.
1220	static const char *ProtoHeaderComment =
1221	"// Generated by the protocol buffer compiler. DO NOT EDIT!";
1222	// Double check that this is an actual protobuf header.
1223	return SM.getBufferData(FID).startswith(ProtoHeaderComment);
1224	}
1225
1226	SourceLocation translatePreamblePatchLocation(SourceLocation Loc,
1227	const SourceManager &SM) {
1228	auto DefFile = SM.getFileID(Loc);
1229	if (auto FE = SM.getFileEntryRefForID(DefFile)) {
1230	auto IncludeLoc = SM.getIncludeLoc(DefFile);
1231	// Preamble patch is included inside the builtin file.
1232	if (IncludeLoc.isValid() && SM.isWrittenInBuiltinFile(IncludeLoc) &&
1233	FE ->getName().endswith(PreamblePatch::HeaderName)) {
1234	auto Presumed = SM.getPresumedLoc(Loc);
1235	// Check that line directive is pointing at main file.
1236	if (Presumed.isValid() && Presumed.getFileID().isInvalid() &&
1237	isMainFile(Presumed.getFilename(), SM)) {
1238	Loc = SM.translateLineCol(SM.getMainFileID(), Presumed.getLine(),
1239	Presumed.getColumn());
1240	}
1241	}
1242	}
1243	return Loc;
1244	}
1245
1246	clangd::Range rangeTillEOL(llvm::StringRef Code, unsigned HashOffset) {
1247	clangd::Range Result;
1248	Result.end = Result.start = offsetToPosition(Code, HashOffset);
1249
1250	// Span the warning until the EOL or EOF.
1251	Result.end.character +=
1252	lspLength(Code.drop_front(HashOffset).take_until([](char C) {
1253	return C == `'\n'` \|\| C == `'\r'`;
1254	}));
1255	return Result;
1256	}
1257	} // namespace clangd
1258	} // namespace clang
1259

Browse the source code of llvm/clang-tools-extra/clangd/SourceCode.cpp