1//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "IncludeCleaner.h"
10#include "Diagnostics.h"
11#include "Headers.h"
12#include "ParsedAST.h"
13#include "Preamble.h"
14#include "Protocol.h"
15#include "SourceCode.h"
16#include "clang-include-cleaner/Analysis.h"
17#include "clang-include-cleaner/IncludeSpeller.h"
18#include "clang-include-cleaner/Record.h"
19#include "clang-include-cleaner/Types.h"
20#include "support/Logger.h"
21#include "support/Path.h"
22#include "support/Trace.h"
23#include "clang/AST/ASTContext.h"
24#include "clang/Basic/Diagnostic.h"
25#include "clang/Basic/LLVM.h"
26#include "clang/Basic/SourceLocation.h"
27#include "clang/Basic/SourceManager.h"
28#include "clang/Format/Format.h"
29#include "clang/Lex/HeaderSearch.h"
30#include "clang/Lex/Preprocessor.h"
31#include "clang/Tooling/Core/Replacement.h"
32#include "clang/Tooling/Inclusions/HeaderIncludes.h"
33#include "clang/Tooling/Inclusions/StandardLibrary.h"
34#include "clang/Tooling/Syntax/Tokens.h"
35#include "llvm/ADT/ArrayRef.h"
36#include "llvm/ADT/DenseSet.h"
37#include "llvm/ADT/GenericUniformityImpl.h"
38#include "llvm/ADT/STLExtras.h"
39#include "llvm/ADT/SmallString.h"
40#include "llvm/ADT/SmallVector.h"
41#include "llvm/ADT/StringRef.h"
42#include "llvm/Support/Error.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/FormatVariadic.h"
45#include "llvm/Support/Path.h"
46#include "llvm/Support/Regex.h"
47#include <cassert>
48#include <iterator>
49#include <map>
50#include <memory>
51#include <optional>
52#include <string>
53#include <utility>
54#include <vector>
55
56namespace clang::clangd {
57namespace {
58
59bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) {
60 // Convert the path to Unix slashes and try to match against the filter.
61 llvm::SmallString<64> NormalizedPath(HeaderPath);
62 llvm::sys::path::native(NormalizedPath, llvm::sys::path::Style::posix);
63 for (auto &Filter : IgnoreHeaders) {
64 if (Filter(NormalizedPath))
65 return true;
66 }
67 return false;
68}
69
70bool mayConsiderUnused(
71 const Inclusion &Inc, ParsedAST &AST,
72 const include_cleaner::PragmaIncludes *PI) {
73 if (PI && PI->shouldKeep(Inc.HashLine + 1))
74 return false;
75 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
76 // System headers are likely to be standard library headers.
77 // Until we have good support for umbrella headers, don't warn about them.
78 if (Inc.Written.front() == '<')
79 return tooling::stdlib::Header::named(Inc.Written).has_value();
80 assert(Inc.HeaderID);
81 auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
82 auto FE = AST.getSourceManager().getFileManager().getFileRef(
83 AST.getIncludeStructure().getRealPath(HID));
84 assert(FE);
85 if (PI) {
86 // Check if main file is the public interface for a private header. If so we
87 // shouldn't diagnose it as unused.
88 if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) {
89 PHeader = PHeader.trim("<>\"");
90 // Since most private -> public mappings happen in a verbatim way, we
91 // check textually here. This might go wrong in presence of symlinks or
92 // header mappings. But that's not different than rest of the places.
93 if (AST.tuPath().endswith(PHeader))
94 return false;
95 }
96 }
97 // Headers without include guards have side effects and are not
98 // self-contained, skip them.
99 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
100 &FE->getFileEntry())) {
101 dlog("{0} doesn't have header guard and will not be considered unused",
102 FE->getName());
103 return false;
104 }
105 return true;
106}
107
108std::vector<Diag> generateMissingIncludeDiagnostics(
109 ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes,
110 llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
111 std::vector<Diag> Result;
112 const SourceManager &SM = AST.getSourceManager();
113 const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
114
115 auto FileStyle = format::getStyle(
116 format::DefaultFormatStyle, AST.tuPath(), format::DefaultFallbackStyle,
117 Code, &SM.getFileManager().getVirtualFileSystem());
118 if (!FileStyle) {
119 elog("Couldn't infer style", FileStyle.takeError());
120 FileStyle = format::getLLVMStyle();
121 }
122
123 tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
124 FileStyle->IncludeStyle);
125 for (const auto &SymbolWithMissingInclude : MissingIncludes) {
126 llvm::StringRef ResolvedPath =
127 SymbolWithMissingInclude.Providers.front().resolvedPath();
128 if (isIgnored(ResolvedPath, IgnoreHeaders)) {
129 dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
130 "config",
131 ResolvedPath);
132 continue;
133 }
134
135 std::string Spelling = include_cleaner::spellHeader(
136 {SymbolWithMissingInclude.Providers.front(),
137 AST.getPreprocessor().getHeaderSearchInfo(), MainFile});
138
139 llvm::StringRef HeaderRef{Spelling};
140 bool Angled = HeaderRef.starts_with("<");
141 // We might suggest insertion of an existing include in edge cases, e.g.,
142 // include is present in a PP-disabled region, or spelling of the header
143 // turns out to be the same as one of the unresolved includes in the
144 // main file.
145 std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert(
146 HeaderRef.trim("\"<>"), Angled, tooling::IncludeDirective::Include);
147 if (!Replacement.has_value())
148 continue;
149
150 Diag &D = Result.emplace_back();
151 D.Message =
152 llvm::formatv("No header providing \"{0}\" is directly included",
153 SymbolWithMissingInclude.Symbol.name());
154 D.Name = "missing-includes";
155 D.Source = Diag::DiagSource::Clangd;
156 D.File = AST.tuPath();
157 D.InsideMainFile = true;
158 // We avoid the "warning" severity here in favor of LSP's "information".
159 //
160 // Users treat most warnings on code being edited as high-priority.
161 // They don't think of include cleanups the same way: they want to edit
162 // lines with existing violations without fixing them.
163 // Diagnostics at the same level tend to be visually indistinguishable,
164 // and a few missing includes can cause many diagnostics.
165 // Marking these as "information" leaves them visible, but less intrusive.
166 //
167 // (These concerns don't apply to unused #include warnings: these are fewer,
168 // they appear on infrequently-edited lines with few other warnings, and
169 // the 'Unneccesary' tag often result in a different rendering)
170 //
171 // Usually clang's "note" severity usually has special semantics, being
172 // translated into LSP RelatedInformation of a parent diagnostic.
173 // But not here: these aren't processed by clangd's DiagnosticConsumer.
174 D.Severity = DiagnosticsEngine::Note;
175 D.Range = clangd::Range{
176 offsetToPosition(Code,
177 SymbolWithMissingInclude.SymRefRange.beginOffset()),
178 offsetToPosition(Code,
179 SymbolWithMissingInclude.SymRefRange.endOffset())};
180 auto &F = D.Fixes.emplace_back();
181 F.Message = "#include " + Spelling;
182 TextEdit Edit = replacementToEdit(Code, *Replacement);
183 F.Edits.emplace_back(std::move(Edit));
184 }
185 return Result;
186}
187
188std::vector<Diag> generateUnusedIncludeDiagnostics(
189 PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes,
190 llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
191 std::vector<Diag> Result;
192 for (const auto *Inc : UnusedIncludes) {
193 if (isIgnored(Inc->Resolved, IgnoreHeaders))
194 continue;
195 Diag &D = Result.emplace_back();
196 D.Message =
197 llvm::formatv("included header {0} is not used directly",
198 llvm::sys::path::filename(
199 Inc->Written.substr(1, Inc->Written.size() - 2),
200 llvm::sys::path::Style::posix));
201 D.Name = "unused-includes";
202 D.Source = Diag::DiagSource::Clangd;
203 D.File = FileName;
204 D.InsideMainFile = true;
205 D.Severity = DiagnosticsEngine::Warning;
206 D.Tags.push_back(Unnecessary);
207 D.Range = rangeTillEOL(Code, Inc->HashOffset);
208 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
209 // used headers are only reachable transitively through this one. Suggest
210 // including them directly instead.
211 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
212 // (keep/export) remove the warning once we support IWYU pragmas.
213 auto &F = D.Fixes.emplace_back();
214 F.Message = "remove #include directive";
215 F.Edits.emplace_back();
216 F.Edits.back().range.start.line = Inc->HashLine;
217 F.Edits.back().range.end.line = Inc->HashLine + 1;
218 }
219 return Result;
220}
221
222std::optional<Fix>
223removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) {
224 if (UnusedIncludes.empty())
225 return std::nullopt;
226
227 Fix RemoveAll;
228 RemoveAll.Message = "remove all unused includes";
229 for (const auto &Diag : UnusedIncludes) {
230 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
231 RemoveAll.Edits.insert(RemoveAll.Edits.end(),
232 Diag.Fixes.front().Edits.begin(),
233 Diag.Fixes.front().Edits.end());
234 }
235
236 // TODO(hokein): emit a suitable text for the label.
237 ChangeAnnotation Annotation = {/*label=*/"",
238 /*needsConfirmation=*/true,
239 /*description=*/""};
240 static const ChangeAnnotationIdentifier RemoveAllUnusedID =
241 "RemoveAllUnusedIncludes";
242 for (unsigned I = 0; I < RemoveAll.Edits.size(); ++I) {
243 ChangeAnnotationIdentifier ID = RemoveAllUnusedID + std::to_string(I);
244 RemoveAll.Edits[I].annotationId = ID;
245 RemoveAll.Annotations.push_back({ID, Annotation});
246 }
247 return RemoveAll;
248}
249
250std::optional<Fix>
251addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) {
252 if (MissingIncludeDiags.empty())
253 return std::nullopt;
254
255 Fix AddAllMissing;
256 AddAllMissing.Message = "add all missing includes";
257 // A map to deduplicate the edits with the same new text.
258 // newText (#include "my_missing_header.h") -> TextEdit.
259 std::map<std::string, TextEdit> Edits;
260 for (const auto &Diag : MissingIncludeDiags) {
261 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
262 for (const auto &Edit : Diag.Fixes.front().Edits) {
263 Edits.try_emplace(Edit.newText, Edit);
264 }
265 }
266 // FIXME(hokein): emit used symbol reference in the annotation.
267 ChangeAnnotation Annotation = {/*label=*/"",
268 /*needsConfirmation=*/true,
269 /*description=*/""};
270 static const ChangeAnnotationIdentifier AddAllMissingID =
271 "AddAllMissingIncludes";
272 unsigned I = 0;
273 for (auto &It : Edits) {
274 ChangeAnnotationIdentifier ID = AddAllMissingID + std::to_string(I++);
275 AddAllMissing.Edits.push_back(std::move(It.second));
276 AddAllMissing.Edits.back().annotationId = ID;
277
278 AddAllMissing.Annotations.push_back({ID, Annotation});
279 }
280 return AddAllMissing;
281}
282Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
283 Fix FixAll;
284 FixAll.Message = "fix all includes";
285
286 for (const auto &F : RemoveAllUnused.Edits)
287 FixAll.Edits.push_back(F);
288 for (const auto &F : AddAllMissing.Edits)
289 FixAll.Edits.push_back(F);
290
291 for (const auto &A : RemoveAllUnused.Annotations)
292 FixAll.Annotations.push_back(A);
293 for (const auto &A : AddAllMissing.Annotations)
294 FixAll.Annotations.push_back(A);
295 return FixAll;
296}
297
298std::vector<const Inclusion *>
299getUnused(ParsedAST &AST,
300 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
301 trace::Span Tracer("IncludeCleaner::getUnused");
302 std::vector<const Inclusion *> Unused;
303 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
304 if (!MFI.HeaderID)
305 continue;
306 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
307 if (ReferencedFiles.contains(IncludeID))
308 continue;
309 if (!mayConsiderUnused(MFI, AST, AST.getPragmaIncludes().get())) {
310 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
311 MFI.Written);
312 continue;
313 }
314 Unused.push_back(&MFI);
315 }
316 return Unused;
317}
318
319} // namespace
320
321std::vector<include_cleaner::SymbolReference>
322collectMacroReferences(ParsedAST &AST) {
323 const auto &SM = AST.getSourceManager();
324 auto &PP = AST.getPreprocessor();
325 std::vector<include_cleaner::SymbolReference> Macros;
326 for (const auto &[_, Refs] : AST.getMacros().MacroRefs) {
327 for (const auto &Ref : Refs) {
328 auto Loc = SM.getComposedLoc(SM.getMainFileID(), Ref.StartOffset);
329 const auto *Tok = AST.getTokens().spelledTokenAt(Loc);
330 if (!Tok)
331 continue;
332 auto Macro = locateMacroAt(*Tok, PP);
333 if (!Macro)
334 continue;
335 auto DefLoc = Macro->NameLoc;
336 if (!DefLoc.isValid())
337 continue;
338 Macros.push_back(
339 {include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Tok->text(SM)),
340 DefLoc},
341 Tok->location(),
342 Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous
343 : include_cleaner::RefType::Explicit});
344 }
345 }
346
347 return Macros;
348}
349
350include_cleaner::Includes
351convertIncludes(const SourceManager &SM,
352 const llvm::ArrayRef<Inclusion> Includes) {
353 include_cleaner::Includes ConvertedIncludes;
354 for (const Inclusion &Inc : Includes) {
355 include_cleaner::Include TransformedInc;
356 llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
357 TransformedInc.Spelled = WrittenRef.trim("\"<>");
358 TransformedInc.HashLocation =
359 SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
360 TransformedInc.Line = Inc.HashLine + 1;
361 TransformedInc.Angled = WrittenRef.starts_with("<");
362 auto FE = SM.getFileManager().getFile(Inc.Resolved);
363 if (!FE) {
364 elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
365 Inc.Resolved, FE.getError().message());
366 continue;
367 }
368 TransformedInc.Resolved = *FE;
369 ConvertedIncludes.add(std::move(TransformedInc));
370 }
371 return ConvertedIncludes;
372}
373
374IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
375 // Interaction is only polished for C/CPP.
376 if (AST.getLangOpts().ObjC)
377 return {};
378 const auto &SM = AST.getSourceManager();
379 const auto &Includes = AST.getIncludeStructure();
380 include_cleaner::Includes ConvertedIncludes =
381 convertIncludes(SM, Includes.MainFileIncludes);
382 const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
383 auto *PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM);
384
385 std::vector<include_cleaner::SymbolReference> Macros =
386 collectMacroReferences(AST);
387 std::vector<MissingIncludeDiagInfo> MissingIncludes;
388 llvm::DenseSet<IncludeStructure::HeaderID> Used;
389 trace::Span Tracer("include_cleaner::walkUsed");
390 include_cleaner::walkUsed(
391 AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros,
392 AST.getPragmaIncludes().get(), SM,
393 [&](const include_cleaner::SymbolReference &Ref,
394 llvm::ArrayRef<include_cleaner::Header> Providers) {
395 bool Satisfied = false;
396 for (const auto &H : Providers) {
397 if (H.kind() == include_cleaner::Header::Physical &&
398 (H.physical() == MainFile || H.physical() == PreamblePatch)) {
399 Satisfied = true;
400 continue;
401 }
402 for (auto *Inc : ConvertedIncludes.match(H)) {
403 Satisfied = true;
404 auto HeaderID = Includes.getID(Inc->Resolved);
405 assert(HeaderID.has_value() &&
406 "ConvertedIncludes only contains resolved includes.");
407 Used.insert(*HeaderID);
408 }
409 }
410
411 if (Satisfied || Providers.empty() ||
412 Ref.RT != include_cleaner::RefType::Explicit)
413 return;
414
415 // We actually always want to map usages to their spellings, but
416 // spelling locations can point into preamble section. Using these
417 // offsets could lead into crashes in presence of stale preambles. Hence
418 // we use "getFileLoc" instead to make sure it always points into main
419 // file.
420 // FIXME: Use presumed locations to map such usages back to patched
421 // locations safely.
422 auto Loc = SM.getFileLoc(Ref.RefLocation);
423 // File locations can be outside of the main file if macro is expanded
424 // through an #include.
425 while (SM.getFileID(Loc) != SM.getMainFileID())
426 Loc = SM.getIncludeLoc(SM.getFileID(Loc));
427 auto TouchingTokens =
428 syntax::spelledTokensTouching(Loc, AST.getTokens());
429 assert(!TouchingTokens.empty());
430 // Loc points to the start offset of the ref token, here we use the last
431 // element of the TouchingTokens, e.g. avoid getting the "::" for
432 // "ns::^abc".
433 MissingIncludeDiagInfo DiagInfo{
434 Ref.Target, TouchingTokens.back().range(SM), Providers};
435 MissingIncludes.push_back(std::move(DiagInfo));
436 });
437 // Put possibly equal diagnostics together for deduplication.
438 // The duplicates might be from macro arguments that get expanded multiple
439 // times.
440 llvm::stable_sort(MissingIncludes, [](const MissingIncludeDiagInfo &LHS,
441 const MissingIncludeDiagInfo &RHS) {
442 // First sort by reference location.
443 if (LHS.SymRefRange != RHS.SymRefRange) {
444 // We can get away just by comparing the offsets as all the ranges are in
445 // main file.
446 return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset();
447 }
448 // For the same location, break ties using the symbol. Note that this won't
449 // be stable across runs.
450 using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>;
451 return MapInfo::getHashValue(LHS.Symbol) <
452 MapInfo::getHashValue(RHS.Symbol);
453 });
454 MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end());
455 std::vector<const Inclusion *> UnusedIncludes = getUnused(AST, Used);
456 return {std::move(UnusedIncludes), std::move(MissingIncludes)};
457}
458
459std::vector<Diag>
460issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
461 const IncludeCleanerFindings &Findings,
462 HeaderFilter IgnoreHeaders) {
463 trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
464 std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics(
465 AST.tuPath(), Findings.UnusedIncludes, Code, IgnoreHeaders);
466 std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes);
467
468 std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics(
469 AST, Findings.MissingIncludes, Code, IgnoreHeaders);
470 std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags);
471
472 std::optional<Fix> FixAll;
473 if (RemoveAllUnused && AddAllMissing)
474 FixAll = fixAll(*RemoveAllUnused, *AddAllMissing);
475
476 auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) {
477 if (!F)
478 return;
479 Out->Fixes.push_back(*F);
480 };
481 for (auto &Diag : MissingIncludeDiags) {
482 AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt,
483 &Diag);
484 AddBatchFix(FixAll, &Diag);
485 }
486 for (auto &Diag : UnusedIncludes) {
487 AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt,
488 &Diag);
489 AddBatchFix(FixAll, &Diag);
490 }
491
492 auto Result = std::move(MissingIncludeDiags);
493 llvm::move(UnusedIncludes, std::back_inserter(Result));
494 return Result;
495}
496
497std::optional<include_cleaner::Header>
498firstMatchedProvider(const include_cleaner::Includes &Includes,
499 llvm::ArrayRef<include_cleaner::Header> Providers) {
500 for (const auto &H : Providers) {
501 if (!Includes.match(H).empty())
502 return H;
503 }
504 // No match for this provider in the includes list.
505 return std::nullopt;
506}
507} // namespace clang::clangd
508