1 | //===--- Preamble.h - Reusing expensive parts of the AST ---------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The vast majority of code in a typical translation unit is in the headers |
10 | // included at the top of the file. |
11 | // |
12 | // The preamble optimization says that we can parse this code once, and reuse |
13 | // the result multiple times. The preamble is invalidated by changes to the |
14 | // code in the preamble region, to the compile command, or to files on disk. |
15 | // |
16 | // This is the most important optimization in clangd: it allows operations like |
17 | // code-completion to have sub-second latency. It is supported by the |
18 | // PrecompiledPreamble functionality in clang, which wraps the techniques used |
19 | // by PCH files, modules etc into a convenient interface. |
20 | // |
21 | //===----------------------------------------------------------------------===// |
22 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H |
23 | #define |
24 | |
25 | #include "CollectMacros.h" |
26 | #include "Compiler.h" |
27 | #include "Diagnostics.h" |
28 | #include "FS.h" |
29 | #include "Headers.h" |
30 | #include "clang-include-cleaner/Record.h" |
31 | #include "support/Path.h" |
32 | #include "clang/Basic/SourceManager.h" |
33 | #include "clang/Frontend/CompilerInvocation.h" |
34 | #include "clang/Frontend/PrecompiledPreamble.h" |
35 | #include "clang/Lex/Lexer.h" |
36 | #include "clang/Tooling/CompilationDatabase.h" |
37 | #include "llvm/ADT/ArrayRef.h" |
38 | #include "llvm/ADT/StringRef.h" |
39 | |
40 | #include <cstddef> |
41 | #include <functional> |
42 | #include <memory> |
43 | #include <string> |
44 | #include <utility> |
45 | #include <vector> |
46 | |
47 | namespace clang { |
48 | namespace clangd { |
49 | |
50 | /// The captured AST context. |
51 | /// Keeps necessary structs for an ASTContext and Preprocessor alive. |
52 | /// This enables consuming them after context that produced the AST is gone. |
53 | /// (e.g. indexing a preamble ast on a separate thread). ASTContext stored |
54 | /// inside is still not thread-safe. |
55 | |
56 | struct CapturedASTCtx { |
57 | public: |
58 | CapturedASTCtx(CompilerInstance &Clang) |
59 | : Invocation(Clang.getInvocationPtr()), |
60 | Diagnostics(Clang.getDiagnosticsPtr()), Target(Clang.getTargetPtr()), |
61 | AuxTarget(Clang.getAuxTarget()), FileMgr(Clang.getFileManagerPtr()), |
62 | SourceMgr(Clang.getSourceManagerPtr()), PP(Clang.getPreprocessorPtr()), |
63 | Context(Clang.getASTContextPtr()) {} |
64 | |
65 | CapturedASTCtx(const CapturedASTCtx &) = delete; |
66 | CapturedASTCtx &operator=(const CapturedASTCtx &) = delete; |
67 | CapturedASTCtx(CapturedASTCtx &&) = default; |
68 | CapturedASTCtx &operator=(CapturedASTCtx &&) = default; |
69 | |
70 | ASTContext &getASTContext() { return *Context; } |
71 | Preprocessor &getPreprocessor() { return *PP; } |
72 | CompilerInvocation &getCompilerInvocation() { return *Invocation; } |
73 | FileManager &getFileManager() { return *FileMgr; } |
74 | void setStatCache(std::shared_ptr<PreambleFileStatusCache> StatCache) { |
75 | this->StatCache = StatCache; |
76 | } |
77 | |
78 | private: |
79 | std::shared_ptr<CompilerInvocation> Invocation; |
80 | IntrusiveRefCntPtr<DiagnosticsEngine> Diagnostics; |
81 | IntrusiveRefCntPtr<TargetInfo> Target; |
82 | IntrusiveRefCntPtr<TargetInfo> AuxTarget; |
83 | IntrusiveRefCntPtr<FileManager> FileMgr; |
84 | IntrusiveRefCntPtr<SourceManager> SourceMgr; |
85 | std::shared_ptr<Preprocessor> PP; |
86 | IntrusiveRefCntPtr<ASTContext> Context; |
87 | std::shared_ptr<PreambleFileStatusCache> StatCache; |
88 | }; |
89 | |
90 | /// The parsed preamble and associated data. |
91 | /// |
92 | /// As we must avoid re-parsing the preamble, any information that can only |
93 | /// be obtained during parsing must be eagerly captured and stored here. |
94 | struct PreambleData { |
95 | PreambleData(PrecompiledPreamble Preamble) : Preamble(std::move(Preamble)) {} |
96 | |
97 | // Version of the ParseInputs this preamble was built from. |
98 | std::string Version; |
99 | tooling::CompileCommand CompileCommand; |
100 | PrecompiledPreamble Preamble; |
101 | std::vector<Diag> Diags; |
102 | // Processes like code completions and go-to-definitions will need #include |
103 | // information, and their compile action skips preamble range. |
104 | IncludeStructure Includes; |
105 | // Captures #include-mapping information in #included headers. |
106 | std::shared_ptr<const include_cleaner::PragmaIncludes> Pragmas; |
107 | // Macros defined in the preamble section of the main file. |
108 | // Users care about headers vs main-file, not preamble vs non-preamble. |
109 | // These should be treated as main-file entities e.g. for code completion. |
110 | MainFileMacros Macros; |
111 | // Pragma marks defined in the preamble section of the main file. |
112 | std::vector<PragmaMark> Marks; |
113 | // Cache of FS operations performed when building the preamble. |
114 | // When reusing a preamble, this cache can be consumed to save IO. |
115 | std::shared_ptr<PreambleFileStatusCache> StatCache; |
116 | // Whether there was a (possibly-incomplete) include-guard on the main file. |
117 | // We need to propagate this information "by hand" to subsequent parses. |
118 | bool MainIsIncludeGuarded = false; |
119 | }; |
120 | |
121 | using PreambleParsedCallback = |
122 | std::function<void(CapturedASTCtx ASTCtx, |
123 | std::shared_ptr<const include_cleaner::PragmaIncludes>)>; |
124 | |
/// Timings and statistics from the preamble build. Unlike PreambleData, these
/// do not need to be stored for later, but can be useful for logging, metrics,
/// etc.
///
/// Every field is zero-initialized by default, so a stats object that was
/// never filled in reads as all zeros instead of indeterminate values. The
/// struct remains an aggregate.
struct PreambleBuildStats {
  /// Total wall time it took to build preamble, in seconds.
  double TotalBuildTime = 0.0;
  /// Time spent in filesystem operations during the build, in seconds.
  double FileSystemTime = 0.0;

  /// Estimate of the memory used while building the preamble.
  /// This memory has been released when buildPreamble returns.
  /// For example, this includes the size of the in-memory AST (ASTContext).
  size_t BuildSize = 0;
  /// The serialized size of the preamble.
  /// This storage is needed while the preamble is used (but may be on disk).
  size_t SerializedSize = 0;
};
142 | |
143 | /// Build a preamble for the new inputs unless an old one can be reused. |
144 | /// If \p PreambleCallback is set, it will be run on top of the AST while |
145 | /// building the preamble. |
146 | /// If Stats is not non-null, build statistics will be exported there. |
147 | std::shared_ptr<const PreambleData> |
148 | buildPreamble(PathRef FileName, CompilerInvocation CI, |
149 | const ParseInputs &Inputs, bool StoreInMemory, |
150 | PreambleParsedCallback PreambleCallback, |
151 | PreambleBuildStats *Stats = nullptr); |
152 | |
153 | /// Returns true if \p Preamble is reusable for \p Inputs. Note that it will |
154 | /// return true when some missing headers are now available. |
155 | /// FIXME: Should return more information about the delta between \p Preamble |
156 | /// and \p Inputs, e.g. new headers. |
157 | bool isPreambleCompatible(const PreambleData &Preamble, |
158 | const ParseInputs &Inputs, PathRef FileName, |
159 | const CompilerInvocation &CI); |
160 | |
161 | /// Stores information required to parse a TU using a (possibly stale) Baseline |
162 | /// preamble. Later on this information can be injected into the main file by |
163 | /// updating compiler invocation with \c apply. This injected section |
164 | /// approximately reflects additions to the preamble in Modified contents, e.g. |
165 | /// new include directives. |
166 | class PreamblePatch { |
167 | public: |
168 | enum class PatchType { MacroDirectives, All }; |
169 | /// \p Preamble is used verbatim. |
170 | static PreamblePatch unmodified(const PreambleData &Preamble); |
171 | /// Builds a patch that contains new PP directives introduced to the preamble |
172 | /// section of \p Modified compared to \p Baseline. |
173 | /// FIXME: This only handles include directives, we should at least handle |
174 | /// define/undef. |
175 | static PreamblePatch createFullPatch(llvm::StringRef FileName, |
176 | const ParseInputs &Modified, |
177 | const PreambleData &Baseline); |
178 | static PreamblePatch createMacroPatch(llvm::StringRef FileName, |
179 | const ParseInputs &Modified, |
180 | const PreambleData &Baseline); |
181 | /// Returns the FileEntry for the preamble patch of MainFilePath in SM, if |
182 | /// any. |
183 | static const FileEntry *getPatchEntry(llvm::StringRef MainFilePath, |
184 | const SourceManager &SM); |
185 | |
186 | /// Adjusts CI (which compiles the modified inputs) to be used with the |
187 | /// baseline preamble. This is done by inserting an artificial include to the |
188 | /// \p CI that contains new directives calculated in create. |
189 | void apply(CompilerInvocation &CI) const; |
190 | |
191 | /// Returns #include directives from the \c Modified preamble that were |
192 | /// resolved using the \c Baseline preamble. This covers the new locations of |
193 | /// inclusions that were moved around, but not inclusions of new files. Those |
194 | /// will be recorded when parsing the main file: the includes in the injected |
195 | /// section will be resolved back to their spelled positions in the main file |
196 | /// using the presumed-location mechanism. |
197 | std::vector<Inclusion> preambleIncludes() const; |
198 | |
199 | /// Returns preamble bounds for the Modified. |
200 | PreambleBounds modifiedBounds() const { return ModifiedBounds; } |
201 | |
202 | /// Returns textual patch contents. |
203 | llvm::StringRef text() const { return PatchContents; } |
204 | |
205 | /// Returns diag locations for Modified contents. |
206 | llvm::ArrayRef<Diag> patchedDiags() const { return PatchedDiags; } |
207 | |
208 | static constexpr llvm::StringLiteral = "__preamble_patch__.h" ; |
209 | |
210 | llvm::ArrayRef<PragmaMark> marks() const; |
211 | const MainFileMacros &mainFileMacros() const; |
212 | |
213 | private: |
214 | static PreamblePatch create(llvm::StringRef FileName, |
215 | const ParseInputs &Modified, |
216 | const PreambleData &Baseline, |
217 | PatchType PatchType); |
218 | |
219 | PreamblePatch() = default; |
220 | std::string PatchContents; |
221 | std::string PatchFileName; |
222 | // Includes that are present in both Baseline and Modified. Used for |
223 | // patching includes of baseline preamble. |
224 | std::vector<Inclusion> PreambleIncludes; |
225 | // Diags that were attached to a line preserved in Modified contents. |
226 | std::vector<Diag> PatchedDiags; |
227 | PreambleBounds ModifiedBounds = {0, false}; |
228 | const PreambleData *Baseline = nullptr; |
229 | std::vector<PragmaMark> PatchedMarks; |
230 | MainFileMacros PatchedMacros; |
231 | }; |
232 | |
233 | } // namespace clangd |
234 | } // namespace clang |
235 | |
236 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H |
237 | |