1 | //===--- ConfigYAML.cpp - Loading configuration fragments from YAML files -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "ConfigFragment.h" |
9 | #include "llvm/ADT/SmallSet.h" |
10 | #include "llvm/ADT/SmallString.h" |
11 | #include "llvm/ADT/StringRef.h" |
12 | #include "llvm/Support/MemoryBuffer.h" |
13 | #include "llvm/Support/SourceMgr.h" |
14 | #include "llvm/Support/YAMLParser.h" |
15 | #include <optional> |
16 | #include <string> |
17 | #include <system_error> |
18 | |
19 | namespace clang { |
20 | namespace clangd { |
21 | namespace config { |
22 | namespace { |
23 | using llvm::yaml::BlockScalarNode; |
24 | using llvm::yaml::MappingNode; |
25 | using llvm::yaml::Node; |
26 | using llvm::yaml::ScalarNode; |
27 | using llvm::yaml::SequenceNode; |
28 | |
29 | std::optional<llvm::StringRef> |
30 | bestGuess(llvm::StringRef Search, |
31 | llvm::ArrayRef<llvm::StringRef> AllowedValues) { |
32 | unsigned MaxEdit = (Search.size() + 1) / 3; |
33 | if (!MaxEdit) |
34 | return std::nullopt; |
35 | std::optional<llvm::StringRef> Result; |
36 | for (const auto &AllowedValue : AllowedValues) { |
37 | unsigned EditDistance = Search.edit_distance(AllowedValue, true, MaxEdit); |
38 | // We can't do better than an edit distance of 1, so just return this and |
39 | // save computing other values. |
40 | if (EditDistance == 1U) |
41 | return AllowedValue; |
42 | if (EditDistance == MaxEdit && !Result) { |
43 | Result = AllowedValue; |
44 | } else if (EditDistance < MaxEdit) { |
45 | Result = AllowedValue; |
46 | MaxEdit = EditDistance; |
47 | } |
48 | } |
49 | return Result; |
50 | } |
51 | |
// Translates YAML nodes into config Fragment structures, reporting problems
// as diagnostics through the SourceMgr it was constructed with.
class Parser {
  // Sink for every diagnostic emitted by error() and warning().
  llvm::SourceMgr &SM;
  // Set by error(); once true, the top-level parse() returns false.
  bool HadError = false;

public:
  Parser(llvm::SourceMgr &SM) : SM(SM) {}
58 | |
59 | // Tries to parse N into F, returning false if it failed and we couldn't |
60 | // meaningfully recover (YAML syntax error, or hard semantic error). |
61 | bool parse(Fragment &F, Node &N) { |
62 | DictParser Dict("Config" , this); |
63 | Dict.handle("If" , [&](Node &N) { parse(F.If, N); }); |
64 | Dict.handle("CompileFlags" , [&](Node &N) { parse(F.CompileFlags, N); }); |
65 | Dict.handle("Index" , [&](Node &N) { parse(F.Index, N); }); |
66 | Dict.handle("Style" , [&](Node &N) { parse(F.Style, N); }); |
67 | Dict.handle("Diagnostics" , [&](Node &N) { parse(F.Diagnostics, N); }); |
68 | Dict.handle("Completion" , [&](Node &N) { parse(F.Completion, N); }); |
69 | Dict.handle("Hover" , [&](Node &N) { parse(F.Hover, N); }); |
70 | Dict.handle("InlayHints" , [&](Node &N) { parse(F.InlayHints, N); }); |
71 | Dict.handle("SemanticTokens" , [&](Node &N) { parse(F.SemanticTokens, N); }); |
72 | Dict.parse(N); |
73 | return !(N.failed() || HadError); |
74 | } |
75 | |
76 | private: |
77 | void parse(Fragment::IfBlock &F, Node &N) { |
78 | DictParser Dict("If" , this); |
79 | Dict.unrecognized([&](Located<std::string>, Node &) { |
80 | F.HasUnrecognizedCondition = true; |
81 | return true; // Emit a warning for the unrecognized key. |
82 | }); |
83 | Dict.handle("PathMatch" , [&](Node &N) { |
84 | if (auto Values = scalarValues(N)) |
85 | F.PathMatch = std::move(*Values); |
86 | }); |
87 | Dict.handle("PathExclude" , [&](Node &N) { |
88 | if (auto Values = scalarValues(N)) |
89 | F.PathExclude = std::move(*Values); |
90 | }); |
91 | Dict.parse(N); |
92 | } |
93 | |
94 | void parse(Fragment::CompileFlagsBlock &F, Node &N) { |
95 | DictParser Dict("CompileFlags" , this); |
96 | Dict.handle("Compiler" , [&](Node &N) { |
97 | if (auto Value = scalarValue(N, "Compiler" )) |
98 | F.Compiler = std::move(*Value); |
99 | }); |
100 | Dict.handle("Add" , [&](Node &N) { |
101 | if (auto Values = scalarValues(N)) |
102 | F.Add = std::move(*Values); |
103 | }); |
104 | Dict.handle("Remove" , [&](Node &N) { |
105 | if (auto Values = scalarValues(N)) |
106 | F.Remove = std::move(*Values); |
107 | }); |
108 | Dict.handle("CompilationDatabase" , [&](Node &N) { |
109 | F.CompilationDatabase = scalarValue(N, "CompilationDatabase" ); |
110 | }); |
111 | Dict.parse(N); |
112 | } |
113 | |
114 | void parse(Fragment::StyleBlock &F, Node &N) { |
115 | DictParser Dict("Style" , this); |
116 | Dict.handle("FullyQualifiedNamespaces" , [&](Node &N) { |
117 | if (auto Values = scalarValues(N)) |
118 | F.FullyQualifiedNamespaces = std::move(*Values); |
119 | }); |
120 | Dict.parse(N); |
121 | } |
122 | |
123 | void parse(Fragment::DiagnosticsBlock &F, Node &N) { |
124 | DictParser Dict("Diagnostics" , this); |
125 | Dict.handle("Suppress" , [&](Node &N) { |
126 | if (auto Values = scalarValues(N)) |
127 | F.Suppress = std::move(*Values); |
128 | }); |
129 | Dict.handle("UnusedIncludes" , [&](Node &N) { |
130 | F.UnusedIncludes = scalarValue(N, "UnusedIncludes" ); |
131 | }); |
132 | Dict.handle("MissingIncludes" , [&](Node &N) { |
133 | F.MissingIncludes = scalarValue(N, "MissingIncludes" ); |
134 | }); |
135 | Dict.handle("Includes" , [&](Node &N) { parse(F.Includes, N); }); |
136 | Dict.handle("ClangTidy" , [&](Node &N) { parse(F.ClangTidy, N); }); |
137 | Dict.parse(N); |
138 | } |
139 | |
140 | void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) { |
141 | DictParser Dict("ClangTidy" , this); |
142 | Dict.handle("Add" , [&](Node &N) { |
143 | if (auto Values = scalarValues(N)) |
144 | F.Add = std::move(*Values); |
145 | }); |
146 | Dict.handle("Remove" , [&](Node &N) { |
147 | if (auto Values = scalarValues(N)) |
148 | F.Remove = std::move(*Values); |
149 | }); |
150 | Dict.handle("CheckOptions" , [&](Node &N) { |
151 | DictParser CheckOptDict("CheckOptions" , this); |
152 | CheckOptDict.unrecognized([&](Located<std::string> &&Key, Node &Val) { |
153 | if (auto Value = scalarValue(Val, *Key)) |
154 | F.CheckOptions.emplace_back(std::move(Key), std::move(*Value)); |
155 | return false; // Don't emit a warning |
156 | }); |
157 | CheckOptDict.parse(N); |
158 | }); |
159 | Dict.parse(N); |
160 | } |
161 | |
162 | void parse(Fragment::DiagnosticsBlock::IncludesBlock &F, Node &N) { |
163 | DictParser Dict("Includes" , this); |
164 | Dict.handle("IgnoreHeader" , [&](Node &N) { |
165 | if (auto Values = scalarValues(N)) |
166 | F.IgnoreHeader = std::move(*Values); |
167 | }); |
168 | Dict.parse(N); |
169 | } |
170 | |
171 | void parse(Fragment::IndexBlock &F, Node &N) { |
172 | DictParser Dict("Index" , this); |
173 | Dict.handle("Background" , |
174 | [&](Node &N) { F.Background = scalarValue(N, "Background" ); }); |
175 | Dict.handle("External" , [&](Node &N) { |
176 | Fragment::IndexBlock::ExternalBlock External; |
177 | // External block can either be a mapping or a scalar value. Dispatch |
178 | // accordingly. |
179 | if (N.getType() == Node::NK_Mapping) { |
180 | parse(External, N); |
181 | } else if (N.getType() == Node::NK_Scalar || |
182 | N.getType() == Node::NK_BlockScalar) { |
183 | parse(External, *scalarValue(N, "External" )); |
184 | } else { |
185 | error("External must be either a scalar or a mapping." , N); |
186 | return; |
187 | } |
188 | F.External.emplace(std::move(External)); |
189 | F.External->Range = N.getSourceRange(); |
190 | }); |
191 | Dict.handle("StandardLibrary" , [&](Node &N) { |
192 | if (auto StandardLibrary = boolValue(N, "StandardLibrary" )) |
193 | F.StandardLibrary = *StandardLibrary; |
194 | }); |
195 | Dict.parse(N); |
196 | } |
197 | |
198 | void parse(Fragment::IndexBlock::ExternalBlock &F, |
199 | Located<std::string> ExternalVal) { |
200 | if (!llvm::StringRef(*ExternalVal).equals_insensitive("none" )) { |
201 | error("Only scalar value supported for External is 'None'" , |
202 | ExternalVal.Range); |
203 | return; |
204 | } |
205 | F.IsNone = true; |
206 | F.IsNone.Range = ExternalVal.Range; |
207 | } |
208 | |
209 | void parse(Fragment::IndexBlock::ExternalBlock &F, Node &N) { |
210 | DictParser Dict("External" , this); |
211 | Dict.handle("File" , [&](Node &N) { F.File = scalarValue(N, "File" ); }); |
212 | Dict.handle("Server" , |
213 | [&](Node &N) { F.Server = scalarValue(N, "Server" ); }); |
214 | Dict.handle("MountPoint" , |
215 | [&](Node &N) { F.MountPoint = scalarValue(N, "MountPoint" ); }); |
216 | Dict.parse(N); |
217 | } |
218 | |
219 | void parse(Fragment::CompletionBlock &F, Node &N) { |
220 | DictParser Dict("Completion" , this); |
221 | Dict.handle("AllScopes" , [&](Node &N) { |
222 | if (auto AllScopes = boolValue(N, "AllScopes" )) |
223 | F.AllScopes = *AllScopes; |
224 | }); |
225 | Dict.parse(N); |
226 | } |
227 | |
228 | void parse(Fragment::HoverBlock &F, Node &N) { |
229 | DictParser Dict("Hover" , this); |
230 | Dict.handle("ShowAKA" , [&](Node &N) { |
231 | if (auto ShowAKA = boolValue(N, "ShowAKA" )) |
232 | F.ShowAKA = *ShowAKA; |
233 | }); |
234 | Dict.parse(N); |
235 | } |
236 | |
237 | void parse(Fragment::InlayHintsBlock &F, Node &N) { |
238 | DictParser Dict("InlayHints" , this); |
239 | Dict.handle("Enabled" , [&](Node &N) { |
240 | if (auto Value = boolValue(N, "Enabled" )) |
241 | F.Enabled = *Value; |
242 | }); |
243 | Dict.handle("ParameterNames" , [&](Node &N) { |
244 | if (auto Value = boolValue(N, "ParameterNames" )) |
245 | F.ParameterNames = *Value; |
246 | }); |
247 | Dict.handle("DeducedTypes" , [&](Node &N) { |
248 | if (auto Value = boolValue(N, "DeducedTypes" )) |
249 | F.DeducedTypes = *Value; |
250 | }); |
251 | Dict.handle("Designators" , [&](Node &N) { |
252 | if (auto Value = boolValue(N, "Designators" )) |
253 | F.Designators = *Value; |
254 | }); |
255 | Dict.handle("BlockEnd" , [&](Node &N) { |
256 | if (auto Value = boolValue(N, "BlockEnd" )) |
257 | F.BlockEnd = *Value; |
258 | }); |
259 | Dict.handle("TypeNameLimit" , [&](Node &N) { |
260 | if (auto Value = uint32Value(N, "TypeNameLimit" )) |
261 | F.TypeNameLimit = *Value; |
262 | }); |
263 | Dict.parse(N); |
264 | } |
265 | |
266 | void parse(Fragment::SemanticTokensBlock &F, Node &N) { |
267 | DictParser Dict("SemanticTokens" , this); |
268 | Dict.handle("DisabledKinds" , [&](Node &N) { |
269 | if (auto Values = scalarValues(N)) |
270 | F.DisabledKinds = std::move(*Values); |
271 | }); |
272 | Dict.handle("DisabledModifiers" , [&](Node &N) { |
273 | if (auto Values = scalarValues(N)) |
274 | F.DisabledModifiers = std::move(*Values); |
275 | }); |
276 | Dict.parse(N); |
277 | } |
278 | |
279 | // Helper for parsing mapping nodes (dictionaries). |
280 | // We don't use YamlIO as we want to control over unknown keys. |
281 | class DictParser { |
282 | llvm::StringRef Description; |
283 | std::vector<std::pair<llvm::StringRef, std::function<void(Node &)>>> Keys; |
284 | std::function<bool(Located<std::string>, Node &)> UnknownHandler; |
285 | Parser *Outer; |
286 | |
287 | public: |
288 | DictParser(llvm::StringRef Description, Parser *Outer) |
289 | : Description(Description), Outer(Outer) {} |
290 | |
291 | // Parse is called when Key is encountered, and passed the associated value. |
292 | // It should emit diagnostics if the value is invalid (e.g. wrong type). |
293 | // If Key is seen twice, Parse runs only once and an error is reported. |
294 | void handle(llvm::StringLiteral Key, std::function<void(Node &)> Parse) { |
295 | for (const auto &Entry : Keys) { |
296 | (void)Entry; |
297 | assert(Entry.first != Key && "duplicate key handler" ); |
298 | } |
299 | Keys.emplace_back(Key, std::move(Parse)); |
300 | } |
301 | |
302 | // Handler is called when a Key is not matched by any handle(). |
303 | // If this is unset or the Handler returns true, a warning is emitted for |
304 | // the unknown key. |
305 | void |
306 | unrecognized(std::function<bool(Located<std::string>, Node &)> Handler) { |
307 | UnknownHandler = std::move(Handler); |
308 | } |
309 | |
310 | // Process a mapping node and call handlers for each key/value pair. |
311 | void parse(Node &N) const { |
312 | if (N.getType() != Node::NK_Mapping) { |
313 | Outer->error(Description + " should be a dictionary" , N); |
314 | return; |
315 | } |
316 | llvm::SmallSet<std::string, 8> Seen; |
317 | llvm::SmallVector<Located<std::string>, 0> UnknownKeys; |
318 | // We *must* consume all items, even on error, or the parser will assert. |
319 | for (auto &KV : llvm::cast<MappingNode>(N)) { |
320 | auto *K = KV.getKey(); |
321 | if (!K) // YAMLParser emitted an error. |
322 | continue; |
323 | auto Key = Outer->scalarValue(*K, "Dictionary key" ); |
324 | if (!Key) |
325 | continue; |
326 | if (!Seen.insert(**Key).second) { |
327 | Outer->warning("Duplicate key " + **Key + " is ignored" , *K); |
328 | if (auto *Value = KV.getValue()) |
329 | Value->skip(); |
330 | continue; |
331 | } |
332 | auto *Value = KV.getValue(); |
333 | if (!Value) // YAMLParser emitted an error. |
334 | continue; |
335 | bool Matched = false; |
336 | for (const auto &Handler : Keys) { |
337 | if (Handler.first == **Key) { |
338 | Matched = true; |
339 | Handler.second(*Value); |
340 | break; |
341 | } |
342 | } |
343 | if (!Matched) { |
344 | bool Warn = !UnknownHandler; |
345 | if (UnknownHandler) |
346 | Warn = UnknownHandler( |
347 | Located<std::string>(**Key, K->getSourceRange()), *Value); |
348 | if (Warn) |
349 | UnknownKeys.push_back(std::move(*Key)); |
350 | } |
351 | } |
352 | if (!UnknownKeys.empty()) |
353 | warnUnknownKeys(UnknownKeys, Seen); |
354 | } |
355 | |
356 | private: |
357 | void warnUnknownKeys(llvm::ArrayRef<Located<std::string>> UnknownKeys, |
358 | const llvm::SmallSet<std::string, 8> &SeenKeys) const { |
359 | llvm::SmallVector<llvm::StringRef> UnseenKeys; |
360 | for (const auto &KeyAndHandler : Keys) |
361 | if (!SeenKeys.count(KeyAndHandler.first.str())) |
362 | UnseenKeys.push_back(KeyAndHandler.first); |
363 | |
364 | for (const Located<std::string> &UnknownKey : UnknownKeys) |
365 | if (auto BestGuess = bestGuess(*UnknownKey, UnseenKeys)) |
366 | Outer->warning("Unknown " + Description + " key '" + *UnknownKey + |
367 | "'; did you mean '" + *BestGuess + "'?" , |
368 | UnknownKey.Range); |
369 | else |
370 | Outer->warning("Unknown " + Description + " key '" + *UnknownKey + |
371 | "'" , |
372 | UnknownKey.Range); |
373 | } |
374 | }; |
375 | |
376 | // Try to parse a single scalar value from the node, warn on failure. |
377 | std::optional<Located<std::string>> scalarValue(Node &N, |
378 | llvm::StringRef Desc) { |
379 | llvm::SmallString<256> Buf; |
380 | if (auto *S = llvm::dyn_cast<ScalarNode>(&N)) |
381 | return Located<std::string>(S->getValue(Buf).str(), N.getSourceRange()); |
382 | if (auto *BS = llvm::dyn_cast<BlockScalarNode>(&N)) |
383 | return Located<std::string>(BS->getValue().str(), N.getSourceRange()); |
384 | warning(Desc + " should be scalar" , N); |
385 | return std::nullopt; |
386 | } |
387 | |
388 | std::optional<Located<bool>> boolValue(Node &N, llvm::StringRef Desc) { |
389 | if (auto Scalar = scalarValue(N, Desc)) { |
390 | if (auto Bool = llvm::yaml::parseBool(**Scalar)) |
391 | return Located<bool>(*Bool, Scalar->Range); |
392 | warning(Desc + " should be a boolean" , N); |
393 | } |
394 | return std::nullopt; |
395 | } |
396 | |
397 | std::optional<Located<uint32_t>> uint32Value(Node &N, llvm::StringRef Desc) { |
398 | if (auto Scalar = scalarValue(N, Desc)) { |
399 | unsigned long long Num; |
400 | if (!llvm::getAsUnsignedInteger(**Scalar, 0, Num)) { |
401 | return Located<uint32_t>(Num, Scalar->Range); |
402 | } |
403 | } |
404 | warning(Desc + " invalid number" , N); |
405 | return std::nullopt; |
406 | } |
407 | |
408 | // Try to parse a list of single scalar values, or just a single value. |
409 | std::optional<std::vector<Located<std::string>>> scalarValues(Node &N) { |
410 | std::vector<Located<std::string>> Result; |
411 | if (auto *S = llvm::dyn_cast<ScalarNode>(&N)) { |
412 | llvm::SmallString<256> Buf; |
413 | Result.emplace_back(S->getValue(Buf).str(), N.getSourceRange()); |
414 | } else if (auto *S = llvm::dyn_cast<BlockScalarNode>(&N)) { |
415 | Result.emplace_back(S->getValue().str(), N.getSourceRange()); |
416 | } else if (auto *S = llvm::dyn_cast<SequenceNode>(&N)) { |
417 | // We *must* consume all items, even on error, or the parser will assert. |
418 | for (auto &Child : *S) { |
419 | if (auto Value = scalarValue(Child, "List item" )) |
420 | Result.push_back(std::move(*Value)); |
421 | } |
422 | } else { |
423 | warning("Expected scalar or list of scalars" , N); |
424 | return std::nullopt; |
425 | } |
426 | return Result; |
427 | } |
428 | |
429 | // Report a "hard" error, reflecting a config file that can never be valid. |
430 | void error(const llvm::Twine &Msg, llvm::SMRange Range) { |
431 | HadError = true; |
432 | SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Error, Msg, Range); |
433 | } |
434 | void error(const llvm::Twine &Msg, const Node &N) { |
435 | return error(Msg, N.getSourceRange()); |
436 | } |
437 | |
438 | // Report a "soft" error that could be caused by e.g. version skew. |
439 | void warning(const llvm::Twine &Msg, llvm::SMRange Range) { |
440 | SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Warning, Msg, Range); |
441 | } |
442 | void warning(const llvm::Twine &Msg, const Node &N) { |
443 | return warning(Msg, N.getSourceRange()); |
444 | } |
445 | }; |
446 | |
447 | } // namespace |
448 | |
449 | std::vector<Fragment> Fragment::parseYAML(llvm::StringRef YAML, |
450 | llvm::StringRef BufferName, |
451 | DiagnosticCallback Diags) { |
452 | // The YAML document may contain multiple conditional fragments. |
453 | // The SourceManager is shared for all of them. |
454 | auto SM = std::make_shared<llvm::SourceMgr>(); |
455 | auto Buf = llvm::MemoryBuffer::getMemBufferCopy(YAML, BufferName); |
456 | // Adapt DiagnosticCallback to function-pointer interface. |
457 | // Callback receives both errors we emit and those from the YAML parser. |
458 | SM->setDiagHandler( |
459 | [](const llvm::SMDiagnostic &Diag, void *Ctx) { |
460 | (*reinterpret_cast<DiagnosticCallback *>(Ctx))(Diag); |
461 | }, |
462 | &Diags); |
463 | std::vector<Fragment> Result; |
464 | for (auto &Doc : llvm::yaml::Stream(*Buf, *SM)) { |
465 | if (Node *N = Doc.getRoot()) { |
466 | Fragment Fragment; |
467 | Fragment.Source.Manager = SM; |
468 | Fragment.Source.Location = N->getSourceRange().Start; |
469 | SM->PrintMessage(Fragment.Source.Location, llvm::SourceMgr::DK_Note, |
470 | "Parsing config fragment" ); |
471 | if (Parser(*SM).parse(Fragment, *N)) |
472 | Result.push_back(std::move(Fragment)); |
473 | } |
474 | } |
475 | SM->PrintMessage(SM->FindLocForLineAndColumn(SM->getMainFileID(), 0, 0), |
476 | llvm::SourceMgr::DK_Note, |
477 | "Parsed " + llvm::Twine(Result.size()) + |
478 | " fragments from file" ); |
479 | // Hack: stash the buffer in the SourceMgr to keep it alive. |
480 | // SM has two entries: "main" non-owning buffer, and ignored owning buffer. |
481 | SM->AddNewSourceBuffer(std::move(Buf), llvm::SMLoc()); |
482 | return Result; |
483 | } |
484 | |
485 | } // namespace config |
486 | } // namespace clangd |
487 | } // namespace clang |
488 | |