1//===--- ConfigYAML.cpp - Loading configuration fragments from YAML files -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "ConfigFragment.h"
9#include "llvm/ADT/SmallSet.h"
10#include "llvm/ADT/SmallString.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/Support/MemoryBuffer.h"
13#include "llvm/Support/SourceMgr.h"
14#include "llvm/Support/YAMLParser.h"
15#include <optional>
16#include <string>
17#include <system_error>
18
19namespace clang {
20namespace clangd {
21namespace config {
22namespace {
23using llvm::yaml::BlockScalarNode;
24using llvm::yaml::MappingNode;
25using llvm::yaml::Node;
26using llvm::yaml::ScalarNode;
27using llvm::yaml::SequenceNode;
28
29std::optional<llvm::StringRef>
30bestGuess(llvm::StringRef Search,
31 llvm::ArrayRef<llvm::StringRef> AllowedValues) {
32 unsigned MaxEdit = (Search.size() + 1) / 3;
33 if (!MaxEdit)
34 return std::nullopt;
35 std::optional<llvm::StringRef> Result;
36 for (const auto &AllowedValue : AllowedValues) {
37 unsigned EditDistance = Search.edit_distance(AllowedValue, true, MaxEdit);
38 // We can't do better than an edit distance of 1, so just return this and
39 // save computing other values.
40 if (EditDistance == 1U)
41 return AllowedValue;
42 if (EditDistance == MaxEdit && !Result) {
43 Result = AllowedValue;
44 } else if (EditDistance < MaxEdit) {
45 Result = AllowedValue;
46 MaxEdit = EditDistance;
47 }
48 }
49 return Result;
50}
51
52class Parser {
53 llvm::SourceMgr &SM;
54 bool HadError = false;
55
56public:
57 Parser(llvm::SourceMgr &SM) : SM(SM) {}
58
59 // Tries to parse N into F, returning false if it failed and we couldn't
60 // meaningfully recover (YAML syntax error, or hard semantic error).
61 bool parse(Fragment &F, Node &N) {
62 DictParser Dict("Config", this);
63 Dict.handle("If", [&](Node &N) { parse(F.If, N); });
64 Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); });
65 Dict.handle("Index", [&](Node &N) { parse(F.Index, N); });
66 Dict.handle("Style", [&](Node &N) { parse(F.Style, N); });
67 Dict.handle("Diagnostics", [&](Node &N) { parse(F.Diagnostics, N); });
68 Dict.handle("Completion", [&](Node &N) { parse(F.Completion, N); });
69 Dict.handle("Hover", [&](Node &N) { parse(F.Hover, N); });
70 Dict.handle("InlayHints", [&](Node &N) { parse(F.InlayHints, N); });
71 Dict.handle("SemanticTokens", [&](Node &N) { parse(F.SemanticTokens, N); });
72 Dict.parse(N);
73 return !(N.failed() || HadError);
74 }
75
76private:
77 void parse(Fragment::IfBlock &F, Node &N) {
78 DictParser Dict("If", this);
79 Dict.unrecognized([&](Located<std::string>, Node &) {
80 F.HasUnrecognizedCondition = true;
81 return true; // Emit a warning for the unrecognized key.
82 });
83 Dict.handle("PathMatch", [&](Node &N) {
84 if (auto Values = scalarValues(N))
85 F.PathMatch = std::move(*Values);
86 });
87 Dict.handle("PathExclude", [&](Node &N) {
88 if (auto Values = scalarValues(N))
89 F.PathExclude = std::move(*Values);
90 });
91 Dict.parse(N);
92 }
93
94 void parse(Fragment::CompileFlagsBlock &F, Node &N) {
95 DictParser Dict("CompileFlags", this);
96 Dict.handle("Compiler", [&](Node &N) {
97 if (auto Value = scalarValue(N, "Compiler"))
98 F.Compiler = std::move(*Value);
99 });
100 Dict.handle("Add", [&](Node &N) {
101 if (auto Values = scalarValues(N))
102 F.Add = std::move(*Values);
103 });
104 Dict.handle("Remove", [&](Node &N) {
105 if (auto Values = scalarValues(N))
106 F.Remove = std::move(*Values);
107 });
108 Dict.handle("CompilationDatabase", [&](Node &N) {
109 F.CompilationDatabase = scalarValue(N, "CompilationDatabase");
110 });
111 Dict.parse(N);
112 }
113
114 void parse(Fragment::StyleBlock &F, Node &N) {
115 DictParser Dict("Style", this);
116 Dict.handle("FullyQualifiedNamespaces", [&](Node &N) {
117 if (auto Values = scalarValues(N))
118 F.FullyQualifiedNamespaces = std::move(*Values);
119 });
120 Dict.parse(N);
121 }
122
123 void parse(Fragment::DiagnosticsBlock &F, Node &N) {
124 DictParser Dict("Diagnostics", this);
125 Dict.handle("Suppress", [&](Node &N) {
126 if (auto Values = scalarValues(N))
127 F.Suppress = std::move(*Values);
128 });
129 Dict.handle("UnusedIncludes", [&](Node &N) {
130 F.UnusedIncludes = scalarValue(N, "UnusedIncludes");
131 });
132 Dict.handle("MissingIncludes", [&](Node &N) {
133 F.MissingIncludes = scalarValue(N, "MissingIncludes");
134 });
135 Dict.handle("Includes", [&](Node &N) { parse(F.Includes, N); });
136 Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); });
137 Dict.parse(N);
138 }
139
140 void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) {
141 DictParser Dict("ClangTidy", this);
142 Dict.handle("Add", [&](Node &N) {
143 if (auto Values = scalarValues(N))
144 F.Add = std::move(*Values);
145 });
146 Dict.handle("Remove", [&](Node &N) {
147 if (auto Values = scalarValues(N))
148 F.Remove = std::move(*Values);
149 });
150 Dict.handle("CheckOptions", [&](Node &N) {
151 DictParser CheckOptDict("CheckOptions", this);
152 CheckOptDict.unrecognized([&](Located<std::string> &&Key, Node &Val) {
153 if (auto Value = scalarValue(Val, *Key))
154 F.CheckOptions.emplace_back(std::move(Key), std::move(*Value));
155 return false; // Don't emit a warning
156 });
157 CheckOptDict.parse(N);
158 });
159 Dict.parse(N);
160 }
161
162 void parse(Fragment::DiagnosticsBlock::IncludesBlock &F, Node &N) {
163 DictParser Dict("Includes", this);
164 Dict.handle("IgnoreHeader", [&](Node &N) {
165 if (auto Values = scalarValues(N))
166 F.IgnoreHeader = std::move(*Values);
167 });
168 Dict.parse(N);
169 }
170
171 void parse(Fragment::IndexBlock &F, Node &N) {
172 DictParser Dict("Index", this);
173 Dict.handle("Background",
174 [&](Node &N) { F.Background = scalarValue(N, "Background"); });
175 Dict.handle("External", [&](Node &N) {
176 Fragment::IndexBlock::ExternalBlock External;
177 // External block can either be a mapping or a scalar value. Dispatch
178 // accordingly.
179 if (N.getType() == Node::NK_Mapping) {
180 parse(External, N);
181 } else if (N.getType() == Node::NK_Scalar ||
182 N.getType() == Node::NK_BlockScalar) {
183 parse(External, *scalarValue(N, "External"));
184 } else {
185 error("External must be either a scalar or a mapping.", N);
186 return;
187 }
188 F.External.emplace(std::move(External));
189 F.External->Range = N.getSourceRange();
190 });
191 Dict.handle("StandardLibrary", [&](Node &N) {
192 if (auto StandardLibrary = boolValue(N, "StandardLibrary"))
193 F.StandardLibrary = *StandardLibrary;
194 });
195 Dict.parse(N);
196 }
197
198 void parse(Fragment::IndexBlock::ExternalBlock &F,
199 Located<std::string> ExternalVal) {
200 if (!llvm::StringRef(*ExternalVal).equals_insensitive("none")) {
201 error("Only scalar value supported for External is 'None'",
202 ExternalVal.Range);
203 return;
204 }
205 F.IsNone = true;
206 F.IsNone.Range = ExternalVal.Range;
207 }
208
209 void parse(Fragment::IndexBlock::ExternalBlock &F, Node &N) {
210 DictParser Dict("External", this);
211 Dict.handle("File", [&](Node &N) { F.File = scalarValue(N, "File"); });
212 Dict.handle("Server",
213 [&](Node &N) { F.Server = scalarValue(N, "Server"); });
214 Dict.handle("MountPoint",
215 [&](Node &N) { F.MountPoint = scalarValue(N, "MountPoint"); });
216 Dict.parse(N);
217 }
218
219 void parse(Fragment::CompletionBlock &F, Node &N) {
220 DictParser Dict("Completion", this);
221 Dict.handle("AllScopes", [&](Node &N) {
222 if (auto AllScopes = boolValue(N, "AllScopes"))
223 F.AllScopes = *AllScopes;
224 });
225 Dict.parse(N);
226 }
227
228 void parse(Fragment::HoverBlock &F, Node &N) {
229 DictParser Dict("Hover", this);
230 Dict.handle("ShowAKA", [&](Node &N) {
231 if (auto ShowAKA = boolValue(N, "ShowAKA"))
232 F.ShowAKA = *ShowAKA;
233 });
234 Dict.parse(N);
235 }
236
237 void parse(Fragment::InlayHintsBlock &F, Node &N) {
238 DictParser Dict("InlayHints", this);
239 Dict.handle("Enabled", [&](Node &N) {
240 if (auto Value = boolValue(N, "Enabled"))
241 F.Enabled = *Value;
242 });
243 Dict.handle("ParameterNames", [&](Node &N) {
244 if (auto Value = boolValue(N, "ParameterNames"))
245 F.ParameterNames = *Value;
246 });
247 Dict.handle("DeducedTypes", [&](Node &N) {
248 if (auto Value = boolValue(N, "DeducedTypes"))
249 F.DeducedTypes = *Value;
250 });
251 Dict.handle("Designators", [&](Node &N) {
252 if (auto Value = boolValue(N, "Designators"))
253 F.Designators = *Value;
254 });
255 Dict.handle("BlockEnd", [&](Node &N) {
256 if (auto Value = boolValue(N, "BlockEnd"))
257 F.BlockEnd = *Value;
258 });
259 Dict.handle("TypeNameLimit", [&](Node &N) {
260 if (auto Value = uint32Value(N, "TypeNameLimit"))
261 F.TypeNameLimit = *Value;
262 });
263 Dict.parse(N);
264 }
265
266 void parse(Fragment::SemanticTokensBlock &F, Node &N) {
267 DictParser Dict("SemanticTokens", this);
268 Dict.handle("DisabledKinds", [&](Node &N) {
269 if (auto Values = scalarValues(N))
270 F.DisabledKinds = std::move(*Values);
271 });
272 Dict.handle("DisabledModifiers", [&](Node &N) {
273 if (auto Values = scalarValues(N))
274 F.DisabledModifiers = std::move(*Values);
275 });
276 Dict.parse(N);
277 }
278
279 // Helper for parsing mapping nodes (dictionaries).
280 // We don't use YamlIO as we want to control over unknown keys.
281 class DictParser {
282 llvm::StringRef Description;
283 std::vector<std::pair<llvm::StringRef, std::function<void(Node &)>>> Keys;
284 std::function<bool(Located<std::string>, Node &)> UnknownHandler;
285 Parser *Outer;
286
287 public:
288 DictParser(llvm::StringRef Description, Parser *Outer)
289 : Description(Description), Outer(Outer) {}
290
291 // Parse is called when Key is encountered, and passed the associated value.
292 // It should emit diagnostics if the value is invalid (e.g. wrong type).
293 // If Key is seen twice, Parse runs only once and an error is reported.
294 void handle(llvm::StringLiteral Key, std::function<void(Node &)> Parse) {
295 for (const auto &Entry : Keys) {
296 (void)Entry;
297 assert(Entry.first != Key && "duplicate key handler");
298 }
299 Keys.emplace_back(Key, std::move(Parse));
300 }
301
302 // Handler is called when a Key is not matched by any handle().
303 // If this is unset or the Handler returns true, a warning is emitted for
304 // the unknown key.
305 void
306 unrecognized(std::function<bool(Located<std::string>, Node &)> Handler) {
307 UnknownHandler = std::move(Handler);
308 }
309
310 // Process a mapping node and call handlers for each key/value pair.
311 void parse(Node &N) const {
312 if (N.getType() != Node::NK_Mapping) {
313 Outer->error(Description + " should be a dictionary", N);
314 return;
315 }
316 llvm::SmallSet<std::string, 8> Seen;
317 llvm::SmallVector<Located<std::string>, 0> UnknownKeys;
318 // We *must* consume all items, even on error, or the parser will assert.
319 for (auto &KV : llvm::cast<MappingNode>(N)) {
320 auto *K = KV.getKey();
321 if (!K) // YAMLParser emitted an error.
322 continue;
323 auto Key = Outer->scalarValue(*K, "Dictionary key");
324 if (!Key)
325 continue;
326 if (!Seen.insert(**Key).second) {
327 Outer->warning("Duplicate key " + **Key + " is ignored", *K);
328 if (auto *Value = KV.getValue())
329 Value->skip();
330 continue;
331 }
332 auto *Value = KV.getValue();
333 if (!Value) // YAMLParser emitted an error.
334 continue;
335 bool Matched = false;
336 for (const auto &Handler : Keys) {
337 if (Handler.first == **Key) {
338 Matched = true;
339 Handler.second(*Value);
340 break;
341 }
342 }
343 if (!Matched) {
344 bool Warn = !UnknownHandler;
345 if (UnknownHandler)
346 Warn = UnknownHandler(
347 Located<std::string>(**Key, K->getSourceRange()), *Value);
348 if (Warn)
349 UnknownKeys.push_back(std::move(*Key));
350 }
351 }
352 if (!UnknownKeys.empty())
353 warnUnknownKeys(UnknownKeys, Seen);
354 }
355
356 private:
357 void warnUnknownKeys(llvm::ArrayRef<Located<std::string>> UnknownKeys,
358 const llvm::SmallSet<std::string, 8> &SeenKeys) const {
359 llvm::SmallVector<llvm::StringRef> UnseenKeys;
360 for (const auto &KeyAndHandler : Keys)
361 if (!SeenKeys.count(KeyAndHandler.first.str()))
362 UnseenKeys.push_back(KeyAndHandler.first);
363
364 for (const Located<std::string> &UnknownKey : UnknownKeys)
365 if (auto BestGuess = bestGuess(*UnknownKey, UnseenKeys))
366 Outer->warning("Unknown " + Description + " key '" + *UnknownKey +
367 "'; did you mean '" + *BestGuess + "'?",
368 UnknownKey.Range);
369 else
370 Outer->warning("Unknown " + Description + " key '" + *UnknownKey +
371 "'",
372 UnknownKey.Range);
373 }
374 };
375
376 // Try to parse a single scalar value from the node, warn on failure.
377 std::optional<Located<std::string>> scalarValue(Node &N,
378 llvm::StringRef Desc) {
379 llvm::SmallString<256> Buf;
380 if (auto *S = llvm::dyn_cast<ScalarNode>(&N))
381 return Located<std::string>(S->getValue(Buf).str(), N.getSourceRange());
382 if (auto *BS = llvm::dyn_cast<BlockScalarNode>(&N))
383 return Located<std::string>(BS->getValue().str(), N.getSourceRange());
384 warning(Desc + " should be scalar", N);
385 return std::nullopt;
386 }
387
388 std::optional<Located<bool>> boolValue(Node &N, llvm::StringRef Desc) {
389 if (auto Scalar = scalarValue(N, Desc)) {
390 if (auto Bool = llvm::yaml::parseBool(**Scalar))
391 return Located<bool>(*Bool, Scalar->Range);
392 warning(Desc + " should be a boolean", N);
393 }
394 return std::nullopt;
395 }
396
397 std::optional<Located<uint32_t>> uint32Value(Node &N, llvm::StringRef Desc) {
398 if (auto Scalar = scalarValue(N, Desc)) {
399 unsigned long long Num;
400 if (!llvm::getAsUnsignedInteger(**Scalar, 0, Num)) {
401 return Located<uint32_t>(Num, Scalar->Range);
402 }
403 }
404 warning(Desc + " invalid number", N);
405 return std::nullopt;
406 }
407
408 // Try to parse a list of single scalar values, or just a single value.
409 std::optional<std::vector<Located<std::string>>> scalarValues(Node &N) {
410 std::vector<Located<std::string>> Result;
411 if (auto *S = llvm::dyn_cast<ScalarNode>(&N)) {
412 llvm::SmallString<256> Buf;
413 Result.emplace_back(S->getValue(Buf).str(), N.getSourceRange());
414 } else if (auto *S = llvm::dyn_cast<BlockScalarNode>(&N)) {
415 Result.emplace_back(S->getValue().str(), N.getSourceRange());
416 } else if (auto *S = llvm::dyn_cast<SequenceNode>(&N)) {
417 // We *must* consume all items, even on error, or the parser will assert.
418 for (auto &Child : *S) {
419 if (auto Value = scalarValue(Child, "List item"))
420 Result.push_back(std::move(*Value));
421 }
422 } else {
423 warning("Expected scalar or list of scalars", N);
424 return std::nullopt;
425 }
426 return Result;
427 }
428
429 // Report a "hard" error, reflecting a config file that can never be valid.
430 void error(const llvm::Twine &Msg, llvm::SMRange Range) {
431 HadError = true;
432 SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Error, Msg, Range);
433 }
434 void error(const llvm::Twine &Msg, const Node &N) {
435 return error(Msg, N.getSourceRange());
436 }
437
438 // Report a "soft" error that could be caused by e.g. version skew.
439 void warning(const llvm::Twine &Msg, llvm::SMRange Range) {
440 SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Warning, Msg, Range);
441 }
442 void warning(const llvm::Twine &Msg, const Node &N) {
443 return warning(Msg, N.getSourceRange());
444 }
445};
446
447} // namespace
448
449std::vector<Fragment> Fragment::parseYAML(llvm::StringRef YAML,
450 llvm::StringRef BufferName,
451 DiagnosticCallback Diags) {
452 // The YAML document may contain multiple conditional fragments.
453 // The SourceManager is shared for all of them.
454 auto SM = std::make_shared<llvm::SourceMgr>();
455 auto Buf = llvm::MemoryBuffer::getMemBufferCopy(YAML, BufferName);
456 // Adapt DiagnosticCallback to function-pointer interface.
457 // Callback receives both errors we emit and those from the YAML parser.
458 SM->setDiagHandler(
459 [](const llvm::SMDiagnostic &Diag, void *Ctx) {
460 (*reinterpret_cast<DiagnosticCallback *>(Ctx))(Diag);
461 },
462 &Diags);
463 std::vector<Fragment> Result;
464 for (auto &Doc : llvm::yaml::Stream(*Buf, *SM)) {
465 if (Node *N = Doc.getRoot()) {
466 Fragment Fragment;
467 Fragment.Source.Manager = SM;
468 Fragment.Source.Location = N->getSourceRange().Start;
469 SM->PrintMessage(Fragment.Source.Location, llvm::SourceMgr::DK_Note,
470 "Parsing config fragment");
471 if (Parser(*SM).parse(Fragment, *N))
472 Result.push_back(std::move(Fragment));
473 }
474 }
475 SM->PrintMessage(SM->FindLocForLineAndColumn(SM->getMainFileID(), 0, 0),
476 llvm::SourceMgr::DK_Note,
477 "Parsed " + llvm::Twine(Result.size()) +
478 " fragments from file");
479 // Hack: stash the buffer in the SourceMgr to keep it alive.
480 // SM has two entries: "main" non-owning buffer, and ignored owning buffer.
481 SM->AddNewSourceBuffer(std::move(Buf), llvm::SMLoc());
482 return Result;
483}
484
485} // namespace config
486} // namespace clangd
487} // namespace clang
488