1#include "ConfigProcessor.h"
2
3#include <sys/utsname.h>
4#include <cerrno>
5#include <cstdlib>
6#include <cstring>
7#include <algorithm>
8#include <iostream>
9#include <functional>
10#include <Poco/DOM/Text.h>
11#include <Poco/DOM/Attr.h>
12#include <Poco/DOM/Comment.h>
13#include <Poco/Util/XMLConfiguration.h>
14#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
15#include <Common/ZooKeeper/KeeperException.h>
16#include <Common/StringUtils/StringUtils.h>
17
18#define PREPROCESSED_SUFFIX "-preprocessed"
19
20
21using namespace Poco::XML;
22
23namespace DB
24{
25
/// Base path that is cut from the beginning of a config's path when the name of its
/// preprocessed copy is built in savePreprocessedConfig().
/// Assigned by ConfigProcessor::setConfigPath(); empty until that is called.
static std::string main_config_path;
28
29/// Extracts from a string the first encountered number consisting of at least two digits.
30static std::string numberFromHost(const std::string & s)
31{
32 for (size_t i = 0; i < s.size(); ++i)
33 {
34 std::string res;
35 size_t j = i;
36 while (j < s.size() && isNumericASCII(s[j]))
37 res += s[j++];
38 if (res.size() >= 2)
39 {
40 while (res[0] == '0')
41 res.erase(res.begin());
42 return res;
43 }
44 }
45 return "";
46}
47
48bool ConfigProcessor::isPreprocessedFile(const std::string & path)
49{
50 return endsWith(Poco::Path(path).getBaseName(), PREPROCESSED_SUFFIX);
51}
52
53
54ConfigProcessor::ConfigProcessor(
55 const std::string & path_,
56 bool throw_on_bad_incl_,
57 bool log_to_console,
58 const Substitutions & substitutions_)
59 : path(path_)
60 , throw_on_bad_incl(throw_on_bad_incl_)
61 , substitutions(substitutions_)
62 /// We need larger name pool to allow to support vast amount of users in users.xml files for ClickHouse.
63 /// Size is prime because Poco::XML::NamePool uses bad (inefficient, low quality)
64 /// hash function internally, and its size was prime by default.
65 , name_pool(new Poco::XML::NamePool(65521))
66 , dom_parser(name_pool)
67{
68 if (log_to_console && !Logger::has("ConfigProcessor"))
69 {
70 channel_ptr = new Poco::ConsoleChannel;
71 log = &Logger::create("ConfigProcessor", channel_ptr.get(), Poco::Message::PRIO_TRACE);
72 }
73 else
74 {
75 log = &Logger::get("ConfigProcessor");
76 }
77}
78
79ConfigProcessor::~ConfigProcessor()
80{
81 if (channel_ptr) /// This means we have created a new console logger in the constructor.
82 Logger::destroy("ConfigProcessor");
83}
84
85
/// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes and the attributes listed in
/// ConfigProcessor::SUBSTITUTION_ATTRS).
/// Serves as a unique identifier of the element contents for comparison.
using ElementIdentifier = std::vector<std::string>;

/// Poco::AutoPtr holding the attribute map returned by Node::attributes().
using NamedNodeMapPtr = Poco::AutoPtr<Poco::XML::NamedNodeMap>;
/// Poco::AutoPtr holding the node list returned by Node::childNodes().
/// NOTE getting rid of iterating over the result of Node.childNodes() call is a good idea
/// because accessing the i-th element of this list takes O(i) time.
using NodeListPtr = Poco::AutoPtr<Poco::XML::NodeList>;
95
96static ElementIdentifier getElementIdentifier(Node * element)
97{
98 const NamedNodeMapPtr attrs = element->attributes();
99 std::vector<std::pair<std::string, std::string>> attrs_kv;
100 for (size_t i = 0, size = attrs->length(); i < size; ++i)
101 {
102 const Node * node = attrs->item(i);
103 std::string name = node->nodeName();
104 auto subst_name_pos = std::find(ConfigProcessor::SUBSTITUTION_ATTRS.begin(), ConfigProcessor::SUBSTITUTION_ATTRS.end(), name);
105 if (name == "replace" || name == "remove" ||
106 subst_name_pos != ConfigProcessor::SUBSTITUTION_ATTRS.end())
107 continue;
108 std::string value = node->nodeValue();
109 attrs_kv.push_back(std::make_pair(name, value));
110 }
111 std::sort(attrs_kv.begin(), attrs_kv.end());
112
113 ElementIdentifier res;
114 res.push_back(element->nodeName());
115 for (const auto & attr : attrs_kv)
116 {
117 res.push_back(attr.first);
118 res.push_back(attr.second);
119 }
120
121 return res;
122}
123
124static Node * getRootNode(Document * document)
125{
126 const NodeListPtr children = document->childNodes();
127 for (size_t i = 0, size = children->length(); i < size; ++i)
128 {
129 Node * child = children->item(i);
130 /// Besides the root element there can be comment nodes on the top level.
131 /// Skip them.
132 if (child->nodeType() == Node::ELEMENT_NODE)
133 return child;
134 }
135
136 throw Poco::Exception("No root node in document");
137}
138
/// Returns true if `s` consists only of spaces, tabs, line feeds and carriage returns.
/// An empty string is considered all-whitespace.
static bool allWhitespace(const std::string & s)
{
    for (const char symbol : s)
    {
        const bool is_blank = symbol == ' ' || symbol == '\t' || symbol == '\n' || symbol == '\r';
        if (!is_blank)
            return false;
    }
    return true;
}
143
144void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
145{
146 const NodeListPtr with_nodes = with_root->childNodes();
147 using ElementsByIdentifier = std::multimap<ElementIdentifier, Node *>;
148 ElementsByIdentifier config_element_by_id;
149 for (Node * node = config_root->firstChild(); node;)
150 {
151 Node * next_node = node->nextSibling();
152 /// Remove text from the original config node.
153 if (node->nodeType() == Node::TEXT_NODE && !allWhitespace(node->getNodeValue()))
154 {
155 config_root->removeChild(node);
156 }
157 else if (node->nodeType() == Node::ELEMENT_NODE)
158 {
159 config_element_by_id.insert(ElementsByIdentifier::value_type(getElementIdentifier(node), node));
160 }
161 node = next_node;
162 }
163
164 for (size_t i = 0, size = with_nodes->length(); i < size; ++i)
165 {
166 Node * with_node = with_nodes->item(i);
167
168 bool merged = false;
169 bool remove = false;
170 if (with_node->nodeType() == Node::ELEMENT_NODE)
171 {
172 Element & with_element = dynamic_cast<Element &>(*with_node);
173 remove = with_element.hasAttribute("remove");
174 bool replace = with_element.hasAttribute("replace");
175
176 if (remove && replace)
177 throw Poco::Exception("both remove and replace attributes set for element <" + with_node->nodeName() + ">");
178
179 ElementsByIdentifier::iterator it = config_element_by_id.find(getElementIdentifier(with_node));
180
181 if (it != config_element_by_id.end())
182 {
183 Node * config_node = it->second;
184 config_element_by_id.erase(it);
185
186 if (remove)
187 {
188 config_root->removeChild(config_node);
189 }
190 else if (replace)
191 {
192 with_element.removeAttribute("replace");
193 NodePtr new_node = config->importNode(with_node, true);
194 config_root->replaceChild(new_node, config_node);
195 }
196 else
197 {
198 mergeRecursive(config, config_node, with_node);
199 }
200 merged = true;
201 }
202 }
203 if (!merged && !remove)
204 {
205 NodePtr new_node = config->importNode(with_node, true);
206 config_root->appendChild(new_node);
207 }
208 }
209}
210
211void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
212{
213 Node * config_root = getRootNode(config.get());
214 Node * with_root = getRootNode(with.get());
215
216 if (config_root->nodeName() != with_root->nodeName())
217 throw Poco::Exception("Root element doesn't have the corresponding root element as the config file. It must be <" + config_root->nodeName() + ">");
218
219 mergeRecursive(config, config_root, with_root);
220}
221
222std::string ConfigProcessor::layerFromHost()
223{
224 utsname buf;
225 if (uname(&buf))
226 throw Poco::Exception(std::string("uname failed: ") + std::strerror(errno));
227
228 std::string layer = numberFromHost(buf.nodename);
229 if (layer.empty())
230 throw Poco::Exception(std::string("no layer in host name: ") + buf.nodename);
231
232 return layer;
233}
234
235void ConfigProcessor::doIncludesRecursive(
236 XMLDocumentPtr config,
237 XMLDocumentPtr include_from,
238 Node * node,
239 zkutil::ZooKeeperNodeCache * zk_node_cache,
240 const zkutil::EventPtr & zk_changed_event,
241 std::unordered_set<std::string> & contributing_zk_paths)
242{
243 if (node->nodeType() == Node::TEXT_NODE)
244 {
245 for (auto & substitution : substitutions)
246 {
247 std::string value = node->nodeValue();
248
249 bool replace_occured = false;
250 size_t pos;
251 while ((pos = value.find(substitution.first)) != std::string::npos)
252 {
253 value.replace(pos, substitution.first.length(), substitution.second);
254 replace_occured = true;
255 }
256
257 if (replace_occured)
258 node->setNodeValue(value);
259 }
260 }
261
262 if (node->nodeType() != Node::ELEMENT_NODE)
263 return;
264
265 /// Substitute <layer> for the number extracted from the hostname only if there is an
266 /// empty <layer> tag without attributes in the original file.
267 if (node->nodeName() == "layer"
268 && !node->hasAttributes()
269 && !node->hasChildNodes()
270 && node->nodeValue().empty())
271 {
272 NodePtr new_node = config->createTextNode(layerFromHost());
273 node->appendChild(new_node);
274 return;
275 }
276
277 std::map<std::string, const Node *> attr_nodes;
278 NamedNodeMapPtr attributes = node->attributes();
279 size_t substs_count = 0;
280 for (const auto & attr_name : SUBSTITUTION_ATTRS)
281 {
282 auto subst = attributes->getNamedItem(attr_name);
283 attr_nodes[attr_name] = subst;
284 substs_count += static_cast<size_t>(subst == nullptr);
285 }
286
287 if (substs_count < SUBSTITUTION_ATTRS.size() - 1) /// only one substitution is allowed
288 throw Poco::Exception("several substitutions attributes set for element <" + node->nodeName() + ">");
289
290 /// Replace the original contents, not add to it.
291 bool replace = attributes->getNamedItem("replace");
292
293 bool included_something = false;
294
295 auto process_include = [&](const Node * include_attr, const std::function<const Node * (const std::string &)> & get_node, const char * error_msg)
296 {
297 std::string name = include_attr->getNodeValue();
298 const Node * node_to_include = get_node(name);
299 if (!node_to_include)
300 {
301 if (attributes->getNamedItem("optional"))
302 node->parentNode()->removeChild(node);
303 else if (throw_on_bad_incl)
304 throw Poco::Exception(error_msg + name);
305 else
306 LOG_WARNING(log, error_msg << name);
307 }
308 else
309 {
310 Element & element = dynamic_cast<Element &>(*node);
311
312 for (const auto & attr_name : SUBSTITUTION_ATTRS)
313 element.removeAttribute(attr_name);
314
315 if (replace)
316 {
317 while (Node * child = node->firstChild())
318 node->removeChild(child);
319
320 element.removeAttribute("replace");
321 }
322
323 const NodeListPtr children = node_to_include->childNodes();
324 for (size_t i = 0, size = children->length(); i < size; ++i)
325 {
326 NodePtr new_node = config->importNode(children->item(i), true);
327 node->appendChild(new_node);
328 }
329
330 const NamedNodeMapPtr from_attrs = node_to_include->attributes();
331 for (size_t i = 0, size = from_attrs->length(); i < size; ++i)
332 {
333 element.setAttributeNode(dynamic_cast<Attr *>(config->importNode(from_attrs->item(i), true)));
334 }
335
336 included_something = true;
337 }
338 };
339
340 if (attr_nodes["incl"]) // we have include subst
341 {
342 auto get_incl_node = [&](const std::string & name)
343 {
344 return include_from ? include_from->getNodeByPath("yandex/" + name) : nullptr;
345 };
346
347 process_include(attr_nodes["incl"], get_incl_node, "Include not found: ");
348 }
349
350 if (attr_nodes["from_zk"]) /// we have zookeeper subst
351 {
352 contributing_zk_paths.insert(attr_nodes["from_zk"]->getNodeValue());
353
354 if (zk_node_cache)
355 {
356 XMLDocumentPtr zk_document;
357 auto get_zk_node = [&](const std::string & name) -> const Node *
358 {
359 zkutil::ZooKeeperNodeCache::ZNode znode = zk_node_cache->get(name, zk_changed_event);
360 if (!znode.exists)
361 return nullptr;
362
363 /// Enclose contents into a fake <from_zk> tag to allow pure text substitutions.
364 zk_document = dom_parser.parseString("<from_zk>" + znode.contents + "</from_zk>");
365 return getRootNode(zk_document.get());
366 };
367
368 process_include(attr_nodes["from_zk"], get_zk_node, "Could not get ZooKeeper node: ");
369 }
370 }
371
372 if (attr_nodes["from_env"]) /// we have env subst
373 {
374 XMLDocumentPtr env_document;
375 auto get_env_node = [&](const std::string & name) -> const Node *
376 {
377 const char * env_val = std::getenv(name.c_str());
378 if (env_val == nullptr)
379 return nullptr;
380
381 env_document = dom_parser.parseString("<from_env>" + std::string{env_val} + "</from_env>");
382
383 return getRootNode(env_document.get());
384 };
385
386 process_include(attr_nodes["from_env"], get_env_node, "Env variable is not set: ");
387 }
388
389 if (included_something)
390 doIncludesRecursive(config, include_from, node, zk_node_cache, zk_changed_event, contributing_zk_paths);
391 else
392 {
393 NodeListPtr children = node->childNodes();
394 Node * child = nullptr;
395 for (size_t i = 0; (child = children->item(i)); ++i)
396 doIncludesRecursive(config, include_from, child, zk_node_cache, zk_changed_event, contributing_zk_paths);
397 }
398}
399
400ConfigProcessor::Files ConfigProcessor::getConfigMergeFiles(const std::string & config_path)
401{
402 Files files;
403
404 Poco::Path merge_dir_path(config_path);
405 std::set<std::string> merge_dirs;
406
407 /// Add path_to_config/config_name.d dir
408 merge_dir_path.setExtension("d");
409 merge_dirs.insert(merge_dir_path.toString());
410 /// Add path_to_config/conf.d dir
411 merge_dir_path.setBaseName("conf");
412 merge_dirs.insert(merge_dir_path.toString());
413
414 for (const std::string & merge_dir_name : merge_dirs)
415 {
416 Poco::File merge_dir(merge_dir_name);
417 if (!merge_dir.exists() || !merge_dir.isDirectory())
418 continue;
419
420 for (Poco::DirectoryIterator it(merge_dir_name); it != Poco::DirectoryIterator(); ++it)
421 {
422 Poco::File & file = *it;
423 Poco::Path path(file.path());
424 std::string extension = path.getExtension();
425 std::string base_name = path.getBaseName();
426
427 // Skip non-config and temporary files
428 if (file.isFile() && (extension == "xml" || extension == "conf") && !startsWith(base_name, "."))
429 files.push_back(file.path());
430 }
431 }
432
433 std::sort(files.begin(), files.end());
434
435 return files;
436}
437
438XMLDocumentPtr ConfigProcessor::processConfig(
439 bool * has_zk_includes,
440 zkutil::ZooKeeperNodeCache * zk_node_cache,
441 const zkutil::EventPtr & zk_changed_event)
442{
443 XMLDocumentPtr config = dom_parser.parse(path);
444
445 std::vector<std::string> contributing_files;
446 contributing_files.push_back(path);
447
448 for (auto & merge_file : getConfigMergeFiles(path))
449 {
450 try
451 {
452 XMLDocumentPtr with = dom_parser.parse(merge_file);
453 merge(config, with);
454 contributing_files.push_back(merge_file);
455 }
456 catch (Exception & e)
457 {
458 e.addMessage("while merging config '" + path + "' with '" + merge_file + "'");
459 throw;
460 }
461 catch (Poco::Exception & e)
462 {
463 throw Poco::Exception("Failed to merge config with '" + merge_file + "': " + e.displayText());
464 }
465 }
466
467 std::unordered_set<std::string> contributing_zk_paths;
468 try
469 {
470 Node * node = config->getNodeByPath("yandex/include_from");
471 XMLDocumentPtr include_from;
472 std::string include_from_path;
473 if (node)
474 {
475 /// if we include_from env or zk.
476 doIncludesRecursive(config, nullptr, node, zk_node_cache, zk_changed_event, contributing_zk_paths);
477 include_from_path = node->innerText();
478 }
479 else
480 {
481 std::string default_path = "/etc/metrika.xml";
482 if (Poco::File(default_path).exists())
483 include_from_path = default_path;
484 }
485 if (!include_from_path.empty())
486 {
487 contributing_files.push_back(include_from_path);
488 include_from = dom_parser.parse(include_from_path);
489 }
490
491 doIncludesRecursive(config, include_from, getRootNode(config.get()), zk_node_cache, zk_changed_event, contributing_zk_paths);
492 }
493 catch (Exception & e)
494 {
495 e.addMessage("while preprocessing config '" + path + "'");
496 throw;
497 }
498 catch (Poco::Exception & e)
499 {
500 throw Poco::Exception("Failed to preprocess config '" + path + "': " + e.displayText(), e);
501 }
502
503 if (has_zk_includes)
504 *has_zk_includes = !contributing_zk_paths.empty();
505
506 std::stringstream comment;
507 comment << " This file was generated automatically.\n";
508 comment << " Do not edit it: it is likely to be discarded and generated again before it's read next time.\n";
509 comment << " Files used to generate this file:";
510 for (const std::string & contributing_file : contributing_files)
511 {
512 comment << "\n " << contributing_file;
513 }
514 if (zk_node_cache && !contributing_zk_paths.empty())
515 {
516 comment << "\n ZooKeeper nodes used to generate this file:";
517 for (const std::string & contributing_zk_path : contributing_zk_paths)
518 comment << "\n " << contributing_zk_path;
519 }
520
521 comment << " ";
522 NodePtr new_node = config->createTextNode("\n\n");
523 config->insertBefore(new_node, config->firstChild());
524 new_node = config->createComment(comment.str());
525 config->insertBefore(new_node, config->firstChild());
526
527 return config;
528}
529
530ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes)
531{
532 bool has_zk_includes;
533 XMLDocumentPtr config_xml = processConfig(&has_zk_includes);
534
535 if (has_zk_includes && !allow_zk_includes)
536 throw Poco::Exception("Error while loading config '" + path + "': from_zk includes are not allowed!");
537
538 ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml));
539
540 return LoadedConfig{configuration, has_zk_includes, /* loaded_from_preprocessed = */ false, config_xml, path};
541}
542
543ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
544 zkutil::ZooKeeperNodeCache & zk_node_cache,
545 const zkutil::EventPtr & zk_changed_event,
546 bool fallback_to_preprocessed)
547{
548 XMLDocumentPtr config_xml;
549 bool has_zk_includes;
550 bool processed_successfully = false;
551 try
552 {
553 config_xml = processConfig(&has_zk_includes, &zk_node_cache, zk_changed_event);
554 processed_successfully = true;
555 }
556 catch (const Poco::Exception & ex)
557 {
558 if (!fallback_to_preprocessed)
559 throw;
560
561 const auto * zk_exception = dynamic_cast<const Coordination::Exception *>(ex.nested());
562 if (!zk_exception)
563 throw;
564
565 LOG_WARNING(
566 log,
567 "Error while processing from_zk config includes: " + zk_exception->message() +
568 ". Config will be loaded from preprocessed file: " + preprocessed_path);
569
570 config_xml = dom_parser.parse(preprocessed_path);
571 }
572
573 ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml));
574
575 return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
576}
577
578void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir)
579{
580 try
581 {
582 if (preprocessed_path.empty())
583 {
584 auto new_path = loaded_config.config_path;
585 if (new_path.substr(0, main_config_path.size()) == main_config_path)
586 new_path.replace(0, main_config_path.size(), "");
587 std::replace(new_path.begin(), new_path.end(), '/', '_');
588
589 if (preprocessed_dir.empty())
590 {
591 if (!loaded_config.configuration->has("path"))
592 {
593 // Will use current directory
594 auto parent_path = Poco::Path(loaded_config.config_path).makeParent();
595 preprocessed_dir = parent_path.toString();
596 Poco::Path poco_new_path(new_path);
597 poco_new_path.setBaseName(poco_new_path.getBaseName() + PREPROCESSED_SUFFIX);
598 new_path = poco_new_path.toString();
599 }
600 else
601 {
602 preprocessed_dir = loaded_config.configuration->getString("path") + "/preprocessed_configs/";
603 }
604 }
605 else
606 {
607 preprocessed_dir += "/preprocessed_configs/";
608 }
609
610 preprocessed_path = preprocessed_dir + new_path;
611 auto preprocessed_path_parent = Poco::Path(preprocessed_path).makeParent();
612 if (!preprocessed_path_parent.toString().empty())
613 Poco::File(preprocessed_path_parent).createDirectories();
614 }
615 DOMWriter().writeNode(preprocessed_path, loaded_config.preprocessed_xml);
616 }
617 catch (Poco::Exception & e)
618 {
619 LOG_WARNING(log, "Couldn't save preprocessed config to " << preprocessed_path << ": " << e.displayText());
620 }
621}
622
623void ConfigProcessor::setConfigPath(const std::string & config_path)
624{
625 main_config_path = config_path;
626}
627
628}
629