1#include <IO/ReadHelpers.h>
2#include <Common/Exception.h>
3#include <Common/StringUtils/StringUtils.h>
4#include <Poco/Util/AbstractConfiguration.h>
5#include <Compression/ICompressionCodec.h>
6#include <Compression/CompressionFactory.h>
7#include <Compression/CompressionInfo.h>
8
9namespace DB
10{
11
/// Error codes referenced by this file. They are only declared here (`extern`);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    /// Not referenced in the visible part of this file.
    extern const int UNKNOWN_COMPRESSION_METHOD;
    /// Thrown by CompressionCodecSelector when a config key is not a `case` element.
    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
17
18
19/** Allows you to select the compression settings for the conditions specified in the configuration file.
20 * The config looks like this
21
22 <compression>
23
24 <!-- Set of options. Options are checked in a row. The last worked option wins. If none has worked, then lz4 is used. -->
25 <case>
26
27 <!-- Conditions. All must be satisfied simultaneously. Some conditions may not be specified. -->
28 <min_part_size>10000000000</min_part_size> <!-- The minimum size of a part in bytes. -->
29 <min_part_size_ratio>0.01</min_part_size_ratio> <!-- The minimum size of the part relative to all the data in the table. -->
30
31 <!-- Which compression method to choose. -->
32 <method>zstd</method>
33 <level>2</level>
34 </case>
35
36 <case>
37 ...
38 </case>
39 </compression>
40 */
41class CompressionCodecSelector
42{
43private:
44 struct Element
45 {
46 size_t min_part_size = 0;
47 double min_part_size_ratio = 0;
48 std::string family_name;
49 std::optional<int> level;
50
51
52 Element(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
53 {
54 min_part_size = config.getUInt64(config_prefix + ".min_part_size", 0);
55 min_part_size_ratio = config.getDouble(config_prefix + ".min_part_size_ratio", 0);
56
57 family_name = config.getString(config_prefix + ".method", "lz4");
58 if (config.has(config_prefix + ".level"))
59 level = config.getInt64(config_prefix + ".level");
60 }
61
62 bool check(size_t part_size, double part_size_ratio) const
63 {
64 return part_size >= min_part_size
65 && part_size_ratio >= min_part_size_ratio;
66 }
67 };
68
69 std::vector<Element> elements;
70
71public:
72 CompressionCodecSelector() {} /// Always returns the default method.
73
74 CompressionCodecSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
75 {
76 Poco::Util::AbstractConfiguration::Keys keys;
77 config.keys(config_prefix, keys);
78
79 for (const auto & name : keys)
80 {
81 if (!startsWith(name.data(), "case"))
82 throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
83
84 elements.emplace_back(config, config_prefix + "." + name);
85 }
86 }
87
88 CompressionCodecPtr choose(size_t part_size, double part_size_ratio) const
89 {
90 const auto & factory = CompressionCodecFactory::instance();
91 CompressionCodecPtr res = factory.getDefaultCodec();
92
93 for (const auto & element : elements)
94 if (element.check(part_size, part_size_ratio))
95 res = factory.get(element.family_name, element.level);
96
97 return res;
98 }
99};
100
101}
102