1 | #include <IO/ReadHelpers.h> |
2 | #include <Common/Exception.h> |
3 | #include <Common/StringUtils/StringUtils.h> |
4 | #include <Poco/Util/AbstractConfiguration.h> |
5 | #include <Compression/ICompressionCodec.h> |
6 | #include <Compression/CompressionFactory.h> |
7 | #include <Compression/CompressionInfo.h> |
8 | |
9 | namespace DB |
10 | { |
11 | |
/// Error codes declared for this file. These are declarations only (extern); the values are defined elsewhere.
namespace ErrorCodes
{
    /// Used by CompressionCodecSelector when a config child key does not start with "case".
    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
    /// Not referenced by the code visible in this file.
    extern const int UNKNOWN_COMPRESSION_METHOD;
}
17 | |
18 | |
/** Selects the compression settings according to the conditions specified in the configuration file.
  * The config looks like this:

    <compression>

        <!-- Set of options. Options are checked in order. The last option whose conditions are satisfied wins. If none matches, the default codec (lz4) is used. -->
        <case>

            <!-- Conditions. All must be satisfied simultaneously. Any condition may be omitted. -->
            <min_part_size>10000000000</min_part_size>         <!-- The minimum size of a part in bytes. -->
            <min_part_size_ratio>0.01</min_part_size_ratio>    <!-- The minimum size of the part relative to all the data in the table. -->

            <!-- Which compression method to choose. -->
            <method>zstd</method>
            <level>2</level>
        </case>

        <case>
            ...
        </case>
    </compression>
  */
41 | class CompressionCodecSelector |
42 | { |
43 | private: |
44 | struct Element |
45 | { |
46 | size_t min_part_size = 0; |
47 | double min_part_size_ratio = 0; |
48 | std::string family_name; |
49 | std::optional<int> level; |
50 | |
51 | |
52 | Element(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
53 | { |
54 | min_part_size = config.getUInt64(config_prefix + ".min_part_size" , 0); |
55 | min_part_size_ratio = config.getDouble(config_prefix + ".min_part_size_ratio" , 0); |
56 | |
57 | family_name = config.getString(config_prefix + ".method" , "lz4" ); |
58 | if (config.has(config_prefix + ".level" )) |
59 | level = config.getInt64(config_prefix + ".level" ); |
60 | } |
61 | |
62 | bool check(size_t part_size, double part_size_ratio) const |
63 | { |
64 | return part_size >= min_part_size |
65 | && part_size_ratio >= min_part_size_ratio; |
66 | } |
67 | }; |
68 | |
69 | std::vector<Element> elements; |
70 | |
71 | public: |
72 | CompressionCodecSelector() {} /// Always returns the default method. |
73 | |
74 | CompressionCodecSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
75 | { |
76 | Poco::Util::AbstractConfiguration::Keys keys; |
77 | config.keys(config_prefix, keys); |
78 | |
79 | for (const auto & name : keys) |
80 | { |
81 | if (!startsWith(name.data(), "case" )) |
82 | throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'" , ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); |
83 | |
84 | elements.emplace_back(config, config_prefix + "." + name); |
85 | } |
86 | } |
87 | |
88 | CompressionCodecPtr choose(size_t part_size, double part_size_ratio) const |
89 | { |
90 | const auto & factory = CompressionCodecFactory::instance(); |
91 | CompressionCodecPtr res = factory.getDefaultCodec(); |
92 | |
93 | for (const auto & element : elements) |
94 | if (element.check(part_size, part_size_ratio)) |
95 | res = factory.get(element.family_name, element.level); |
96 | |
97 | return res; |
98 | } |
99 | }; |
100 | |
101 | } |
102 | |