| 1 | #include <IO/ReadHelpers.h> | 
|---|
| 2 | #include <Common/Exception.h> | 
|---|
| 3 | #include <Common/StringUtils/StringUtils.h> | 
|---|
| 4 | #include <Poco/Util/AbstractConfiguration.h> | 
|---|
| 5 | #include <Compression/ICompressionCodec.h> | 
|---|
| 6 | #include <Compression/CompressionFactory.h> | 
|---|
| 7 | #include <Compression/CompressionInfo.h> | 
|---|
| 8 |  | 
|---|
| 9 | namespace DB | 
|---|
| 10 | { | 
|---|
| 11 |  | 
|---|
/// Error codes referenced by this file. They are only declared here (extern);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int UNKNOWN_COMPRESSION_METHOD;
    /// Thrown by CompressionCodecSelector when a config child is not a 'case' element.
    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
|---|
| 17 |  | 
|---|
| 18 |  | 
|---|
| 19 | /** Allows you to select the compression settings for the conditions specified in the configuration file. | 
|---|
| 20 | * The config looks like this | 
|---|
| 21 |  | 
|---|
| 22 | <compression> | 
|---|
| 23 |  | 
|---|
| 24 | <!-- Set of options. Options are checked in a row. The last worked option wins. If none has worked, then lz4 is used. --> | 
|---|
| 25 | <case> | 
|---|
| 26 |  | 
|---|
| 27 | <!-- Conditions. All must be satisfied simultaneously. Some conditions may not be specified. --> | 
|---|
| 28 | <min_part_size>10000000000</min_part_size>         <!-- The minimum size of a part in bytes. --> | 
|---|
| 29 | <min_part_size_ratio>0.01</min_part_size_ratio>    <!-- The minimum size of the part relative to all the data in the table. --> | 
|---|
| 30 |  | 
|---|
| 31 | <!-- Which compression method to choose. --> | 
|---|
| 32 | <method>zstd</method> | 
|---|
| 33 | <level>2</level> | 
|---|
| 34 | </case> | 
|---|
| 35 |  | 
|---|
| 36 | <case> | 
|---|
| 37 | ... | 
|---|
| 38 | </case> | 
|---|
| 39 | </compression> | 
|---|
| 40 | */ | 
|---|
| 41 | class CompressionCodecSelector | 
|---|
| 42 | { | 
|---|
| 43 | private: | 
|---|
| 44 | struct Element | 
|---|
| 45 | { | 
|---|
| 46 | size_t min_part_size = 0; | 
|---|
| 47 | double min_part_size_ratio = 0; | 
|---|
| 48 | std::string family_name; | 
|---|
| 49 | std::optional<int> level; | 
|---|
| 50 |  | 
|---|
| 51 |  | 
|---|
| 52 | Element(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) | 
|---|
| 53 | { | 
|---|
| 54 | min_part_size = config.getUInt64(config_prefix + ".min_part_size", 0); | 
|---|
| 55 | min_part_size_ratio = config.getDouble(config_prefix + ".min_part_size_ratio", 0); | 
|---|
| 56 |  | 
|---|
| 57 | family_name = config.getString(config_prefix + ".method", "lz4"); | 
|---|
| 58 | if (config.has(config_prefix + ".level")) | 
|---|
| 59 | level = config.getInt64(config_prefix + ".level"); | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | bool check(size_t part_size, double part_size_ratio) const | 
|---|
| 63 | { | 
|---|
| 64 | return part_size >= min_part_size | 
|---|
| 65 | && part_size_ratio >= min_part_size_ratio; | 
|---|
| 66 | } | 
|---|
| 67 | }; | 
|---|
| 68 |  | 
|---|
| 69 | std::vector<Element> elements; | 
|---|
| 70 |  | 
|---|
| 71 | public: | 
|---|
| 72 | CompressionCodecSelector() {}    /// Always returns the default method. | 
|---|
| 73 |  | 
|---|
| 74 | CompressionCodecSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) | 
|---|
| 75 | { | 
|---|
| 76 | Poco::Util::AbstractConfiguration::Keys keys; | 
|---|
| 77 | config.keys(config_prefix, keys); | 
|---|
| 78 |  | 
|---|
| 79 | for (const auto & name : keys) | 
|---|
| 80 | { | 
|---|
| 81 | if (!startsWith(name.data(), "case")) | 
|---|
| 82 | throw Exception( "Unknown element in config: "+ config_prefix + "."+ name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); | 
|---|
| 83 |  | 
|---|
| 84 | elements.emplace_back(config, config_prefix + "."+ name); | 
|---|
| 85 | } | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | CompressionCodecPtr choose(size_t part_size, double part_size_ratio) const | 
|---|
| 89 | { | 
|---|
| 90 | const auto & factory = CompressionCodecFactory::instance(); | 
|---|
| 91 | CompressionCodecPtr res = factory.getDefaultCodec(); | 
|---|
| 92 |  | 
|---|
| 93 | for (const auto & element : elements) | 
|---|
| 94 | if (element.check(part_size, part_size_ratio)) | 
|---|
| 95 | res = factory.get(element.family_name, element.level); | 
|---|
| 96 |  | 
|---|
| 97 | return res; | 
|---|
| 98 | } | 
|---|
| 99 | }; | 
|---|
| 100 |  | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|