| 1 | #include <IO/ReadHelpers.h> |
| 2 | #include <Common/Exception.h> |
| 3 | #include <Common/StringUtils/StringUtils.h> |
| 4 | #include <Poco/Util/AbstractConfiguration.h> |
| 5 | #include <Compression/ICompressionCodec.h> |
| 6 | #include <Compression/CompressionFactory.h> |
| 7 | #include <Compression/CompressionInfo.h> |
| 8 | |
| 9 | namespace DB |
| 10 | { |
| 11 | |
| 12 | namespace ErrorCodes |
| 13 | { |
| 14 | extern const int UNKNOWN_COMPRESSION_METHOD; |
| 15 | extern const int UNKNOWN_ELEMENT_IN_CONFIG; |
| 16 | } |
| 17 | |
| 18 | |
| 19 | /** Allows you to select the compression settings for the conditions specified in the configuration file. |
| 20 | * The config looks like this |
| 21 | |
| 22 | <compression> |
| 23 | |
| 24 | <!-- Set of cases. Cases are checked in order; the last case whose conditions are all satisfied wins. If no case is satisfied, lz4 is used. --> |
| 25 | <case> |
| 26 | |
| 27 | <!-- Conditions. All must be satisfied simultaneously. Some conditions may not be specified. --> |
| 28 | <min_part_size>10000000000</min_part_size> <!-- The minimum size of a part in bytes. --> |
| 29 | <min_part_size_ratio>0.01</min_part_size_ratio> <!-- The minimum size of the part relative to all the data in the table. --> |
| 30 | |
| 31 | <!-- Which compression method to choose. --> |
| 32 | <method>zstd</method> |
| 33 | <level>2</level> |
| 34 | </case> |
| 35 | |
| 36 | <case> |
| 37 | ... |
| 38 | </case> |
| 39 | </compression> |
| 40 | */ |
| 41 | class CompressionCodecSelector |
| 42 | { |
| 43 | private: |
| 44 | struct Element |
| 45 | { |
| 46 | size_t min_part_size = 0; |
| 47 | double min_part_size_ratio = 0; |
| 48 | std::string family_name; |
| 49 | std::optional<int> level; |
| 50 | |
| 51 | |
| 52 | Element(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
| 53 | { |
| 54 | min_part_size = config.getUInt64(config_prefix + ".min_part_size" , 0); |
| 55 | min_part_size_ratio = config.getDouble(config_prefix + ".min_part_size_ratio" , 0); |
| 56 | |
| 57 | family_name = config.getString(config_prefix + ".method" , "lz4" ); |
| 58 | if (config.has(config_prefix + ".level" )) |
| 59 | level = config.getInt64(config_prefix + ".level" ); |
| 60 | } |
| 61 | |
| 62 | bool check(size_t part_size, double part_size_ratio) const |
| 63 | { |
| 64 | return part_size >= min_part_size |
| 65 | && part_size_ratio >= min_part_size_ratio; |
| 66 | } |
| 67 | }; |
| 68 | |
| 69 | std::vector<Element> elements; |
| 70 | |
| 71 | public: |
| 72 | CompressionCodecSelector() {} /// Always returns the default method. |
| 73 | |
| 74 | CompressionCodecSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
| 75 | { |
| 76 | Poco::Util::AbstractConfiguration::Keys keys; |
| 77 | config.keys(config_prefix, keys); |
| 78 | |
| 79 | for (const auto & name : keys) |
| 80 | { |
| 81 | if (!startsWith(name.data(), "case" )) |
| 82 | throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'" , ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); |
| 83 | |
| 84 | elements.emplace_back(config, config_prefix + "." + name); |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | CompressionCodecPtr choose(size_t part_size, double part_size_ratio) const |
| 89 | { |
| 90 | const auto & factory = CompressionCodecFactory::instance(); |
| 91 | CompressionCodecPtr res = factory.getDefaultCodec(); |
| 92 | |
| 93 | for (const auto & element : elements) |
| 94 | if (element.check(part_size, part_size_ratio)) |
| 95 | res = factory.get(element.family_name, element.level); |
| 96 | |
| 97 | return res; |
| 98 | } |
| 99 | }; |
| 100 | |
| 101 | } |
| 102 | |