| 1 | #include <iostream> | 
|---|
| 2 | #include <optional> | 
|---|
| 3 | #include <boost/program_options.hpp> | 
|---|
| 4 | #include <boost/algorithm/string/join.hpp> | 
|---|
| 5 |  | 
|---|
| 6 | #include <Common/Exception.h> | 
|---|
| 7 | #include <IO/WriteBufferFromFileDescriptor.h> | 
|---|
| 8 | #include <IO/ReadBufferFromFileDescriptor.h> | 
|---|
| 9 | #include <Compression/CompressedWriteBuffer.h> | 
|---|
| 10 | #include <Compression/CompressedReadBuffer.h> | 
|---|
| 11 | #include <IO/WriteHelpers.h> | 
|---|
| 12 | #include <IO/copyData.h> | 
|---|
| 13 | #include <Parsers/parseQuery.h> | 
|---|
| 14 | #include <Parsers/ExpressionElementParsers.h> | 
|---|
| 15 | #include <Compression/CompressionFactory.h> | 
|---|
| 16 | #include <Common/TerminalSize.h> | 
|---|
| 17 |  | 
|---|
| 18 |  | 
|---|
/// Error codes referenced by this file. They are only declared here (extern);
/// their values come from definitions outside this block.
namespace DB::ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int TOO_LARGE_SIZE_COMPRESSED;
}
|---|
| 27 |  | 
|---|
| 28 |  | 
|---|
| 29 | namespace | 
|---|
| 30 | { | 
|---|
| 31 |  | 
|---|
| 32 | /// Outputs sizes of uncompressed and compressed blocks for compressed file. | 
|---|
| 33 | void checkAndWriteHeader(DB::ReadBuffer & in, DB::WriteBuffer & out) | 
|---|
| 34 | { | 
|---|
| 35 | while (!in.eof()) | 
|---|
| 36 | { | 
|---|
| 37 | in.ignore(16);    /// checksum | 
|---|
| 38 |  | 
|---|
| 39 | char [COMPRESSED_BLOCK_HEADER_SIZE]; | 
|---|
| 40 | in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE); | 
|---|
| 41 |  | 
|---|
| 42 | UInt32 size_compressed = unalignedLoad<UInt32>(&header[1]); | 
|---|
| 43 |  | 
|---|
| 44 | if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) | 
|---|
| 45 | throw DB::Exception( "Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); | 
|---|
| 46 |  | 
|---|
| 47 | UInt32 size_decompressed = unalignedLoad<UInt32>(&header[5]); | 
|---|
| 48 |  | 
|---|
| 49 | DB::writeText(size_decompressed, out); | 
|---|
| 50 | DB::writeChar('\t', out); | 
|---|
| 51 | DB::writeText(size_compressed, out); | 
|---|
| 52 | DB::writeChar('\n', out); | 
|---|
| 53 |  | 
|---|
| 54 | in.ignore(size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); | 
|---|
| 55 | } | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | } | 
|---|
| 59 |  | 
|---|
| 60 | #pragma GCC diagnostic ignored "-Wunused-function" | 
|---|
| 61 | #pragma GCC diagnostic ignored "-Wmissing-declarations" | 
|---|
| 62 |  | 
|---|
| 63 | int mainEntryClickHouseCompressor(int argc, char ** argv) | 
|---|
| 64 | { | 
|---|
| 65 | boost::program_options::options_description desc = createOptionsDescription( "Allowed options", getTerminalWidth()); | 
|---|
| 66 | desc.add_options() | 
|---|
| 67 | ( "help,h", "produce help message") | 
|---|
| 68 | ( "decompress,d", "decompress") | 
|---|
| 69 | ( "block-size,b", boost::program_options::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") | 
|---|
| 70 | ( "hc", "use LZ4HC instead of LZ4") | 
|---|
| 71 | ( "zstd", "use ZSTD instead of LZ4") | 
|---|
| 72 | ( "codec", boost::program_options::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4") | 
|---|
| 73 | ( "level", boost::program_options::value<int>(), "compression level for codecs spicified via flags") | 
|---|
| 74 | ( "none", "use no compression instead of LZ4") | 
|---|
| 75 | ( "stat", "print block statistics of compressed data") | 
|---|
| 76 | ; | 
|---|
| 77 |  | 
|---|
| 78 | boost::program_options::variables_map options; | 
|---|
| 79 | boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); | 
|---|
| 80 |  | 
|---|
| 81 | if (options.count( "help")) | 
|---|
| 82 | { | 
|---|
| 83 | std::cout << "Usage: "<< argv[0] << " [options] < in > out"<< std::endl; | 
|---|
| 84 | std::cout << desc << std::endl; | 
|---|
| 85 | return 1; | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | try | 
|---|
| 89 | { | 
|---|
| 90 | bool decompress = options.count( "decompress"); | 
|---|
| 91 | bool use_lz4hc = options.count( "hc"); | 
|---|
| 92 | bool use_zstd = options.count( "zstd"); | 
|---|
| 93 | bool stat_mode = options.count( "stat"); | 
|---|
| 94 | bool use_none = options.count( "none"); | 
|---|
| 95 | unsigned block_size = options[ "block-size"].as<unsigned>(); | 
|---|
| 96 | std::vector<std::string> codecs; | 
|---|
| 97 | if (options.count( "codec")) | 
|---|
| 98 | codecs = options[ "codec"].as<std::vector<std::string>>(); | 
|---|
| 99 |  | 
|---|
| 100 | if ((use_lz4hc || use_zstd || use_none) && !codecs.empty()) | 
|---|
| 101 | throw DB::Exception( "Wrong options, codec flags like --zstd and --codec options are mutually exclusive", DB::ErrorCodes::BAD_ARGUMENTS); | 
|---|
| 102 |  | 
|---|
| 103 | if (!codecs.empty() && options.count( "level")) | 
|---|
| 104 | throw DB::Exception( "Wrong options, --level is not compatible with --codec list", DB::ErrorCodes::BAD_ARGUMENTS); | 
|---|
| 105 |  | 
|---|
| 106 | std::string method_family = "LZ4"; | 
|---|
| 107 |  | 
|---|
| 108 | if (use_lz4hc) | 
|---|
| 109 | method_family = "LZ4HC"; | 
|---|
| 110 | else if (use_zstd) | 
|---|
| 111 | method_family = "ZSTD"; | 
|---|
| 112 | else if (use_none) | 
|---|
| 113 | method_family = "NONE"; | 
|---|
| 114 |  | 
|---|
| 115 | std::optional<int> level = std::nullopt; | 
|---|
| 116 | if (options.count( "level")) | 
|---|
| 117 | level = options[ "level"].as<int>(); | 
|---|
| 118 |  | 
|---|
| 119 |  | 
|---|
| 120 | DB::CompressionCodecPtr codec; | 
|---|
| 121 | if (!codecs.empty()) | 
|---|
| 122 | { | 
|---|
| 123 | DB::ParserCodec codec_parser; | 
|---|
| 124 |  | 
|---|
| 125 | std::string codecs_line = boost::algorithm::join(codecs, ","); | 
|---|
| 126 | auto ast = DB::parseQuery(codec_parser, "("+ codecs_line + ")", 0); | 
|---|
| 127 | codec = DB::CompressionCodecFactory::instance().get(ast, nullptr); | 
|---|
| 128 | } | 
|---|
| 129 | else | 
|---|
| 130 | codec = DB::CompressionCodecFactory::instance().get(method_family, level); | 
|---|
| 131 |  | 
|---|
| 132 |  | 
|---|
| 133 | DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO); | 
|---|
| 134 | DB::WriteBufferFromFileDescriptor wb(STDOUT_FILENO); | 
|---|
| 135 |  | 
|---|
| 136 | if (stat_mode) | 
|---|
| 137 | { | 
|---|
| 138 | /// Output statistic for compressed file. | 
|---|
| 139 | checkAndWriteHeader(rb, wb); | 
|---|
| 140 | } | 
|---|
| 141 | else if (decompress) | 
|---|
| 142 | { | 
|---|
| 143 | /// Decompression | 
|---|
| 144 | DB::CompressedReadBuffer from(rb); | 
|---|
| 145 | DB::copyData(from, wb); | 
|---|
| 146 | } | 
|---|
| 147 | else | 
|---|
| 148 | { | 
|---|
| 149 | /// Compression | 
|---|
| 150 | DB::CompressedWriteBuffer to(wb, codec, block_size); | 
|---|
| 151 | DB::copyData(rb, to); | 
|---|
| 152 | } | 
|---|
| 153 | } | 
|---|
| 154 | catch (...) | 
|---|
| 155 | { | 
|---|
| 156 | std::cerr << DB::getCurrentExceptionMessage(true); | 
|---|
| 157 | return DB::getCurrentExceptionCode(); | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | return 0; | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|