| 1 | #include "catch.hpp" |
| 2 | #include "duckdb/common/file_system.hpp" |
| 3 | #include "duckdb/common/fstream_util.hpp" |
| 4 | #include "duckdb/common/gzip_stream.hpp" |
| 5 | #include "test_gzip_stream_header.hpp" |
| 6 | #include "test_helpers.hpp" |
| 7 | |
| 8 | using namespace duckdb; |
| 9 | using namespace std; |
| 10 | |
// Embedded gzip archive used as the fixture for the basic stream-read test.
// Layout (per the gzip format): magic 0x1f 0x8b, CM = 8 (deflate), FLG = 0x08
// (FNAME present), a 4-byte MTIME, XFL/OS, the NUL-terminated original name
// "test.txt", the deflate payload, then the CRC32 and ISIZE trailer.
// ISIZE is 0x0d (13 bytes), the length of "Hello, World\n", which the test
// case in this file expects to read back from the archive.
unsigned char test_txt_gz[] = {0x1f, 0x8b, 0x08, 0x08, 0x9a, 0x57, 0xc8, 0x5c, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74,
                               0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
                               0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00, 0x90, 0x3a, 0xf6, 0x40, 0x0d, 0x00, 0x00, 0x00};

// Number of bytes in test_txt_gz (42). Derived from the array itself so the
// length cannot silently drift if the fixture bytes are ever regenerated.
unsigned int test_txt_gz_len = static_cast<unsigned int>(sizeof(test_txt_gz));
| 16 | |
| 17 | TEST_CASE("Test basic stream read from GZIP files" , "[gzip_stream]" ) { |
| 18 | string gzip_file_path = TestCreatePath("test.txt.gz" ); |
| 19 | |
| 20 | ofstream ofp(gzip_file_path, ios::out | ios::binary); |
| 21 | ofp.write((const char *)test_txt_gz, test_txt_gz_len); |
| 22 | ofp.close(); |
| 23 | |
| 24 | GzipStream gz(gzip_file_path); |
| 25 | std::string s(istreambuf_iterator<char>(gz), {}); |
| 26 | REQUIRE(s == "Hello, World\n" ); |
| 27 | |
| 28 | std::ofstream ofp2(gzip_file_path, ios::out | ios::binary); |
| 29 | ofp2.write((const char *)test_txt_gz, 5); // header too short |
| 30 | ofp2.close(); |
| 31 | |
| 32 | GzipStream gz2(gzip_file_path); |
| 33 | REQUIRE_THROWS(s = string(std::istreambuf_iterator<char>(gz2), {})); |
| 34 | |
| 35 | GzipStream gz3("XXX_THIS_DOES_NOT_EXIST" ); |
| 36 | REQUIRE_THROWS(s = string(std::istreambuf_iterator<char>(gz3), {})); |
| 37 | } |
| 38 | |
| 39 | TEST_CASE("Test COPY with GZIP files" , "[gzip_stream]" ) { |
| 40 | string gzip_file_path = TestCreatePath("lineitem1k.tbl.gz" ); |
| 41 | |
| 42 | ofstream ofp(gzip_file_path, ios::out | ios::binary); |
| 43 | ofp.write((const char *)lineitem_tbl_small_gz, lineitem_tbl_small_gz_len); |
| 44 | ofp.close(); |
| 45 | |
| 46 | unique_ptr<QueryResult> result; |
| 47 | DuckDB db(nullptr); |
| 48 | Connection con(db); |
| 49 | |
| 50 | REQUIRE_NO_FAIL(con.Query( |
| 51 | "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber " |
| 52 | "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) " |
| 53 | "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, " |
| 54 | "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) " |
| 55 | "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);" )); |
| 56 | result = con.Query("COPY lineitem FROM '" + gzip_file_path + "' DELIMITER '|'" ); |
| 57 | |
| 58 | REQUIRE(CHECK_COLUMN(result, 0, {1000})); |
| 59 | // stolen from test_copy.cpp |
| 60 | result = con.Query("SELECT l_partkey FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber" ); |
| 61 | REQUIRE(CHECK_COLUMN(result, 0, {155190, 67310, 63700, 2132, 24027, 15635})); |
| 62 | } |
| 63 | |