1 | #include "catch.hpp" |
2 | #include "duckdb/common/file_system.hpp" |
3 | #include "duckdb/common/fstream_util.hpp" |
4 | #include "duckdb/common/gzip_stream.hpp" |
5 | #include "test_gzip_stream_header.hpp" |
6 | #include "test_helpers.hpp" |
7 | |
8 | using namespace duckdb; |
9 | using namespace std; |
10 | |
// Gzip-compressed fixture written to disk by the stream test below.
// The stream starts with the gzip magic bytes (0x1f 0x8b), stores the original
// file name "test.txt" in its header, and the test expects it to decompress to
// "Hello, World\n".
unsigned char test_txt_gz[] = {0x1f, 0x8b, 0x08, 0x08, 0x9a, 0x57, 0xc8, 0x5c, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74,
                               0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
                               0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00, 0x90, 0x3a, 0xf6, 0x40, 0x0d, 0x00, 0x00, 0x00};

// Size of the fixture in bytes. Derived from the array itself so the length
// cannot drift out of sync if the fixture bytes are ever edited.
unsigned int test_txt_gz_len = sizeof(test_txt_gz);
16 | |
17 | TEST_CASE("Test basic stream read from GZIP files" , "[gzip_stream]" ) { |
18 | string gzip_file_path = TestCreatePath("test.txt.gz" ); |
19 | |
20 | ofstream ofp(gzip_file_path, ios::out | ios::binary); |
21 | ofp.write((const char *)test_txt_gz, test_txt_gz_len); |
22 | ofp.close(); |
23 | |
24 | GzipStream gz(gzip_file_path); |
25 | std::string s(istreambuf_iterator<char>(gz), {}); |
26 | REQUIRE(s == "Hello, World\n" ); |
27 | |
28 | std::ofstream ofp2(gzip_file_path, ios::out | ios::binary); |
29 | ofp2.write((const char *)test_txt_gz, 5); // header too short |
30 | ofp2.close(); |
31 | |
32 | GzipStream gz2(gzip_file_path); |
33 | REQUIRE_THROWS(s = string(std::istreambuf_iterator<char>(gz2), {})); |
34 | |
35 | GzipStream gz3("XXX_THIS_DOES_NOT_EXIST" ); |
36 | REQUIRE_THROWS(s = string(std::istreambuf_iterator<char>(gz3), {})); |
37 | } |
38 | |
39 | TEST_CASE("Test COPY with GZIP files" , "[gzip_stream]" ) { |
40 | string gzip_file_path = TestCreatePath("lineitem1k.tbl.gz" ); |
41 | |
42 | ofstream ofp(gzip_file_path, ios::out | ios::binary); |
43 | ofp.write((const char *)lineitem_tbl_small_gz, lineitem_tbl_small_gz_len); |
44 | ofp.close(); |
45 | |
46 | unique_ptr<QueryResult> result; |
47 | DuckDB db(nullptr); |
48 | Connection con(db); |
49 | |
50 | REQUIRE_NO_FAIL(con.Query( |
51 | "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber " |
52 | "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) " |
53 | "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, " |
54 | "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) " |
55 | "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);" )); |
56 | result = con.Query("COPY lineitem FROM '" + gzip_file_path + "' DELIMITER '|'" ); |
57 | |
58 | REQUIRE(CHECK_COLUMN(result, 0, {1000})); |
59 | // stolen from test_copy.cpp |
60 | result = con.Query("SELECT l_partkey FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber" ); |
61 | REQUIRE(CHECK_COLUMN(result, 0, {155190, 67310, 63700, 2132, 24027, 15635})); |
62 | } |
63 | |