1#include "catch.hpp"
2#include "duckdb/common/file_system.hpp"
3#include "duckdb/common/fstream_util.hpp"
4#include "duckdb/common/gzip_stream.hpp"
5#include "test_gzip_stream_header.hpp"
6#include "test_helpers.hpp"
7
8using namespace duckdb;
9using namespace std;
10
// Gzip-compressed fixture. The gzip header carries the original file name
// "test.txt"; the first test case in this file expects the payload to
// decompress to "Hello, World\n".
unsigned char test_txt_gz[] = {0x1f, 0x8b, 0x08, 0x08, 0x9a, 0x57, 0xc8, 0x5c, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74,
                               0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
                               0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00, 0x90, 0x3a, 0xf6, 0x40, 0x0d, 0x00, 0x00, 0x00};

// Size of test_txt_gz in bytes. Derived from the array itself so the length
// can never fall out of sync with the data if the fixture is regenerated.
unsigned int test_txt_gz_len = static_cast<unsigned int>(sizeof(test_txt_gz));
16
17TEST_CASE("Test basic stream read from GZIP files", "[gzip_stream]") {
18 string gzip_file_path = TestCreatePath("test.txt.gz");
19
20 ofstream ofp(gzip_file_path, ios::out | ios::binary);
21 ofp.write((const char *)test_txt_gz, test_txt_gz_len);
22 ofp.close();
23
24 GzipStream gz(gzip_file_path);
25 std::string s(istreambuf_iterator<char>(gz), {});
26 REQUIRE(s == "Hello, World\n");
27
28 std::ofstream ofp2(gzip_file_path, ios::out | ios::binary);
29 ofp2.write((const char *)test_txt_gz, 5); // header too short
30 ofp2.close();
31
32 GzipStream gz2(gzip_file_path);
33 REQUIRE_THROWS(s = string(std::istreambuf_iterator<char>(gz2), {}));
34
35 GzipStream gz3("XXX_THIS_DOES_NOT_EXIST");
36 REQUIRE_THROWS(s = string(std::istreambuf_iterator<char>(gz3), {}));
37}
38
39TEST_CASE("Test COPY with GZIP files", "[gzip_stream]") {
40 string gzip_file_path = TestCreatePath("lineitem1k.tbl.gz");
41
42 ofstream ofp(gzip_file_path, ios::out | ios::binary);
43 ofp.write((const char *)lineitem_tbl_small_gz, lineitem_tbl_small_gz_len);
44 ofp.close();
45
46 unique_ptr<QueryResult> result;
47 DuckDB db(nullptr);
48 Connection con(db);
49
50 REQUIRE_NO_FAIL(con.Query(
51 "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber "
52 "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) "
53 "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, "
54 "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) "
55 "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);"));
56 result = con.Query("COPY lineitem FROM '" + gzip_file_path + "' DELIMITER '|'");
57
58 REQUIRE(CHECK_COLUMN(result, 0, {1000}));
59 // stolen from test_copy.cpp
60 result = con.Query("SELECT l_partkey FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber");
61 REQUIRE(CHECK_COLUMN(result, 0, {155190, 67310, 63700, 2132, 24027, 15635}));
62}
63