1#include "duckdb_miniparquet.hpp"
2#include "catch.hpp"
3#include "test_helpers.hpp"
4#include "duckdb/common/types/timestamp.hpp"
5
6using namespace duckdb;
7using namespace std;
8
9TEST_CASE("Test basic parquet reading", "[parquet]") {
10 DuckDB db(nullptr);
11 Parquet::Init(db);
12
13 Connection con(db);
14 con.EnableQueryVerification();
15
16 SECTION("Exception on missing file") {
17 REQUIRE_THROWS(con.Query("SELECT * FROM parquet_scan('does_not_exist')"));
18 }
19
20 SECTION("alltypes_plain.parquet") {
21 auto result = con.Query("SELECT * FROM parquet_scan('third_party/miniparquet/test/alltypes_plain.parquet')");
22 REQUIRE(CHECK_COLUMN(result, 0, {4, 5, 6, 7, 2, 3, 0, 1}));
23 REQUIRE(CHECK_COLUMN(result, 1, {true, false, true, false, true, false, true, false}));
24 REQUIRE(CHECK_COLUMN(result, 2, {0, 1, 0, 1, 0, 1, 0, 1}));
25 REQUIRE(CHECK_COLUMN(result, 3, {0, 1, 0, 1, 0, 1, 0, 1}));
26 REQUIRE(CHECK_COLUMN(result, 4, {0, 1, 0, 1, 0, 1, 0, 1}));
27 REQUIRE(CHECK_COLUMN(result, 5, {0, 10, 0, 10, 0, 10, 0, 10}));
28 REQUIRE(CHECK_COLUMN(result, 6, {0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1}));
29 REQUIRE(CHECK_COLUMN(result, 7, {0.0, 10.1, 0.0, 10.1, 0.0, 10.1, 0.0, 10.1}));
30 REQUIRE(CHECK_COLUMN(
31 result, 8,
32 {"03/01/09", "03/01/09", "04/01/09", "04/01/09", "02/01/09", "02/01/09", "01/01/09", "01/01/09"}));
33 REQUIRE(CHECK_COLUMN(result, 9, {"0", "1", "0", "1", "0", "1", "0", "1"}));
34
35 REQUIRE(CHECK_COLUMN(result, 10,
36 {Value::BIGINT(Timestamp::FromString("2009-03-01 00:00:00")),
37 Value::BIGINT(Timestamp::FromString("2009-03-01 00:01:00")),
38 Value::BIGINT(Timestamp::FromString("2009-04-01 00:00:00")),
39 Value::BIGINT(Timestamp::FromString("2009-04-01 00:01:00")),
40 Value::BIGINT(Timestamp::FromString("2009-02-01 00:00:00")),
41 Value::BIGINT(Timestamp::FromString("2009-02-01 00:01:00")),
42 Value::BIGINT(Timestamp::FromString("2009-01-01 00:00:00")),
43 Value::BIGINT(Timestamp::FromString("2009-01-01 00:01:00"))}));
44 }
45
46 SECTION("alltypes_plain.snappy.parquet") {
47 auto result =
48 con.Query("SELECT * FROM parquet_scan('third_party/miniparquet/test/alltypes_plain.snappy.parquet')");
49 REQUIRE(CHECK_COLUMN(result, 0, {6, 7}));
50 REQUIRE(CHECK_COLUMN(result, 1, {true, false}));
51 REQUIRE(CHECK_COLUMN(result, 2, {0, 1}));
52 REQUIRE(CHECK_COLUMN(result, 3, {0, 1}));
53 REQUIRE(CHECK_COLUMN(result, 4, {0, 1}));
54 REQUIRE(CHECK_COLUMN(result, 5, {0, 10}));
55 REQUIRE(CHECK_COLUMN(result, 6, {0.0, 1.1}));
56 REQUIRE(CHECK_COLUMN(result, 7, {0.0, 10.1}));
57 REQUIRE(CHECK_COLUMN(result, 8, {"04/01/09", "04/01/09"}));
58 REQUIRE(CHECK_COLUMN(result, 9, {"0", "1"}));
59 REQUIRE(CHECK_COLUMN(result, 10,
60 {Value::BIGINT(Timestamp::FromString("2009-04-01 00:00:00")),
61 Value::BIGINT(Timestamp::FromString("2009-04-01 00:01:00"))}));
62 }
63
64 SECTION("alltypes_dictionary.parquet") {
65 auto result =
66 con.Query("SELECT * FROM parquet_scan('third_party/miniparquet/test/alltypes_dictionary.parquet')");
67
68 REQUIRE(CHECK_COLUMN(result, 0, {0, 1}));
69 REQUIRE(CHECK_COLUMN(result, 1, {true, false}));
70 REQUIRE(CHECK_COLUMN(result, 2, {0, 1}));
71 REQUIRE(CHECK_COLUMN(result, 3, {0, 1}));
72 REQUIRE(CHECK_COLUMN(result, 4, {0, 1}));
73 REQUIRE(CHECK_COLUMN(result, 5, {0, 10}));
74 REQUIRE(CHECK_COLUMN(result, 6, {0.0, 1.1}));
75 REQUIRE(CHECK_COLUMN(result, 7, {0.0, 10.1}));
76 REQUIRE(CHECK_COLUMN(result, 8, {"01/01/09", "01/01/09"}));
77 REQUIRE(CHECK_COLUMN(result, 9, {"0", "1"}));
78 REQUIRE(CHECK_COLUMN(result, 10,
79 {Value::BIGINT(Timestamp::FromString("2009-01-01 00:00:00")),
80 Value::BIGINT(Timestamp::FromString("2009-01-01 00:01:00"))}));
81 }
82
83 SECTION("userdata1.parquet") {
84 auto result = con.Query("SELECT count(*) FROM parquet_scan('third_party/miniparquet/test/userdata1.parquet')");
85 REQUIRE(CHECK_COLUMN(result, 0, {1000}));
86 }
87}
88