| 1 | #include "duckdb_miniparquet.hpp" | 
| 2 | #include "catch.hpp" | 
| 3 | #include "test_helpers.hpp" | 
| 4 | #include "duckdb/common/types/timestamp.hpp" | 
| 5 |  | 
| 6 | using namespace duckdb; | 
| 7 | using namespace std; | 
| 8 |  | 
| 9 | TEST_CASE("Test basic parquet reading" , "[parquet]" ) { | 
| 10 | 	DuckDB db(nullptr); | 
| 11 | 	Parquet::Init(db); | 
| 12 |  | 
| 13 | 	Connection con(db); | 
| 14 | 	con.EnableQueryVerification(); | 
| 15 |  | 
| 16 | 	SECTION("Exception on missing file" ) { | 
| 17 | 		REQUIRE_THROWS(con.Query("SELECT * FROM parquet_scan('does_not_exist')" )); | 
| 18 | 	} | 
| 19 |  | 
| 20 | 	SECTION("alltypes_plain.parquet" ) { | 
| 21 | 		auto result = con.Query("SELECT * FROM parquet_scan('third_party/miniparquet/test/alltypes_plain.parquet')" ); | 
| 22 | 		REQUIRE(CHECK_COLUMN(result, 0, {4, 5, 6, 7, 2, 3, 0, 1})); | 
| 23 | 		REQUIRE(CHECK_COLUMN(result, 1, {true, false, true, false, true, false, true, false})); | 
| 24 | 		REQUIRE(CHECK_COLUMN(result, 2, {0, 1, 0, 1, 0, 1, 0, 1})); | 
| 25 | 		REQUIRE(CHECK_COLUMN(result, 3, {0, 1, 0, 1, 0, 1, 0, 1})); | 
| 26 | 		REQUIRE(CHECK_COLUMN(result, 4, {0, 1, 0, 1, 0, 1, 0, 1})); | 
| 27 | 		REQUIRE(CHECK_COLUMN(result, 5, {0, 10, 0, 10, 0, 10, 0, 10})); | 
| 28 | 		REQUIRE(CHECK_COLUMN(result, 6, {0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1})); | 
| 29 | 		REQUIRE(CHECK_COLUMN(result, 7, {0.0, 10.1, 0.0, 10.1, 0.0, 10.1, 0.0, 10.1})); | 
| 30 | 		REQUIRE(CHECK_COLUMN( | 
| 31 | 		    result, 8, | 
| 32 | 		    {"03/01/09" , "03/01/09" , "04/01/09" , "04/01/09" , "02/01/09" , "02/01/09" , "01/01/09" , "01/01/09" })); | 
| 33 | 		REQUIRE(CHECK_COLUMN(result, 9, {"0" , "1" , "0" , "1" , "0" , "1" , "0" , "1" })); | 
| 34 |  | 
| 35 | 		REQUIRE(CHECK_COLUMN(result, 10, | 
| 36 | 		                     {Value::BIGINT(Timestamp::FromString("2009-03-01 00:00:00" )), | 
| 37 | 		                      Value::BIGINT(Timestamp::FromString("2009-03-01 00:01:00" )), | 
| 38 | 		                      Value::BIGINT(Timestamp::FromString("2009-04-01 00:00:00" )), | 
| 39 | 		                      Value::BIGINT(Timestamp::FromString("2009-04-01 00:01:00" )), | 
| 40 | 		                      Value::BIGINT(Timestamp::FromString("2009-02-01 00:00:00" )), | 
| 41 | 		                      Value::BIGINT(Timestamp::FromString("2009-02-01 00:01:00" )), | 
| 42 | 		                      Value::BIGINT(Timestamp::FromString("2009-01-01 00:00:00" )), | 
| 43 | 		                      Value::BIGINT(Timestamp::FromString("2009-01-01 00:01:00" ))})); | 
| 44 | 	} | 
| 45 |  | 
| 46 | 	SECTION("alltypes_plain.snappy.parquet" ) { | 
| 47 | 		auto result = | 
| 48 | 		    con.Query("SELECT * FROM parquet_scan('third_party/miniparquet/test/alltypes_plain.snappy.parquet')" ); | 
| 49 | 		REQUIRE(CHECK_COLUMN(result, 0, {6, 7})); | 
| 50 | 		REQUIRE(CHECK_COLUMN(result, 1, {true, false})); | 
| 51 | 		REQUIRE(CHECK_COLUMN(result, 2, {0, 1})); | 
| 52 | 		REQUIRE(CHECK_COLUMN(result, 3, {0, 1})); | 
| 53 | 		REQUIRE(CHECK_COLUMN(result, 4, {0, 1})); | 
| 54 | 		REQUIRE(CHECK_COLUMN(result, 5, {0, 10})); | 
| 55 | 		REQUIRE(CHECK_COLUMN(result, 6, {0.0, 1.1})); | 
| 56 | 		REQUIRE(CHECK_COLUMN(result, 7, {0.0, 10.1})); | 
| 57 | 		REQUIRE(CHECK_COLUMN(result, 8, {"04/01/09" , "04/01/09" })); | 
| 58 | 		REQUIRE(CHECK_COLUMN(result, 9, {"0" , "1" })); | 
| 59 | 		REQUIRE(CHECK_COLUMN(result, 10, | 
| 60 | 		                     {Value::BIGINT(Timestamp::FromString("2009-04-01 00:00:00" )), | 
| 61 | 		                      Value::BIGINT(Timestamp::FromString("2009-04-01 00:01:00" ))})); | 
| 62 | 	} | 
| 63 |  | 
| 64 | 	SECTION("alltypes_dictionary.parquet" ) { | 
| 65 | 		auto result = | 
| 66 | 		    con.Query("SELECT * FROM parquet_scan('third_party/miniparquet/test/alltypes_dictionary.parquet')" ); | 
| 67 |  | 
| 68 | 		REQUIRE(CHECK_COLUMN(result, 0, {0, 1})); | 
| 69 | 		REQUIRE(CHECK_COLUMN(result, 1, {true, false})); | 
| 70 | 		REQUIRE(CHECK_COLUMN(result, 2, {0, 1})); | 
| 71 | 		REQUIRE(CHECK_COLUMN(result, 3, {0, 1})); | 
| 72 | 		REQUIRE(CHECK_COLUMN(result, 4, {0, 1})); | 
| 73 | 		REQUIRE(CHECK_COLUMN(result, 5, {0, 10})); | 
| 74 | 		REQUIRE(CHECK_COLUMN(result, 6, {0.0, 1.1})); | 
| 75 | 		REQUIRE(CHECK_COLUMN(result, 7, {0.0, 10.1})); | 
| 76 | 		REQUIRE(CHECK_COLUMN(result, 8, {"01/01/09" , "01/01/09" })); | 
| 77 | 		REQUIRE(CHECK_COLUMN(result, 9, {"0" , "1" })); | 
| 78 | 		REQUIRE(CHECK_COLUMN(result, 10, | 
| 79 | 		                     {Value::BIGINT(Timestamp::FromString("2009-01-01 00:00:00" )), | 
| 80 | 		                      Value::BIGINT(Timestamp::FromString("2009-01-01 00:01:00" ))})); | 
| 81 | 	} | 
| 82 |  | 
| 83 | 	SECTION("userdata1.parquet" ) { | 
| 84 | 		auto result = con.Query("SELECT count(*) FROM parquet_scan('third_party/miniparquet/test/userdata1.parquet')" ); | 
| 85 | 		REQUIRE(CHECK_COLUMN(result, 0, {1000})); | 
| 86 | 	} | 
| 87 | } | 
| 88 |  |