| 1 | #include "catch.hpp" |
| 2 | #include "duckdb/common/file_system.hpp" |
| 3 | #include "duckdb/common/types/date.hpp" |
| 4 | #include "test_csv_header.hpp" |
| 5 | #include "test_helpers.hpp" |
| 6 | |
| 7 | #include <fstream> |
| 8 | |
| 9 | using namespace duckdb; |
| 10 | using namespace std; |
| 11 | |
| 12 | #if STANDARD_VECTOR_SIZE >= 16 |
| 13 | |
| 14 | TEST_CASE("Test copy into auto from lineitem csv" , "[copy]" ) { |
| 15 | FileSystem fs; |
| 16 | unique_ptr<QueryResult> result; |
| 17 | DuckDB db(nullptr); |
| 18 | Connection con(db); |
| 19 | |
| 20 | auto csv_path = GetCSVPath(); |
| 21 | auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv" ); |
| 22 | WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample)); |
| 23 | |
| 24 | REQUIRE_NO_FAIL(con.Query( |
| 25 | "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber " |
| 26 | "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) " |
| 27 | "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, " |
| 28 | "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) " |
| 29 | "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);" )); |
| 30 | result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' (FORMAT CSV_AUTO);" ); |
| 31 | |
| 32 | result = con.Query("SELECT COUNT(*) FROM lineitem;" ); |
| 33 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 34 | |
| 35 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
| 36 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
| 37 | REQUIRE( |
| 38 | CHECK_COLUMN(result, 1, |
| 39 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
| 40 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
| 41 | } |
| 42 | |
| 43 | TEST_CASE("Test read_csv_auto from on-time dataset" , "[copy]" ) { |
| 44 | FileSystem fs; |
| 45 | unique_ptr<QueryResult> result; |
| 46 | DuckDB db(nullptr); |
| 47 | Connection con(db); |
| 48 | |
| 49 | auto csv_path = GetCSVPath(); |
| 50 | auto ontime_csv = fs.JoinPath(csv_path, "ontime.csv" ); |
| 51 | WriteBinary(ontime_csv, ontime_sample, sizeof(ontime_sample)); |
| 52 | |
| 53 | REQUIRE_NO_FAIL(con.Query( |
| 54 | "CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, " |
| 55 | "flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), " |
| 56 | "flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, " |
| 57 | "origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), " |
| 58 | "originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, " |
| 59 | "destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips " |
| 60 | "VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), " |
| 61 | "depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups " |
| 62 | "DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), " |
| 63 | "taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes " |
| 64 | "DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled " |
| 65 | "DECIMAL(8,2), cancellationcode CHAR(1), diverted DECIMAL(8,2), crselapsedtime DECIMAL(8,2), actualelapsedtime " |
| 66 | "DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup DECIMAL(8,2), " |
| 67 | "carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), " |
| 68 | "lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime " |
| 69 | "VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), " |
| 70 | "divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, " |
| 71 | "div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), " |
| 72 | "div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, " |
| 73 | "div2airportseqid INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), " |
| 74 | "div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, " |
| 75 | "div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), " |
| 76 | "div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, " |
| 77 | "div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), " |
| 78 | "div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, " |
| 79 | "div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), " |
| 80 | "div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));" )); |
| 81 | |
| 82 | result = con.Query("COPY ontime FROM '" + ontime_csv + "' (FORMAT CSV_AUTO);" ); |
| 83 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
| 84 | |
| 85 | result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;" ); |
| 86 | REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988})); |
| 87 | REQUIRE(CHECK_COLUMN(result, 1, {"AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" })); |
| 88 | REQUIRE(CHECK_COLUMN(result, 2, {"JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" })); |
| 89 | REQUIRE(CHECK_COLUMN(result, 3, |
| 90 | {"New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , |
| 91 | "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" })); |
| 92 | REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
| 93 | } |
| 94 | |
| 95 | TEST_CASE("Test read_csv_auto from web_page csv" , "[copy]" ) { |
| 96 | FileSystem fs; |
| 97 | unique_ptr<QueryResult> result; |
| 98 | DuckDB db(nullptr); |
| 99 | Connection con(db); |
| 100 | |
| 101 | auto csv_path = GetCSVPath(); |
| 102 | auto webpage_csv = fs.JoinPath(csv_path, "web_page.csv" ); |
| 103 | WriteBinary(webpage_csv, web_page, sizeof(web_page)); |
| 104 | |
| 105 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE web_page AS SELECT * FROM read_csv_auto ('" + webpage_csv + "');" )); |
| 106 | |
| 107 | result = con.Query("SELECT COUNT(*) FROM web_page;" ); |
| 108 | REQUIRE(CHECK_COLUMN(result, 0, {60})); |
| 109 | |
| 110 | result = con.Query("SELECT * FROM web_page ORDER BY column00 LIMIT 3;" ); |
| 111 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 112 | REQUIRE(CHECK_COLUMN(result, 1, {"AAAAAAAABAAAAAAA" , "AAAAAAAACAAAAAAA" , "AAAAAAAACAAAAAAA" })); |
| 113 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(1997, 9, 3), Value::DATE(1997, 9, 3), Value::DATE(2000, 9, 3)})); |
| 114 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value::DATE(2000, 9, 2), Value()})); |
| 115 | REQUIRE(CHECK_COLUMN(result, 4, {2450810, 2450814, 2450814})); |
| 116 | REQUIRE(CHECK_COLUMN(result, 5, {2452620, 2452580, 2452611})); |
| 117 | REQUIRE(CHECK_COLUMN(result, 6, {"Y" , "N" , "N" })); |
| 118 | REQUIRE(CHECK_COLUMN(result, 7, {98539, Value(), Value()})); |
| 119 | REQUIRE(CHECK_COLUMN(result, 8, {"http://www.foo.com" , "http://www.foo.com" , "http://www.foo.com" })); |
| 120 | REQUIRE(CHECK_COLUMN(result, 9, {"welcome" , "protected" , "feedback" })); |
| 121 | REQUIRE(CHECK_COLUMN(result, 10, {2531, 1564, 1564})); |
| 122 | REQUIRE(CHECK_COLUMN(result, 11, {8, 4, 4})); |
| 123 | REQUIRE(CHECK_COLUMN(result, 12, {3, 3, 3})); |
| 124 | REQUIRE(CHECK_COLUMN(result, 13, {4, 1, 4})); |
| 125 | } |
| 126 | |
| 127 | TEST_CASE("Test read_csv_auto from greek-utf8 csv" , "[copy]" ) { |
| 128 | FileSystem fs; |
| 129 | unique_ptr<QueryResult> result; |
| 130 | DuckDB db(nullptr); |
| 131 | Connection con(db); |
| 132 | |
| 133 | auto csv_path = GetCSVPath(); |
| 134 | auto csv_file = fs.JoinPath(csv_path, "greek_utf8.csv" ); |
| 135 | WriteBinary(csv_file, greek_utf8, sizeof(greek_utf8)); |
| 136 | |
| 137 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE greek_utf8 AS SELECT * FROM read_csv_auto ('" + csv_file + "');" )); |
| 138 | |
| 139 | result = con.Query("SELECT COUNT(*) FROM greek_utf8;" ); |
| 140 | REQUIRE(CHECK_COLUMN(result, 0, {8})); |
| 141 | |
| 142 | result = con.Query("SELECT * FROM greek_utf8 ORDER BY 1;" ); |
| 143 | REQUIRE(CHECK_COLUMN(result, 0, {1689, 1690, 41561, 45804, 51981, 171067, 182773, 607808})); |
| 144 | REQUIRE(CHECK_COLUMN(result, 1, |
| 145 | {"\x30\x30\x69\\047\x6d" , "\x30\x30\x69\\047\x76" , "\x32\x30\x31\x35\xe2\x80\x8e" , |
| 146 | "\x32\x31\xcf\x80" , "\x32\x34\x68\x6f\x75\x72\x73\xe2\x80\xac" , |
| 147 | "\x61\x72\x64\x65\xcc\x80\x63\x68" , "\x61\xef\xac\x81" , |
| 148 | "\x70\x6f\x76\x65\x72\x74\x79\xe2\x80\xaa" })); |
| 149 | REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 1, 1, 1, 2, 1, 1})); |
| 150 | } |
| 151 | |
| 152 | TEST_CASE("Test read_csv_auto from ncvoter csv" , "[copy]" ) { |
| 153 | FileSystem fs; |
| 154 | unique_ptr<QueryResult> result; |
| 155 | DuckDB db(nullptr); |
| 156 | Connection con(db); |
| 157 | |
| 158 | auto csv_path = GetCSVPath(); |
| 159 | auto ncvoter_csv = fs.JoinPath(csv_path, "ncvoter.csv" ); |
| 160 | WriteBinary(ncvoter_csv, ncvoter, sizeof(ncvoter)); |
| 161 | |
| 162 | REQUIRE_NO_FAIL(con.Query( |
| 163 | "CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd " |
| 164 | "STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, " |
| 165 | "name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, " |
| 166 | "full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, " |
| 167 | "street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, " |
| 168 | "res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, " |
| 169 | "mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state " |
| 170 | "STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number " |
| 171 | "STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, " |
| 172 | "party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, " |
| 173 | "registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc " |
| 174 | "STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv " |
| 175 | "STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, " |
| 176 | "nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, " |
| 177 | "county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, " |
| 178 | "school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, " |
| 179 | "water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, " |
| 180 | "sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, " |
| 181 | "munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, " |
| 182 | "confidential_ind STRING, age STRING, ncid STRING, vtd_abbrv STRING, vtd_desc STRING);" )); |
| 183 | result = con.Query("COPY ncvoters FROM '" + ncvoter_csv + "' (FORMAT CSV_AUTO);" ); |
| 184 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 185 | |
| 186 | result = con.Query("SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;" ); |
| 187 | REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); |
| 188 | REQUIRE(CHECK_COLUMN(result, 1, |
| 189 | {"ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , |
| 190 | "ALAMANCE" , "ALAMANCE" , "ALAMANCE" })); |
| 191 | REQUIRE(CHECK_COLUMN(result, 2, {"09S" , "09S" , "03W" , "09S" , "1210" , "035" , "124" , "06E" , "035" , "064" })); |
| 192 | REQUIRE(CHECK_COLUMN(result, 3, |
| 193 | {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
| 194 | } |
| 195 | |
| 196 | TEST_CASE("Test read_csv_auto from imdb csv" , "[copy]" ) { |
| 197 | FileSystem fs; |
| 198 | unique_ptr<QueryResult> result; |
| 199 | DuckDB db(nullptr); |
| 200 | Connection con(db); |
| 201 | |
| 202 | auto csv_path = GetCSVPath(); |
| 203 | auto imdb_movie_info = fs.JoinPath(csv_path, "imdb_movie_info.csv" ); |
| 204 | WriteBinary(imdb_movie_info, imdb_movie_info_escaped, sizeof(imdb_movie_info_escaped)); |
| 205 | |
| 206 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE movie_info AS SELECT * FROM read_csv_auto ('" + imdb_movie_info + "');" )); |
| 207 | |
| 208 | result = con.Query("SELECT COUNT(*) FROM movie_info;" ); |
| 209 | REQUIRE(CHECK_COLUMN(result, 0, {201})); |
| 210 | } |
| 211 | |
| 212 | TEST_CASE("Test read_csv_auto from cranlogs broken gzip" , "[copy][.]" ) { |
| 213 | FileSystem fs; |
| 214 | unique_ptr<QueryResult> result; |
| 215 | DuckDB db(nullptr); |
| 216 | Connection con(db); |
| 217 | |
| 218 | auto csv_path = GetCSVPath(); |
| 219 | auto cranlogs_csv = fs.JoinPath(csv_path, "cranlogs.csv.gz" ); |
| 220 | WriteBinary(cranlogs_csv, tmp2013_06_15, sizeof(tmp2013_06_15)); |
| 221 | |
| 222 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE cranlogs AS SELECT * FROM read_csv_auto ('" + cranlogs_csv + "');" )); |
| 223 | |
| 224 | result = con.Query("SELECT COUNT(*) FROM cranlogs;" ); |
| 225 | REQUIRE(CHECK_COLUMN(result, 0, {37459})); |
| 226 | } |
| 227 | |
| 228 | TEST_CASE("Test csv dialect detection" , "[copy]" ) { |
| 229 | FileSystem fs; |
| 230 | unique_ptr<QueryResult> result; |
| 231 | DuckDB db(nullptr); |
| 232 | Connection con(db); |
| 233 | |
| 234 | auto csv_path = GetCSVPath(); |
| 235 | |
| 236 | // generate CSV file with RFC-conform dialect |
| 237 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 238 | csv_file << "123,TEST1,one space" << endl; |
| 239 | csv_file << "345,TEST1,trailing_space " << endl; |
| 240 | csv_file << "567,TEST1,no_space" << endl; |
| 241 | csv_file.close(); |
| 242 | |
| 243 | REQUIRE_NO_FAIL( |
| 244 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 245 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 246 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 247 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST1" , "TEST1" , "TEST1" })); |
| 248 | REQUIRE(CHECK_COLUMN(result, 2, {"one space" , "trailing_space " , "no_space" })); |
| 249 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 250 | |
| 251 | // generate CSV file with RFC-conform dialect quote |
| 252 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
| 253 | csv_file2 << "123,TEST2,\"one space\"" << endl; |
| 254 | csv_file2 << "345,TEST2,\"trailing_space, \"" << endl; |
| 255 | csv_file2 << "567,TEST2,\"no\"\"space\"" << endl; |
| 256 | csv_file2.close(); |
| 257 | |
| 258 | REQUIRE_NO_FAIL( |
| 259 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 260 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 261 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 262 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" , "TEST2" })); |
| 263 | REQUIRE(CHECK_COLUMN(result, 2, {"one space" , "trailing_space, " , "no\"space" })); |
| 264 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 265 | |
| 266 | // generate CSV file with RFC-conform dialect quote/leading space of numerics |
| 267 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
| 268 | csv_file3 << "123,TEST3,text1" << endl; |
| 269 | csv_file3 << "\"345\",TEST3,text2" << endl; |
| 270 | csv_file3 << " 567,TEST3,text3" << endl; |
| 271 | csv_file3.close(); |
| 272 | |
| 273 | REQUIRE_NO_FAIL( |
| 274 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 275 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 276 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 277 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST3" , "TEST3" , "TEST3" })); |
| 278 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
| 279 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 280 | |
| 281 | // generate CSV file with bar delimiter |
| 282 | ofstream csv_file4(fs.JoinPath(csv_path, "test.csv" )); |
| 283 | csv_file4 << "123|TEST4|text1" << endl; |
| 284 | csv_file4 << "345|TEST4|text2" << endl; |
| 285 | csv_file4 << "567|TEST4|text3" << endl; |
| 286 | csv_file4.close(); |
| 287 | |
| 288 | REQUIRE_NO_FAIL( |
| 289 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 290 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 291 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 292 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST4" , "TEST4" , "TEST4" })); |
| 293 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
| 294 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 295 | |
| 296 | // generate CSV file with bar delimiter and double quotes |
| 297 | ofstream csv_file5(fs.JoinPath(csv_path, "test.csv" )); |
| 298 | csv_file5 << "123|TEST5|text1" << endl; |
| 299 | csv_file5 << "345|TEST5|\"text2|\"" << endl; |
| 300 | csv_file5 << "567|TEST5|text3" << endl; |
| 301 | csv_file5.close(); |
| 302 | |
| 303 | REQUIRE_NO_FAIL( |
| 304 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 305 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 306 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 307 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST5" , "TEST5" , "TEST5" })); |
| 308 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2|" , "text3" })); |
| 309 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 310 | |
| 311 | // generate CSV file with bar delimiter and double quotes and double escape |
| 312 | ofstream csv_file6(fs.JoinPath(csv_path, "test.csv" )); |
| 313 | csv_file6 << "123|TEST6|text1" << endl; |
| 314 | csv_file6 << "345|TEST6|\"text\"\"2\"\"text\"" << endl; |
| 315 | csv_file6 << "\"567\"|TEST6|text3" << endl; |
| 316 | csv_file6.close(); |
| 317 | |
| 318 | REQUIRE_NO_FAIL( |
| 319 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 320 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 321 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 322 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST6" , "TEST6" , "TEST6" })); |
| 323 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text\"2\"text" , "text3" })); |
| 324 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 325 | |
| 326 | // generate CSV file with bar delimiter and double quotes and backslash escape |
| 327 | ofstream csv_file7(fs.JoinPath(csv_path, "test.csv" )); |
| 328 | csv_file7 << "123|TEST7|text1" << endl; |
| 329 | csv_file7 << "345|TEST7|\"text\\\"2\\\"\"" << endl; |
| 330 | csv_file7 << "\"567\"|TEST7|text3" << endl; |
| 331 | csv_file7.close(); |
| 332 | |
| 333 | REQUIRE_NO_FAIL( |
| 334 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 335 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 336 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 337 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST7" , "TEST7" , "TEST7" })); |
| 338 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text\"2\"" , "text3" })); |
| 339 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 340 | |
| 341 | // generate CSV file with bar delimiter and single quotes and backslash escape |
| 342 | ofstream csv_file8(fs.JoinPath(csv_path, "test.csv" )); |
| 343 | csv_file8 << "123|TEST8|text1" << endl; |
| 344 | csv_file8 << "345|TEST8|'text\\'2\\'text'" << endl; |
| 345 | csv_file8 << "'567'|TEST8|text3" << endl; |
| 346 | csv_file8.close(); |
| 347 | |
| 348 | REQUIRE_NO_FAIL( |
| 349 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 350 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 351 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 352 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST8" , "TEST8" , "TEST8" })); |
| 353 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text'2'text" , "text3" })); |
| 354 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 355 | |
| 356 | // generate CSV file with semicolon delimiter |
| 357 | ofstream csv_file9(fs.JoinPath(csv_path, "test.csv" )); |
| 358 | csv_file9 << "123;TEST9;text1" << endl; |
| 359 | csv_file9 << "345;TEST9;text2" << endl; |
| 360 | csv_file9 << "567;TEST9;text3" << endl; |
| 361 | csv_file9.close(); |
| 362 | |
| 363 | REQUIRE_NO_FAIL( |
| 364 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 365 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 366 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 367 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST9" , "TEST9" , "TEST9" })); |
| 368 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
| 369 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 370 | |
| 371 | // generate CSV file with semicolon delimiter and double quotes |
| 372 | ofstream csv_file10(fs.JoinPath(csv_path, "test.csv" )); |
| 373 | csv_file10 << "123;TEST10;text1" << endl; |
| 374 | csv_file10 << "\"345\";TEST10;text2" << endl; |
| 375 | csv_file10 << "567;TEST10;\"te;xt3\"" << endl; |
| 376 | csv_file10.close(); |
| 377 | |
| 378 | REQUIRE_NO_FAIL( |
| 379 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 380 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 381 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 382 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST10" , "TEST10" , "TEST10" })); |
| 383 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "te;xt3" })); |
| 384 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 385 | |
| 386 | // generate CSV file with semicolon delimiter, double quotes and RFC escape |
| 387 | ofstream csv_file11(fs.JoinPath(csv_path, "test.csv" )); |
| 388 | csv_file11 << "123;TEST11;text1" << endl; |
| 389 | csv_file11 << "\"345\";TEST11;text2" << endl; |
| 390 | csv_file11 << "567;TEST11;\"te\"\"xt3\"" << endl; |
| 391 | csv_file11.close(); |
| 392 | |
| 393 | REQUIRE_NO_FAIL( |
| 394 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 395 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 396 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 397 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST11" , "TEST11" , "TEST11" })); |
| 398 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "te\"xt3" })); |
| 399 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 400 | |
| 401 | // generate CSV file with tab delimiter |
| 402 | ofstream csv_file12(fs.JoinPath(csv_path, "test.csv" )); |
| 403 | csv_file12 << "123\tTEST12\ttext1" << endl; |
| 404 | csv_file12 << "345\tTEST12\ttext2" << endl; |
| 405 | csv_file12 << "567\tTEST12\ttext3" << endl; |
| 406 | csv_file12.close(); |
| 407 | |
| 408 | REQUIRE_NO_FAIL( |
| 409 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 410 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 411 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 412 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST12" , "TEST12" , "TEST12" })); |
| 413 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
| 414 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 415 | |
| 416 | // generate CSV file with tab delimiter and single quotes |
| 417 | ofstream csv_file13(fs.JoinPath(csv_path, "test.csv" )); |
| 418 | csv_file13 << "123\tTEST13\ttext1" << endl; |
| 419 | csv_file13 << "345\tTEST13\t'te\txt2'" << endl; |
| 420 | csv_file13 << "'567'\tTEST13\ttext3" << endl; |
| 421 | csv_file13.close(); |
| 422 | |
| 423 | REQUIRE_NO_FAIL( |
| 424 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 425 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 426 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 427 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST13" , "TEST13" , "TEST13" })); |
| 428 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "te\txt2" , "text3" })); |
| 429 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 430 | |
| 431 | // generate CSV file with tab delimiter and single quotes without type-hint |
| 432 | ofstream csv_file14(fs.JoinPath(csv_path, "test.csv" )); |
| 433 | csv_file14 << "123\tTEST14\ttext1" << endl; |
| 434 | csv_file14 << "345\tTEST14\t'te\txt2'" << endl; |
| 435 | csv_file14 << "567\tTEST14\ttext3" << endl; |
| 436 | csv_file14.close(); |
| 437 | |
| 438 | REQUIRE_NO_FAIL( |
| 439 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 440 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 441 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
| 442 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST14" , "TEST14" , "TEST14" })); |
| 443 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "te\txt2" , "text3" })); |
| 444 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 445 | } |
| 446 | |
| 447 | TEST_CASE("Test csv header detection" , "[copy]" ) { |
| 448 | FileSystem fs; |
| 449 | unique_ptr<QueryResult> result; |
| 450 | DuckDB db(nullptr); |
| 451 | Connection con(db); |
| 452 | |
| 453 | auto csv_path = GetCSVPath(); |
| 454 | |
| 455 | // generate CSV file with two lines, none header |
| 456 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 457 | csv_file << "123.0,TEST1,2000-12-12" << endl; |
| 458 | csv_file << "345.0,TEST1,2000-12-13" << endl; |
| 459 | csv_file.close(); |
| 460 | |
| 461 | REQUIRE_NO_FAIL( |
| 462 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 463 | result = con.Query("SELECT column0, column1, column2 FROM test ORDER BY column0;" ); |
| 464 | REQUIRE(CHECK_COLUMN(result, 0, {123.0, 345.0})); |
| 465 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST1" , "TEST1" })); |
| 466 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 12), Value::DATE(2000, 12, 13)})); |
| 467 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 468 | |
| 469 | // generate CSV file with two lines, one header |
| 470 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
| 471 | csv_file2 << "number,text,date" << endl; |
| 472 | csv_file2 << "345.0,TEST2,2000-12-13" << endl; |
| 473 | csv_file2.close(); |
| 474 | |
| 475 | REQUIRE_NO_FAIL( |
| 476 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 477 | result = con.Query("SELECT number, text, date FROM test ORDER BY number;" ); |
| 478 | REQUIRE(CHECK_COLUMN(result, 0, {345.0})); |
| 479 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" })); |
| 480 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 13)})); |
| 481 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 482 | |
| 483 | // generate CSV file with three lines, one header, one skip row |
| 484 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
| 485 | csv_file3 << "some notes..." << endl; |
| 486 | csv_file3 << "number,text,date" << endl; |
| 487 | csv_file3 << "345.0,TEST3,2000-12-13" << endl; |
| 488 | csv_file3.close(); |
| 489 | |
| 490 | REQUIRE_NO_FAIL( |
| 491 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 492 | result = con.Query("SELECT number, text, date FROM test ORDER BY number;" ); |
| 493 | REQUIRE(CHECK_COLUMN(result, 0, {345.0})); |
| 494 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST3" })); |
| 495 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 13)})); |
| 496 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 497 | |
| 498 | // generate CSV file with three lines, one header, two skip rows |
| 499 | ofstream csv_file4(fs.JoinPath(csv_path, "test.csv" )); |
| 500 | csv_file4 << "some notes..." << endl; |
| 501 | csv_file4 << "more notes,..." << endl; |
| 502 | csv_file4 << "number,text,date" << endl; |
| 503 | csv_file4 << "345.0,TEST4,2000-12-13" << endl; |
| 504 | csv_file4.close(); |
| 505 | |
| 506 | REQUIRE_NO_FAIL( |
| 507 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 508 | result = con.Query("SELECT number, text, date FROM test ORDER BY number;" ); |
| 509 | REQUIRE(CHECK_COLUMN(result, 0, {345.0})); |
| 510 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST4" })); |
| 511 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 13)})); |
| 512 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 513 | |
| 514 | // generate CSV file with two lines both only strings |
| 515 | ofstream csv_file5(fs.JoinPath(csv_path, "test.csv" )); |
| 516 | csv_file5 << "Alice,StreetA,TEST5" << endl; |
| 517 | csv_file5 << "Bob,StreetB,TEST5" << endl; |
| 518 | csv_file5.close(); |
| 519 | |
| 520 | REQUIRE_NO_FAIL( |
| 521 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 522 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
| 523 | REQUIRE(CHECK_COLUMN(result, 0, {"Alice" , "Bob" })); |
| 524 | REQUIRE(CHECK_COLUMN(result, 1, {"StreetA" , "StreetB" })); |
| 525 | REQUIRE(CHECK_COLUMN(result, 2, {"TEST5" , "TEST5" })); |
| 526 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 527 | |
| 528 | // generate CSV file with one line, two columsn, only strings |
| 529 | ofstream csv_file6(fs.JoinPath(csv_path, "test.csv" )); |
| 530 | csv_file6 << "Alice,StreetA" << endl; |
| 531 | csv_file6.close(); |
| 532 | |
| 533 | REQUIRE_NO_FAIL( |
| 534 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 535 | result = con.Query("SELECT column0, column1 FROM test ORDER BY column0;" ); |
| 536 | REQUIRE(CHECK_COLUMN(result, 0, {"Alice" })); |
| 537 | REQUIRE(CHECK_COLUMN(result, 1, {"StreetA" })); |
| 538 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 539 | |
| 540 | // generate CSV file with one line, two columns - one numeric, one string |
| 541 | ofstream csv_file7(fs.JoinPath(csv_path, "test.csv" )); |
| 542 | csv_file7 << "1,StreetA" << endl; |
| 543 | csv_file7.close(); |
| 544 | |
| 545 | REQUIRE_NO_FAIL( |
| 546 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 547 | result = con.Query("SELECT column0, column1 FROM test ORDER BY column0;" ); |
| 548 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 549 | REQUIRE(CHECK_COLUMN(result, 1, {"StreetA" })); |
| 550 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 551 | |
| 552 | // generate CSV file with one line, one string column |
| 553 | ofstream csv_file8(fs.JoinPath(csv_path, "test.csv" )); |
| 554 | csv_file8 << "Test" << endl; |
| 555 | csv_file8.close(); |
| 556 | |
| 557 | REQUIRE_NO_FAIL( |
| 558 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 559 | result = con.Query("SELECT * FROM test;" ); |
| 560 | REQUIRE(CHECK_COLUMN(result, 0, {"Test" })); |
| 561 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 562 | |
| 563 | // generate CSV file with one line, one numeric column |
| 564 | ofstream csv_file9(fs.JoinPath(csv_path, "test.csv" )); |
| 565 | csv_file9 << "1" << endl; |
| 566 | csv_file9.close(); |
| 567 | |
| 568 | REQUIRE_NO_FAIL( |
| 569 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 570 | result = con.Query("SELECT * FROM test;" ); |
| 571 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 572 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 573 | } |
| 574 | |
| 575 | TEST_CASE("Test csv header completion" , "[copy]" ) { |
| 576 | FileSystem fs; |
| 577 | unique_ptr<QueryResult> result; |
| 578 | DuckDB db(nullptr); |
| 579 | Connection con(db); |
| 580 | |
| 581 | auto csv_path = GetCSVPath(); |
| 582 | |
| 583 | // generate CSV file with one missing header |
| 584 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 585 | csv_file << "a,,c" << endl; |
| 586 | csv_file << "123,TEST1,text1" << endl; |
| 587 | csv_file << "345,TEST1,text2" << endl; |
| 588 | csv_file.close(); |
| 589 | |
| 590 | REQUIRE_NO_FAIL( |
| 591 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 592 | result = con.Query("SELECT a, column1, c FROM test ORDER BY a;" ); |
| 593 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345})); |
| 594 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST1" , "TEST1" })); |
| 595 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" })); |
| 596 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 597 | |
| 598 | // generate CSV file with one duplicate header |
| 599 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
| 600 | csv_file2 << "a,b,a" << endl; |
| 601 | csv_file2 << "123,TEST2,text1" << endl; |
| 602 | csv_file2 << "345,TEST2,text2" << endl; |
| 603 | csv_file2.close(); |
| 604 | |
| 605 | REQUIRE_NO_FAIL( |
| 606 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 607 | result = con.Query("SELECT a_0, b, a_1 FROM test ORDER BY a_0;" ); |
| 608 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345})); |
| 609 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" })); |
| 610 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" })); |
| 611 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 612 | |
| 613 | // generate CSV file with all column names missing |
| 614 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
| 615 | csv_file3 << ",," << endl; |
| 616 | csv_file3 << "123,TEST3,text1" << endl; |
| 617 | csv_file3 << "345,TEST3,text2" << endl; |
| 618 | csv_file3.close(); |
| 619 | |
| 620 | REQUIRE_NO_FAIL( |
| 621 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 622 | REQUIRE_NO_FAIL(con.Query("SELECT column0, column1, column2 FROM test ORDER BY column0;" )); |
| 623 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 624 | |
| 625 | // generate CSV file with 12 columns and all but one column name missing |
| 626 | ofstream csv_file4(fs.JoinPath(csv_path, "test.csv" )); |
| 627 | csv_file4 << "a,,,,,,,,,,,," << endl; |
| 628 | csv_file4 << "123,TEST2,text1,,,,,,,,,,value1" << endl; |
| 629 | csv_file4 << "345,TEST2,text2,,,,,,,,,,value2" << endl; |
| 630 | csv_file4.close(); |
| 631 | |
| 632 | REQUIRE_NO_FAIL( |
| 633 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 634 | REQUIRE_NO_FAIL(con.Query("SELECT a, column01, column12 FROM test;" )); |
| 635 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 636 | |
| 637 | // generate CSV file with 12 equally called columns |
| 638 | ofstream csv_file5(fs.JoinPath(csv_path, "test.csv" )); |
| 639 | csv_file5 << "a,a,a,a,a,a,a,a,a,a,a,a," << endl; |
| 640 | csv_file5 << "123,TEST2,text1,,,,,,,,,,value1" << endl; |
| 641 | csv_file5 << "345,TEST2,text2,,,,,,,,,,value2" << endl; |
| 642 | csv_file5.close(); |
| 643 | |
| 644 | REQUIRE_NO_FAIL( |
| 645 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 646 | REQUIRE_NO_FAIL(con.Query("SELECT a_00, a_08, a_09, column12 FROM test;" )); |
| 647 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 648 | |
| 649 | // generate CSV file with 10 equally called columns, one named column12 and column 11 and 12 missing |
| 650 | ofstream csv_file6(fs.JoinPath(csv_path, "test.csv" )); |
| 651 | csv_file6 << "a,a,a,a,a,a,a,a,a,a,column12,," << endl; |
| 652 | csv_file6 << "123,TEST2,text1,,,,,,,,,,value1" << endl; |
| 653 | csv_file6 << "345,TEST2,text2,,,,,,,,,,value2" << endl; |
| 654 | csv_file6.close(); |
| 655 | |
| 656 | REQUIRE_NO_FAIL( |
| 657 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 658 | REQUIRE_NO_FAIL(con.Query("SELECT a_0, a_8, a_9, column12_0, column11, column12_1 FROM test;" )); |
| 659 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 660 | } |
| 661 | |
| 662 | TEST_CASE("Test csv type detection" , "[copy]" ) { |
| 663 | FileSystem fs; |
| 664 | unique_ptr<QueryResult> result; |
| 665 | DuckDB db(nullptr); |
| 666 | Connection con(db); |
| 667 | |
| 668 | auto csv_path = GetCSVPath(); |
| 669 | |
| 670 | idx_t line_count = 9000; |
| 671 | |
| 672 | // generate a CSV file with many strings |
| 673 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 674 | csv_file << "linenr|mixed_string|mixed_double" << endl; |
| 675 | for (idx_t i = 0; i <= line_count; i++) { |
| 676 | csv_file << i * 3 + 1 << "|1|1" << endl; |
| 677 | |
| 678 | if (i < line_count / 3 || i > line_count * 2 / 3) { |
| 679 | csv_file << i * 3 + 2 << "|2|2" << endl; |
| 680 | } else { |
| 681 | csv_file << i * 3 + 2 << "|TEST|3.5" << endl; |
| 682 | } |
| 683 | |
| 684 | if (i < line_count / 2) { |
| 685 | csv_file << i * 3 + 3 << "|3|3" << endl; |
| 686 | } else { |
| 687 | csv_file << i * 3 + 3 << "|3|3.5" << endl; |
| 688 | } |
| 689 | } |
| 690 | csv_file.close(); |
| 691 | |
| 692 | REQUIRE_NO_FAIL( |
| 693 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 694 | result = con.Query("SELECT linenr, mixed_string, mixed_double FROM test LIMIT 3;" ); |
| 695 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 696 | REQUIRE(CHECK_COLUMN(result, 1, {"1" , "2" , "3" })); |
| 697 | REQUIRE(CHECK_COLUMN(result, 2, {1.0, 2.0, 3.0})); |
| 698 | |
| 699 | result = con.Query("SELECT linenr, mixed_string, mixed_double FROM test WHERE linenr > 27000 LIMIT 3;" ); |
| 700 | REQUIRE(CHECK_COLUMN(result, 0, {27001, 27002, 27003})); |
| 701 | REQUIRE(CHECK_COLUMN(result, 1, {"1" , "2" , "3" })); |
| 702 | REQUIRE(CHECK_COLUMN(result, 2, {1.0, 2.0, 3.5})); |
| 703 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 704 | |
| 705 | // generate a CSV file containing time, date and timestamp columns |
| 706 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
| 707 | csv_file2 << "a,b,t,d,ts" << endl; |
| 708 | csv_file2 << "123,TEST2,12:12:12,2000-01-01,2000-01-01 12:12:00" << endl; |
| 709 | csv_file2 << "345,TEST2,14:15:30,2002-02-02,2002-02-02 14:15:00" << endl; |
| 710 | csv_file2 << "346,TEST2,15:16:17,2004-12-13,2004-12-13 15:16:00" << endl; |
| 711 | csv_file2.close(); |
| 712 | |
| 713 | REQUIRE_NO_FAIL( |
| 714 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 715 | result = con.Query("SELECT a, b, t, d, ts FROM test ORDER BY a;" ); |
| 716 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 346})); |
| 717 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" , "TEST2" })); |
| 718 | REQUIRE( |
| 719 | CHECK_COLUMN(result, 2, {Value::TIME(12, 12, 12, 0), Value::TIME(14, 15, 30, 0), Value::TIME(15, 16, 17, 0)})); |
| 720 | REQUIRE(CHECK_COLUMN(result, 3, {Value::DATE(2000, 01, 01), Value::DATE(2002, 02, 02), Value::DATE(2004, 12, 13)})); |
| 721 | REQUIRE(CHECK_COLUMN(result, 4, |
| 722 | {Value::TIMESTAMP(2000, 01, 01, 12, 12, 0, 0), Value::TIMESTAMP(2002, 02, 02, 14, 15, 0, 0), |
| 723 | Value::TIMESTAMP(2004, 12, 13, 15, 16, 0, 0)})); |
| 724 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 725 | |
| 726 | // generate a CSV file containing time and date columns with leading/trailing chars |
| 727 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
| 728 | csv_file3 << "a,b,t,tf,d,df" << endl; |
| 729 | csv_file3 << "123,TEST2,\" 12:12:12 \",\"12:12:12\",\" 2000-01-01 \",\"2000-01-01\"" << endl; |
| 730 | csv_file3 << "345,TEST2,\" 14:15:30\",\"14:15:30\",\" 2002-02-02 \",\"2000-01-01 a\"" << endl; |
| 731 | csv_file3 << "346,TEST2,\" 15:16:17 \",\"15:16:17 01\",\" 2004-12-13 \",\"2000-01-01\"" << endl; |
| 732 | csv_file3.close(); |
| 733 | |
| 734 | REQUIRE_NO_FAIL( |
| 735 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
| 736 | result = con.Query("SELECT a, b, t, tf, d, df FROM test ORDER BY a;" ); |
| 737 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 346})); |
| 738 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" , "TEST2" })); |
| 739 | REQUIRE( |
| 740 | CHECK_COLUMN(result, 2, {Value::TIME(12, 12, 12, 0), Value::TIME(14, 15, 30, 0), Value::TIME(15, 16, 17, 0)})); |
| 741 | REQUIRE(CHECK_COLUMN(result, 3, {"12:12:12" , "14:15:30" , "15:16:17 01" })); |
| 742 | REQUIRE(CHECK_COLUMN(result, 4, {Value::DATE(2000, 01, 01), Value::DATE(2002, 02, 02), Value::DATE(2004, 12, 13)})); |
| 743 | REQUIRE(CHECK_COLUMN(result, 5, {"2000-01-01" , "2000-01-01 a" , "2000-01-01" })); |
| 744 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 745 | } |
| 746 | |
| 747 | #endif |
| 748 | |