1 | #include "catch.hpp" |
2 | #include "duckdb/common/file_system.hpp" |
3 | #include "duckdb/common/types/date.hpp" |
4 | #include "test_csv_header.hpp" |
5 | #include "test_helpers.hpp" |
6 | |
7 | #include <fstream> |
8 | |
9 | using namespace duckdb; |
10 | using namespace std; |
11 | |
12 | #if STANDARD_VECTOR_SIZE >= 16 |
13 | |
14 | TEST_CASE("Test copy into auto from lineitem csv" , "[copy]" ) { |
15 | FileSystem fs; |
16 | unique_ptr<QueryResult> result; |
17 | DuckDB db(nullptr); |
18 | Connection con(db); |
19 | |
20 | auto csv_path = GetCSVPath(); |
21 | auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv" ); |
22 | WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample)); |
23 | |
24 | REQUIRE_NO_FAIL(con.Query( |
25 | "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber " |
26 | "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) " |
27 | "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, " |
28 | "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) " |
29 | "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);" )); |
30 | result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' (FORMAT CSV_AUTO);" ); |
31 | |
32 | result = con.Query("SELECT COUNT(*) FROM lineitem;" ); |
33 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
34 | |
35 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
36 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
37 | REQUIRE( |
38 | CHECK_COLUMN(result, 1, |
39 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
40 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
41 | } |
42 | |
43 | TEST_CASE("Test read_csv_auto from on-time dataset" , "[copy]" ) { |
44 | FileSystem fs; |
45 | unique_ptr<QueryResult> result; |
46 | DuckDB db(nullptr); |
47 | Connection con(db); |
48 | |
49 | auto csv_path = GetCSVPath(); |
50 | auto ontime_csv = fs.JoinPath(csv_path, "ontime.csv" ); |
51 | WriteBinary(ontime_csv, ontime_sample, sizeof(ontime_sample)); |
52 | |
53 | REQUIRE_NO_FAIL(con.Query( |
54 | "CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, " |
55 | "flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), " |
56 | "flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, " |
57 | "origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), " |
58 | "originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, " |
59 | "destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips " |
60 | "VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), " |
61 | "depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups " |
62 | "DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), " |
63 | "taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes " |
64 | "DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled " |
65 | "DECIMAL(8,2), cancellationcode CHAR(1), diverted DECIMAL(8,2), crselapsedtime DECIMAL(8,2), actualelapsedtime " |
66 | "DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup DECIMAL(8,2), " |
67 | "carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), " |
68 | "lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime " |
69 | "VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), " |
70 | "divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, " |
71 | "div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), " |
72 | "div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, " |
73 | "div2airportseqid INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), " |
74 | "div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, " |
75 | "div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), " |
76 | "div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, " |
77 | "div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), " |
78 | "div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, " |
79 | "div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), " |
80 | "div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));" )); |
81 | |
82 | result = con.Query("COPY ontime FROM '" + ontime_csv + "' (FORMAT CSV_AUTO);" ); |
83 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
84 | |
85 | result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;" ); |
86 | REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988})); |
87 | REQUIRE(CHECK_COLUMN(result, 1, {"AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" })); |
88 | REQUIRE(CHECK_COLUMN(result, 2, {"JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" })); |
89 | REQUIRE(CHECK_COLUMN(result, 3, |
90 | {"New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , |
91 | "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" })); |
92 | REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
93 | } |
94 | |
95 | TEST_CASE("Test read_csv_auto from web_page csv" , "[copy]" ) { |
96 | FileSystem fs; |
97 | unique_ptr<QueryResult> result; |
98 | DuckDB db(nullptr); |
99 | Connection con(db); |
100 | |
101 | auto csv_path = GetCSVPath(); |
102 | auto webpage_csv = fs.JoinPath(csv_path, "web_page.csv" ); |
103 | WriteBinary(webpage_csv, web_page, sizeof(web_page)); |
104 | |
105 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE web_page AS SELECT * FROM read_csv_auto ('" + webpage_csv + "');" )); |
106 | |
107 | result = con.Query("SELECT COUNT(*) FROM web_page;" ); |
108 | REQUIRE(CHECK_COLUMN(result, 0, {60})); |
109 | |
110 | result = con.Query("SELECT * FROM web_page ORDER BY column00 LIMIT 3;" ); |
111 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
112 | REQUIRE(CHECK_COLUMN(result, 1, {"AAAAAAAABAAAAAAA" , "AAAAAAAACAAAAAAA" , "AAAAAAAACAAAAAAA" })); |
113 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(1997, 9, 3), Value::DATE(1997, 9, 3), Value::DATE(2000, 9, 3)})); |
114 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value::DATE(2000, 9, 2), Value()})); |
115 | REQUIRE(CHECK_COLUMN(result, 4, {2450810, 2450814, 2450814})); |
116 | REQUIRE(CHECK_COLUMN(result, 5, {2452620, 2452580, 2452611})); |
117 | REQUIRE(CHECK_COLUMN(result, 6, {"Y" , "N" , "N" })); |
118 | REQUIRE(CHECK_COLUMN(result, 7, {98539, Value(), Value()})); |
119 | REQUIRE(CHECK_COLUMN(result, 8, {"http://www.foo.com" , "http://www.foo.com" , "http://www.foo.com" })); |
120 | REQUIRE(CHECK_COLUMN(result, 9, {"welcome" , "protected" , "feedback" })); |
121 | REQUIRE(CHECK_COLUMN(result, 10, {2531, 1564, 1564})); |
122 | REQUIRE(CHECK_COLUMN(result, 11, {8, 4, 4})); |
123 | REQUIRE(CHECK_COLUMN(result, 12, {3, 3, 3})); |
124 | REQUIRE(CHECK_COLUMN(result, 13, {4, 1, 4})); |
125 | } |
126 | |
127 | TEST_CASE("Test read_csv_auto from greek-utf8 csv" , "[copy]" ) { |
128 | FileSystem fs; |
129 | unique_ptr<QueryResult> result; |
130 | DuckDB db(nullptr); |
131 | Connection con(db); |
132 | |
133 | auto csv_path = GetCSVPath(); |
134 | auto csv_file = fs.JoinPath(csv_path, "greek_utf8.csv" ); |
135 | WriteBinary(csv_file, greek_utf8, sizeof(greek_utf8)); |
136 | |
137 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE greek_utf8 AS SELECT * FROM read_csv_auto ('" + csv_file + "');" )); |
138 | |
139 | result = con.Query("SELECT COUNT(*) FROM greek_utf8;" ); |
140 | REQUIRE(CHECK_COLUMN(result, 0, {8})); |
141 | |
142 | result = con.Query("SELECT * FROM greek_utf8 ORDER BY 1;" ); |
143 | REQUIRE(CHECK_COLUMN(result, 0, {1689, 1690, 41561, 45804, 51981, 171067, 182773, 607808})); |
144 | REQUIRE(CHECK_COLUMN(result, 1, |
145 | {"\x30\x30\x69\\047\x6d" , "\x30\x30\x69\\047\x76" , "\x32\x30\x31\x35\xe2\x80\x8e" , |
146 | "\x32\x31\xcf\x80" , "\x32\x34\x68\x6f\x75\x72\x73\xe2\x80\xac" , |
147 | "\x61\x72\x64\x65\xcc\x80\x63\x68" , "\x61\xef\xac\x81" , |
148 | "\x70\x6f\x76\x65\x72\x74\x79\xe2\x80\xaa" })); |
149 | REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 1, 1, 1, 2, 1, 1})); |
150 | } |
151 | |
152 | TEST_CASE("Test read_csv_auto from ncvoter csv" , "[copy]" ) { |
153 | FileSystem fs; |
154 | unique_ptr<QueryResult> result; |
155 | DuckDB db(nullptr); |
156 | Connection con(db); |
157 | |
158 | auto csv_path = GetCSVPath(); |
159 | auto ncvoter_csv = fs.JoinPath(csv_path, "ncvoter.csv" ); |
160 | WriteBinary(ncvoter_csv, ncvoter, sizeof(ncvoter)); |
161 | |
162 | REQUIRE_NO_FAIL(con.Query( |
163 | "CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd " |
164 | "STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, " |
165 | "name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, " |
166 | "full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, " |
167 | "street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, " |
168 | "res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, " |
169 | "mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state " |
170 | "STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number " |
171 | "STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, " |
172 | "party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, " |
173 | "registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc " |
174 | "STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv " |
175 | "STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, " |
176 | "nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, " |
177 | "county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, " |
178 | "school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, " |
179 | "water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, " |
180 | "sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, " |
181 | "munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, " |
182 | "confidential_ind STRING, age STRING, ncid STRING, vtd_abbrv STRING, vtd_desc STRING);" )); |
183 | result = con.Query("COPY ncvoters FROM '" + ncvoter_csv + "' (FORMAT CSV_AUTO);" ); |
184 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
185 | |
186 | result = con.Query("SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;" ); |
187 | REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); |
188 | REQUIRE(CHECK_COLUMN(result, 1, |
189 | {"ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , |
190 | "ALAMANCE" , "ALAMANCE" , "ALAMANCE" })); |
191 | REQUIRE(CHECK_COLUMN(result, 2, {"09S" , "09S" , "03W" , "09S" , "1210" , "035" , "124" , "06E" , "035" , "064" })); |
192 | REQUIRE(CHECK_COLUMN(result, 3, |
193 | {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
194 | } |
195 | |
196 | TEST_CASE("Test read_csv_auto from imdb csv" , "[copy]" ) { |
197 | FileSystem fs; |
198 | unique_ptr<QueryResult> result; |
199 | DuckDB db(nullptr); |
200 | Connection con(db); |
201 | |
202 | auto csv_path = GetCSVPath(); |
203 | auto imdb_movie_info = fs.JoinPath(csv_path, "imdb_movie_info.csv" ); |
204 | WriteBinary(imdb_movie_info, imdb_movie_info_escaped, sizeof(imdb_movie_info_escaped)); |
205 | |
206 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE movie_info AS SELECT * FROM read_csv_auto ('" + imdb_movie_info + "');" )); |
207 | |
208 | result = con.Query("SELECT COUNT(*) FROM movie_info;" ); |
209 | REQUIRE(CHECK_COLUMN(result, 0, {201})); |
210 | } |
211 | |
212 | TEST_CASE("Test read_csv_auto from cranlogs broken gzip" , "[copy][.]" ) { |
213 | FileSystem fs; |
214 | unique_ptr<QueryResult> result; |
215 | DuckDB db(nullptr); |
216 | Connection con(db); |
217 | |
218 | auto csv_path = GetCSVPath(); |
219 | auto cranlogs_csv = fs.JoinPath(csv_path, "cranlogs.csv.gz" ); |
220 | WriteBinary(cranlogs_csv, tmp2013_06_15, sizeof(tmp2013_06_15)); |
221 | |
222 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE cranlogs AS SELECT * FROM read_csv_auto ('" + cranlogs_csv + "');" )); |
223 | |
224 | result = con.Query("SELECT COUNT(*) FROM cranlogs;" ); |
225 | REQUIRE(CHECK_COLUMN(result, 0, {37459})); |
226 | } |
227 | |
228 | TEST_CASE("Test csv dialect detection" , "[copy]" ) { |
229 | FileSystem fs; |
230 | unique_ptr<QueryResult> result; |
231 | DuckDB db(nullptr); |
232 | Connection con(db); |
233 | |
234 | auto csv_path = GetCSVPath(); |
235 | |
236 | // generate CSV file with RFC-conform dialect |
237 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
238 | csv_file << "123,TEST1,one space" << endl; |
239 | csv_file << "345,TEST1,trailing_space " << endl; |
240 | csv_file << "567,TEST1,no_space" << endl; |
241 | csv_file.close(); |
242 | |
243 | REQUIRE_NO_FAIL( |
244 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
245 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
246 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
247 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST1" , "TEST1" , "TEST1" })); |
248 | REQUIRE(CHECK_COLUMN(result, 2, {"one space" , "trailing_space " , "no_space" })); |
249 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
250 | |
251 | // generate CSV file with RFC-conform dialect quote |
252 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
253 | csv_file2 << "123,TEST2,\"one space\"" << endl; |
254 | csv_file2 << "345,TEST2,\"trailing_space, \"" << endl; |
255 | csv_file2 << "567,TEST2,\"no\"\"space\"" << endl; |
256 | csv_file2.close(); |
257 | |
258 | REQUIRE_NO_FAIL( |
259 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
260 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
261 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
262 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" , "TEST2" })); |
263 | REQUIRE(CHECK_COLUMN(result, 2, {"one space" , "trailing_space, " , "no\"space" })); |
264 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
265 | |
266 | // generate CSV file with RFC-conform dialect quote/leading space of numerics |
267 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
268 | csv_file3 << "123,TEST3,text1" << endl; |
269 | csv_file3 << "\"345\",TEST3,text2" << endl; |
270 | csv_file3 << " 567,TEST3,text3" << endl; |
271 | csv_file3.close(); |
272 | |
273 | REQUIRE_NO_FAIL( |
274 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
275 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
276 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
277 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST3" , "TEST3" , "TEST3" })); |
278 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
279 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
280 | |
281 | // generate CSV file with bar delimiter |
282 | ofstream csv_file4(fs.JoinPath(csv_path, "test.csv" )); |
283 | csv_file4 << "123|TEST4|text1" << endl; |
284 | csv_file4 << "345|TEST4|text2" << endl; |
285 | csv_file4 << "567|TEST4|text3" << endl; |
286 | csv_file4.close(); |
287 | |
288 | REQUIRE_NO_FAIL( |
289 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
290 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
291 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
292 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST4" , "TEST4" , "TEST4" })); |
293 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
294 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
295 | |
296 | // generate CSV file with bar delimiter and double quotes |
297 | ofstream csv_file5(fs.JoinPath(csv_path, "test.csv" )); |
298 | csv_file5 << "123|TEST5|text1" << endl; |
299 | csv_file5 << "345|TEST5|\"text2|\"" << endl; |
300 | csv_file5 << "567|TEST5|text3" << endl; |
301 | csv_file5.close(); |
302 | |
303 | REQUIRE_NO_FAIL( |
304 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
305 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
306 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
307 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST5" , "TEST5" , "TEST5" })); |
308 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2|" , "text3" })); |
309 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
310 | |
311 | // generate CSV file with bar delimiter and double quotes and double escape |
312 | ofstream csv_file6(fs.JoinPath(csv_path, "test.csv" )); |
313 | csv_file6 << "123|TEST6|text1" << endl; |
314 | csv_file6 << "345|TEST6|\"text\"\"2\"\"text\"" << endl; |
315 | csv_file6 << "\"567\"|TEST6|text3" << endl; |
316 | csv_file6.close(); |
317 | |
318 | REQUIRE_NO_FAIL( |
319 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
320 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
321 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
322 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST6" , "TEST6" , "TEST6" })); |
323 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text\"2\"text" , "text3" })); |
324 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
325 | |
326 | // generate CSV file with bar delimiter and double quotes and backslash escape |
327 | ofstream csv_file7(fs.JoinPath(csv_path, "test.csv" )); |
328 | csv_file7 << "123|TEST7|text1" << endl; |
329 | csv_file7 << "345|TEST7|\"text\\\"2\\\"\"" << endl; |
330 | csv_file7 << "\"567\"|TEST7|text3" << endl; |
331 | csv_file7.close(); |
332 | |
333 | REQUIRE_NO_FAIL( |
334 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
335 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
336 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
337 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST7" , "TEST7" , "TEST7" })); |
338 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text\"2\"" , "text3" })); |
339 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
340 | |
341 | // generate CSV file with bar delimiter and single quotes and backslash escape |
342 | ofstream csv_file8(fs.JoinPath(csv_path, "test.csv" )); |
343 | csv_file8 << "123|TEST8|text1" << endl; |
344 | csv_file8 << "345|TEST8|'text\\'2\\'text'" << endl; |
345 | csv_file8 << "'567'|TEST8|text3" << endl; |
346 | csv_file8.close(); |
347 | |
348 | REQUIRE_NO_FAIL( |
349 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
350 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
351 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
352 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST8" , "TEST8" , "TEST8" })); |
353 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text'2'text" , "text3" })); |
354 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
355 | |
356 | // generate CSV file with semicolon delimiter |
357 | ofstream csv_file9(fs.JoinPath(csv_path, "test.csv" )); |
358 | csv_file9 << "123;TEST9;text1" << endl; |
359 | csv_file9 << "345;TEST9;text2" << endl; |
360 | csv_file9 << "567;TEST9;text3" << endl; |
361 | csv_file9.close(); |
362 | |
363 | REQUIRE_NO_FAIL( |
364 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
365 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
366 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
367 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST9" , "TEST9" , "TEST9" })); |
368 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
369 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
370 | |
371 | // generate CSV file with semicolon delimiter and double quotes |
372 | ofstream csv_file10(fs.JoinPath(csv_path, "test.csv" )); |
373 | csv_file10 << "123;TEST10;text1" << endl; |
374 | csv_file10 << "\"345\";TEST10;text2" << endl; |
375 | csv_file10 << "567;TEST10;\"te;xt3\"" << endl; |
376 | csv_file10.close(); |
377 | |
378 | REQUIRE_NO_FAIL( |
379 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
380 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
381 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
382 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST10" , "TEST10" , "TEST10" })); |
383 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "te;xt3" })); |
384 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
385 | |
386 | // generate CSV file with semicolon delimiter, double quotes and RFC escape |
387 | ofstream csv_file11(fs.JoinPath(csv_path, "test.csv" )); |
388 | csv_file11 << "123;TEST11;text1" << endl; |
389 | csv_file11 << "\"345\";TEST11;text2" << endl; |
390 | csv_file11 << "567;TEST11;\"te\"\"xt3\"" << endl; |
391 | csv_file11.close(); |
392 | |
393 | REQUIRE_NO_FAIL( |
394 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
395 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
396 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
397 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST11" , "TEST11" , "TEST11" })); |
398 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "te\"xt3" })); |
399 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
400 | |
401 | // generate CSV file with tab delimiter |
402 | ofstream csv_file12(fs.JoinPath(csv_path, "test.csv" )); |
403 | csv_file12 << "123\tTEST12\ttext1" << endl; |
404 | csv_file12 << "345\tTEST12\ttext2" << endl; |
405 | csv_file12 << "567\tTEST12\ttext3" << endl; |
406 | csv_file12.close(); |
407 | |
408 | REQUIRE_NO_FAIL( |
409 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
410 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
411 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
412 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST12" , "TEST12" , "TEST12" })); |
413 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" , "text3" })); |
414 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
415 | |
416 | // generate CSV file with tab delimiter and single quotes |
417 | ofstream csv_file13(fs.JoinPath(csv_path, "test.csv" )); |
418 | csv_file13 << "123\tTEST13\ttext1" << endl; |
419 | csv_file13 << "345\tTEST13\t'te\txt2'" << endl; |
420 | csv_file13 << "'567'\tTEST13\ttext3" << endl; |
421 | csv_file13.close(); |
422 | |
423 | REQUIRE_NO_FAIL( |
424 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
425 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
426 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
427 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST13" , "TEST13" , "TEST13" })); |
428 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "te\txt2" , "text3" })); |
429 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
430 | |
431 | // generate CSV file with tab delimiter and single quotes without type-hint |
432 | ofstream csv_file14(fs.JoinPath(csv_path, "test.csv" )); |
433 | csv_file14 << "123\tTEST14\ttext1" << endl; |
434 | csv_file14 << "345\tTEST14\t'te\txt2'" << endl; |
435 | csv_file14 << "567\tTEST14\ttext3" << endl; |
436 | csv_file14.close(); |
437 | |
438 | REQUIRE_NO_FAIL( |
439 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
440 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
441 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 567})); |
442 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST14" , "TEST14" , "TEST14" })); |
443 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "te\txt2" , "text3" })); |
444 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
445 | } |
446 | |
447 | TEST_CASE("Test csv header detection" , "[copy]" ) { |
448 | FileSystem fs; |
449 | unique_ptr<QueryResult> result; |
450 | DuckDB db(nullptr); |
451 | Connection con(db); |
452 | |
453 | auto csv_path = GetCSVPath(); |
454 | |
455 | // generate CSV file with two lines, none header |
456 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
457 | csv_file << "123.0,TEST1,2000-12-12" << endl; |
458 | csv_file << "345.0,TEST1,2000-12-13" << endl; |
459 | csv_file.close(); |
460 | |
461 | REQUIRE_NO_FAIL( |
462 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
463 | result = con.Query("SELECT column0, column1, column2 FROM test ORDER BY column0;" ); |
464 | REQUIRE(CHECK_COLUMN(result, 0, {123.0, 345.0})); |
465 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST1" , "TEST1" })); |
466 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 12), Value::DATE(2000, 12, 13)})); |
467 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
468 | |
469 | // generate CSV file with two lines, one header |
470 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
471 | csv_file2 << "number,text,date" << endl; |
472 | csv_file2 << "345.0,TEST2,2000-12-13" << endl; |
473 | csv_file2.close(); |
474 | |
475 | REQUIRE_NO_FAIL( |
476 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
477 | result = con.Query("SELECT number, text, date FROM test ORDER BY number;" ); |
478 | REQUIRE(CHECK_COLUMN(result, 0, {345.0})); |
479 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" })); |
480 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 13)})); |
481 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
482 | |
483 | // generate CSV file with three lines, one header, one skip row |
484 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
485 | csv_file3 << "some notes..." << endl; |
486 | csv_file3 << "number,text,date" << endl; |
487 | csv_file3 << "345.0,TEST3,2000-12-13" << endl; |
488 | csv_file3.close(); |
489 | |
490 | REQUIRE_NO_FAIL( |
491 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
492 | result = con.Query("SELECT number, text, date FROM test ORDER BY number;" ); |
493 | REQUIRE(CHECK_COLUMN(result, 0, {345.0})); |
494 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST3" })); |
495 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 13)})); |
496 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
497 | |
498 | // generate CSV file with three lines, one header, two skip rows |
499 | ofstream csv_file4(fs.JoinPath(csv_path, "test.csv" )); |
500 | csv_file4 << "some notes..." << endl; |
501 | csv_file4 << "more notes,..." << endl; |
502 | csv_file4 << "number,text,date" << endl; |
503 | csv_file4 << "345.0,TEST4,2000-12-13" << endl; |
504 | csv_file4.close(); |
505 | |
506 | REQUIRE_NO_FAIL( |
507 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
508 | result = con.Query("SELECT number, text, date FROM test ORDER BY number;" ); |
509 | REQUIRE(CHECK_COLUMN(result, 0, {345.0})); |
510 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST4" })); |
511 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(2000, 12, 13)})); |
512 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
513 | |
514 | // generate CSV file with two lines both only strings |
515 | ofstream csv_file5(fs.JoinPath(csv_path, "test.csv" )); |
516 | csv_file5 << "Alice,StreetA,TEST5" << endl; |
517 | csv_file5 << "Bob,StreetB,TEST5" << endl; |
518 | csv_file5.close(); |
519 | |
520 | REQUIRE_NO_FAIL( |
521 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
522 | result = con.Query("SELECT * FROM test ORDER BY column0;" ); |
523 | REQUIRE(CHECK_COLUMN(result, 0, {"Alice" , "Bob" })); |
524 | REQUIRE(CHECK_COLUMN(result, 1, {"StreetA" , "StreetB" })); |
525 | REQUIRE(CHECK_COLUMN(result, 2, {"TEST5" , "TEST5" })); |
526 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
527 | |
528 | // generate CSV file with one line, two columsn, only strings |
529 | ofstream csv_file6(fs.JoinPath(csv_path, "test.csv" )); |
530 | csv_file6 << "Alice,StreetA" << endl; |
531 | csv_file6.close(); |
532 | |
533 | REQUIRE_NO_FAIL( |
534 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
535 | result = con.Query("SELECT column0, column1 FROM test ORDER BY column0;" ); |
536 | REQUIRE(CHECK_COLUMN(result, 0, {"Alice" })); |
537 | REQUIRE(CHECK_COLUMN(result, 1, {"StreetA" })); |
538 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
539 | |
540 | // generate CSV file with one line, two columns - one numeric, one string |
541 | ofstream csv_file7(fs.JoinPath(csv_path, "test.csv" )); |
542 | csv_file7 << "1,StreetA" << endl; |
543 | csv_file7.close(); |
544 | |
545 | REQUIRE_NO_FAIL( |
546 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
547 | result = con.Query("SELECT column0, column1 FROM test ORDER BY column0;" ); |
548 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
549 | REQUIRE(CHECK_COLUMN(result, 1, {"StreetA" })); |
550 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
551 | |
552 | // generate CSV file with one line, one string column |
553 | ofstream csv_file8(fs.JoinPath(csv_path, "test.csv" )); |
554 | csv_file8 << "Test" << endl; |
555 | csv_file8.close(); |
556 | |
557 | REQUIRE_NO_FAIL( |
558 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
559 | result = con.Query("SELECT * FROM test;" ); |
560 | REQUIRE(CHECK_COLUMN(result, 0, {"Test" })); |
561 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
562 | |
563 | // generate CSV file with one line, one numeric column |
564 | ofstream csv_file9(fs.JoinPath(csv_path, "test.csv" )); |
565 | csv_file9 << "1" << endl; |
566 | csv_file9.close(); |
567 | |
568 | REQUIRE_NO_FAIL( |
569 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
570 | result = con.Query("SELECT * FROM test;" ); |
571 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
572 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
573 | } |
574 | |
575 | TEST_CASE("Test csv header completion" , "[copy]" ) { |
576 | FileSystem fs; |
577 | unique_ptr<QueryResult> result; |
578 | DuckDB db(nullptr); |
579 | Connection con(db); |
580 | |
581 | auto csv_path = GetCSVPath(); |
582 | |
583 | // generate CSV file with one missing header |
584 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
585 | csv_file << "a,,c" << endl; |
586 | csv_file << "123,TEST1,text1" << endl; |
587 | csv_file << "345,TEST1,text2" << endl; |
588 | csv_file.close(); |
589 | |
590 | REQUIRE_NO_FAIL( |
591 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
592 | result = con.Query("SELECT a, column1, c FROM test ORDER BY a;" ); |
593 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345})); |
594 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST1" , "TEST1" })); |
595 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" })); |
596 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
597 | |
598 | // generate CSV file with one duplicate header |
599 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
600 | csv_file2 << "a,b,a" << endl; |
601 | csv_file2 << "123,TEST2,text1" << endl; |
602 | csv_file2 << "345,TEST2,text2" << endl; |
603 | csv_file2.close(); |
604 | |
605 | REQUIRE_NO_FAIL( |
606 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
607 | result = con.Query("SELECT a_0, b, a_1 FROM test ORDER BY a_0;" ); |
608 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345})); |
609 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" })); |
610 | REQUIRE(CHECK_COLUMN(result, 2, {"text1" , "text2" })); |
611 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
612 | |
613 | // generate CSV file with all column names missing |
614 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
615 | csv_file3 << ",," << endl; |
616 | csv_file3 << "123,TEST3,text1" << endl; |
617 | csv_file3 << "345,TEST3,text2" << endl; |
618 | csv_file3.close(); |
619 | |
620 | REQUIRE_NO_FAIL( |
621 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
622 | REQUIRE_NO_FAIL(con.Query("SELECT column0, column1, column2 FROM test ORDER BY column0;" )); |
623 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
624 | |
625 | // generate CSV file with 12 columns and all but one column name missing |
626 | ofstream csv_file4(fs.JoinPath(csv_path, "test.csv" )); |
627 | csv_file4 << "a,,,,,,,,,,,," << endl; |
628 | csv_file4 << "123,TEST2,text1,,,,,,,,,,value1" << endl; |
629 | csv_file4 << "345,TEST2,text2,,,,,,,,,,value2" << endl; |
630 | csv_file4.close(); |
631 | |
632 | REQUIRE_NO_FAIL( |
633 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
634 | REQUIRE_NO_FAIL(con.Query("SELECT a, column01, column12 FROM test;" )); |
635 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
636 | |
637 | // generate CSV file with 12 equally called columns |
638 | ofstream csv_file5(fs.JoinPath(csv_path, "test.csv" )); |
639 | csv_file5 << "a,a,a,a,a,a,a,a,a,a,a,a," << endl; |
640 | csv_file5 << "123,TEST2,text1,,,,,,,,,,value1" << endl; |
641 | csv_file5 << "345,TEST2,text2,,,,,,,,,,value2" << endl; |
642 | csv_file5.close(); |
643 | |
644 | REQUIRE_NO_FAIL( |
645 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
646 | REQUIRE_NO_FAIL(con.Query("SELECT a_00, a_08, a_09, column12 FROM test;" )); |
647 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
648 | |
649 | // generate CSV file with 10 equally called columns, one named column12 and column 11 and 12 missing |
650 | ofstream csv_file6(fs.JoinPath(csv_path, "test.csv" )); |
651 | csv_file6 << "a,a,a,a,a,a,a,a,a,a,column12,," << endl; |
652 | csv_file6 << "123,TEST2,text1,,,,,,,,,,value1" << endl; |
653 | csv_file6 << "345,TEST2,text2,,,,,,,,,,value2" << endl; |
654 | csv_file6.close(); |
655 | |
656 | REQUIRE_NO_FAIL( |
657 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
658 | REQUIRE_NO_FAIL(con.Query("SELECT a_0, a_8, a_9, column12_0, column11, column12_1 FROM test;" )); |
659 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
660 | } |
661 | |
662 | TEST_CASE("Test csv type detection" , "[copy]" ) { |
663 | FileSystem fs; |
664 | unique_ptr<QueryResult> result; |
665 | DuckDB db(nullptr); |
666 | Connection con(db); |
667 | |
668 | auto csv_path = GetCSVPath(); |
669 | |
670 | idx_t line_count = 9000; |
671 | |
672 | // generate a CSV file with many strings |
673 | ofstream csv_file(fs.JoinPath(csv_path, "test.csv" )); |
674 | csv_file << "linenr|mixed_string|mixed_double" << endl; |
675 | for (idx_t i = 0; i <= line_count; i++) { |
676 | csv_file << i * 3 + 1 << "|1|1" << endl; |
677 | |
678 | if (i < line_count / 3 || i > line_count * 2 / 3) { |
679 | csv_file << i * 3 + 2 << "|2|2" << endl; |
680 | } else { |
681 | csv_file << i * 3 + 2 << "|TEST|3.5" << endl; |
682 | } |
683 | |
684 | if (i < line_count / 2) { |
685 | csv_file << i * 3 + 3 << "|3|3" << endl; |
686 | } else { |
687 | csv_file << i * 3 + 3 << "|3|3.5" << endl; |
688 | } |
689 | } |
690 | csv_file.close(); |
691 | |
692 | REQUIRE_NO_FAIL( |
693 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
694 | result = con.Query("SELECT linenr, mixed_string, mixed_double FROM test LIMIT 3;" ); |
695 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
696 | REQUIRE(CHECK_COLUMN(result, 1, {"1" , "2" , "3" })); |
697 | REQUIRE(CHECK_COLUMN(result, 2, {1.0, 2.0, 3.0})); |
698 | |
699 | result = con.Query("SELECT linenr, mixed_string, mixed_double FROM test WHERE linenr > 27000 LIMIT 3;" ); |
700 | REQUIRE(CHECK_COLUMN(result, 0, {27001, 27002, 27003})); |
701 | REQUIRE(CHECK_COLUMN(result, 1, {"1" , "2" , "3" })); |
702 | REQUIRE(CHECK_COLUMN(result, 2, {1.0, 2.0, 3.5})); |
703 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
704 | |
705 | // generate a CSV file containing time, date and timestamp columns |
706 | ofstream csv_file2(fs.JoinPath(csv_path, "test.csv" )); |
707 | csv_file2 << "a,b,t,d,ts" << endl; |
708 | csv_file2 << "123,TEST2,12:12:12,2000-01-01,2000-01-01 12:12:00" << endl; |
709 | csv_file2 << "345,TEST2,14:15:30,2002-02-02,2002-02-02 14:15:00" << endl; |
710 | csv_file2 << "346,TEST2,15:16:17,2004-12-13,2004-12-13 15:16:00" << endl; |
711 | csv_file2.close(); |
712 | |
713 | REQUIRE_NO_FAIL( |
714 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
715 | result = con.Query("SELECT a, b, t, d, ts FROM test ORDER BY a;" ); |
716 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 346})); |
717 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" , "TEST2" })); |
718 | REQUIRE( |
719 | CHECK_COLUMN(result, 2, {Value::TIME(12, 12, 12, 0), Value::TIME(14, 15, 30, 0), Value::TIME(15, 16, 17, 0)})); |
720 | REQUIRE(CHECK_COLUMN(result, 3, {Value::DATE(2000, 01, 01), Value::DATE(2002, 02, 02), Value::DATE(2004, 12, 13)})); |
721 | REQUIRE(CHECK_COLUMN(result, 4, |
722 | {Value::TIMESTAMP(2000, 01, 01, 12, 12, 0, 0), Value::TIMESTAMP(2002, 02, 02, 14, 15, 0, 0), |
723 | Value::TIMESTAMP(2004, 12, 13, 15, 16, 0, 0)})); |
724 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
725 | |
726 | // generate a CSV file containing time and date columns with leading/trailing chars |
727 | ofstream csv_file3(fs.JoinPath(csv_path, "test.csv" )); |
728 | csv_file3 << "a,b,t,tf,d,df" << endl; |
729 | csv_file3 << "123,TEST2,\" 12:12:12 \",\"12:12:12\",\" 2000-01-01 \",\"2000-01-01\"" << endl; |
730 | csv_file3 << "345,TEST2,\" 14:15:30\",\"14:15:30\",\" 2002-02-02 \",\"2000-01-01 a\"" << endl; |
731 | csv_file3 << "346,TEST2,\" 15:16:17 \",\"15:16:17 01\",\" 2004-12-13 \",\"2000-01-01\"" << endl; |
732 | csv_file3.close(); |
733 | |
734 | REQUIRE_NO_FAIL( |
735 | con.Query("CREATE TABLE test AS SELECT * FROM read_csv_auto ('" + fs.JoinPath(csv_path, "test.csv" ) + "');" )); |
736 | result = con.Query("SELECT a, b, t, tf, d, df FROM test ORDER BY a;" ); |
737 | REQUIRE(CHECK_COLUMN(result, 0, {123, 345, 346})); |
738 | REQUIRE(CHECK_COLUMN(result, 1, {"TEST2" , "TEST2" , "TEST2" })); |
739 | REQUIRE( |
740 | CHECK_COLUMN(result, 2, {Value::TIME(12, 12, 12, 0), Value::TIME(14, 15, 30, 0), Value::TIME(15, 16, 17, 0)})); |
741 | REQUIRE(CHECK_COLUMN(result, 3, {"12:12:12" , "14:15:30" , "15:16:17 01" })); |
742 | REQUIRE(CHECK_COLUMN(result, 4, {Value::DATE(2000, 01, 01), Value::DATE(2002, 02, 02), Value::DATE(2004, 12, 13)})); |
743 | REQUIRE(CHECK_COLUMN(result, 5, {"2000-01-01" , "2000-01-01 a" , "2000-01-01" })); |
744 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
745 | } |
746 | |
747 | #endif |
748 | |