1#include "catch.hpp"
2#include "duckdb/common/file_system.hpp"
3#include "duckdb/common/types/date.hpp"
4#include "test_csv_header.hpp"
5#include "test_helpers.hpp"
6
7#include <fstream>
8
9using namespace duckdb;
10using namespace std;
11
12TEST_CASE("Test copy statement", "[copy]") {
13 FileSystem fs;
14 unique_ptr<QueryResult> result;
15 DuckDB db(nullptr);
16 Connection con(db);
17
18 auto csv_path = GetCSVPath();
19
20 // generate CSV file with ',' as delimiter and complex strings
21 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
22 for (int i = 0; i < 5000; i++) {
23 from_csv_file << i << "," << i << ", test" << endl;
24 }
25 from_csv_file.close();
26
27 // load CSV file into a table
28 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));"));
29 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
30 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
31
32 result = con.Query("SELECT COUNT(a), SUM(a) FROM test;");
33 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
34 REQUIRE(CHECK_COLUMN(result, 1, {12497500}));
35
36 result = con.Query("SELECT * FROM test ORDER BY 1 LIMIT 3;");
37 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
38 REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2}));
39 REQUIRE(CHECK_COLUMN(result, 2, {" test", " test", " test"}));
40
41 // create CSV file from table
42 result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test2.csv") + "';");
43 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
44 // load the same CSV file back again
45 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test2 (a INTEGER, b INTEGER, c VARCHAR(10));"));
46 result = con.Query("COPY test2 FROM '" + fs.JoinPath(csv_path, "test2.csv") + "';");
47 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
48 result = con.Query("SELECT * FROM test2 ORDER BY 1 LIMIT 3;");
49 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
50 REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2}));
51 REQUIRE(CHECK_COLUMN(result, 2, {" test", " test", " test"}));
52
53 // test too few rows
54 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test_too_few_rows (a INTEGER, b INTEGER, c VARCHAR, d INTEGER);"));
55 REQUIRE_FAIL(con.Query("COPY test_too_few_rows FROM '" + fs.JoinPath(csv_path, "test2.csv") + "';"));
56
57 // create CSV file from query
58 result = con.Query("COPY (SELECT a,b FROM test WHERE a < 4000) TO '" + fs.JoinPath(csv_path, "test3.csv") + "';");
59 REQUIRE(CHECK_COLUMN(result, 0, {4000}));
60 // load the same CSV file back again
61 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test3 (a INTEGER, b INTEGER);"));
62 result = con.Query("COPY test3 FROM '" + fs.JoinPath(csv_path, "test3.csv") + "';");
63 REQUIRE(CHECK_COLUMN(result, 0, {4000}));
64 result = con.Query("SELECT * FROM test3 ORDER BY 1 LIMIT 3;");
65 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
66 REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2}));
67
68 // export selected columns from a table to a CSV file
69 result =
70 con.Query("COPY test (a,c) TO '" + fs.JoinPath(csv_path, "test4.csv") + "' (DELIMITER ',', HEADER false);");
71 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
72
73 // import selected columns from CSV file
74 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test4 (a INTEGER, b INTEGER, c VARCHAR(10));"));
75 result = con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (DELIM ',', HEADER 0);");
76 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
77 result = con.Query("SELECT * FROM test4 ORDER BY 1 LIMIT 3;");
78 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
79 REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()}));
80 REQUIRE(CHECK_COLUMN(result, 2, {" test", " test", " test"}));
81
82 // unsupported type for HEADER
83 REQUIRE_FAIL(
84 con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (SEP ',', HEADER 0.2);"));
85 // empty delimiter
86 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (SEP);"));
87 // number as delimiter
88 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (SEP 1);"));
89 // multiple format options
90 REQUIRE_FAIL(
91 con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (FORMAT 'csv', FORMAT 'json');"));
92 // number as escape string
93 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (ESCAPE 1);"));
94 // no escape string
95 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (ESCAPE);"));
96 // number as quote string
97 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (QUOTE 1);"));
98 // no quote string
99 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (QUOTE);"));
100 // no format string
101 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (FORMAT);"));
102 // encoding must not be empty and must have the correct parameter type and value
103 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (ENCODING);"));
104 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (ENCODING 42);"));
105 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (ENCODING 'utf-42');"));
106 // don't allow for non-existant copy options
107 REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv") + "' (MAGIC '42');"));
108
109 // use a different delimiter
110 auto pipe_csv = fs.JoinPath(csv_path, "test_pipe.csv");
111 ofstream from_csv_file_pipe(pipe_csv);
112 for (int i = 0; i < 10; i++) {
113 from_csv_file_pipe << i << "|" << i << "|test" << endl;
114 }
115 from_csv_file_pipe.close();
116
117 // create new table
118 REQUIRE_NO_FAIL(con.Query("DROP TABLE test;"));
119 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));"));
120 result = con.Query("COPY test FROM '" + pipe_csv + "' (SEPARATOR '|');");
121 REQUIRE(CHECK_COLUMN(result, 0, {10}));
122
123 // throw exception if a line contains too many values
124 ofstream csv_too_many_values_file(fs.JoinPath(csv_path, "too_many_values.csv"));
125 csv_too_many_values_file << "1,2,3,4" << endl;
126 csv_too_many_values_file.close();
127 REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "too_many_values.csv") + "';"));
128
129 // test default null string
130 auto null_csv = fs.JoinPath(csv_path, "null.csv");
131 ofstream from_csv_file_null(null_csv);
132 for (int i = 0; i < 1; i++)
133 from_csv_file_null << i << "||test" << endl;
134 from_csv_file_null.close();
135 result = con.Query("COPY test FROM '" + null_csv + "' DELIMITER '|';");
136 REQUIRE(CHECK_COLUMN(result, 0, {1}));
137
138 // test invalid UTF-8
139 auto invalid_utf_csv = fs.JoinPath(csv_path, "invalid_utf.csv");
140 ofstream from_csv_file_utf(invalid_utf_csv);
141 for (int i = 0; i < 1; i++)
142 from_csv_file_utf << i << "42|42|\xe2\x82\x28" << endl;
143 from_csv_file_utf.close();
144 REQUIRE_FAIL(con.Query("COPY test FROM '" + invalid_utf_csv + "' DELIMITER '|';"));
145
146 // empty file
147 ofstream empty_file(fs.JoinPath(csv_path, "empty.csv"));
148 empty_file.close();
149
150 REQUIRE_NO_FAIL(con.Query("CREATE TABLE empty_table (a INTEGER, b INTEGER, c VARCHAR(10));"));
151 result = con.Query("COPY empty_table FROM '" + fs.JoinPath(csv_path, "empty.csv") + "';");
152 REQUIRE(CHECK_COLUMN(result, 0, {0}));
153
154 // unterminated quotes
155 ofstream unterminated_quotes_file(fs.JoinPath(csv_path, "unterminated.csv"));
156 unterminated_quotes_file << "\"hello\n\n world\n";
157 unterminated_quotes_file.close();
158 REQUIRE_NO_FAIL(con.Query("CREATE TABLE unterminated (a VARCHAR);"));
159 REQUIRE_FAIL(con.Query("COPY unterminated FROM '" + fs.JoinPath(csv_path, "unterminated.csv") + "';"));
160
161 // 1024 rows (vector size)
162 ofstream csv_vector_size(fs.JoinPath(csv_path, "vsize.csv"));
163 for (int i = 0; i < STANDARD_VECTOR_SIZE; i++) {
164 csv_vector_size << i << "," << i << ", test" << endl;
165 }
166 csv_vector_size.close();
167
168 // load CSV file into a table
169 REQUIRE_NO_FAIL(con.Query("CREATE TABLE vsize (a INTEGER, b INTEGER, c VARCHAR(10));"));
170 result = con.Query("COPY vsize FROM '" + fs.JoinPath(csv_path, "vsize.csv") + "';");
171 REQUIRE(CHECK_COLUMN(result, 0, {STANDARD_VECTOR_SIZE}));
172}
173
174TEST_CASE("Test CSV file without trailing newline", "[copy]") {
175 FileSystem fs;
176 unique_ptr<QueryResult> result;
177 DuckDB db(nullptr);
178 Connection con(db);
179
180 auto csv_path = GetCSVPath();
181
182 // no newline at end of file with simple delimiter
183 ofstream csv_no_newline(fs.JoinPath(csv_path, "no_newline.csv"));
184 for (int i = 0; i < 1024; i++) {
185 csv_no_newline << i << "," << i << ", test" << (i + 1 < 1024 ? "\n" : "");
186 }
187 csv_no_newline.close();
188
189 // load CSV file into a table
190 REQUIRE_NO_FAIL(con.Query("CREATE TABLE no_newline (a INTEGER, b INTEGER, c VARCHAR(10));"));
191 result = con.Query("COPY no_newline FROM '" + fs.JoinPath(csv_path, "no_newline.csv") + "';");
192 REQUIRE(CHECK_COLUMN(result, 0, {1024}));
193
194 // no newline at end of file with unicode delimiter
195 ofstream csv_no_newline_unicode(fs.JoinPath(csv_path, "no_newline_unicode.csv"));
196 for (int i = 0; i < 1024; i++) {
197 csv_no_newline_unicode << i << "🦆" << i << "🦆 test" << (i + 1 < 1024 ? "\n" : "");
198 }
199 csv_no_newline_unicode.close();
200
201 // load CSV file into a table
202 REQUIRE_NO_FAIL(con.Query("CREATE TABLE no_newline_unicode (a INTEGER, b INTEGER, c VARCHAR(10));"));
203 result = con.Query("COPY no_newline_unicode FROM '" + fs.JoinPath(csv_path, "no_newline_unicode.csv") +
204 "' DELIMITER '🦆';");
205 REQUIRE(CHECK_COLUMN(result, 0, {1024}));
206}
207
208TEST_CASE("Test CSVs with repeating patterns in delimiter/escape/quote", "[copy]") {
209 FileSystem fs;
210 unique_ptr<QueryResult> result;
211 DuckDB db(nullptr);
212 Connection con(db);
213
214 auto csv_dir = GetCSVPath();
215 auto csv_path = fs.JoinPath(csv_dir, "abac.csv");
216
217 SECTION("ABAC delimiter") {
218 ofstream csv_stream(csv_path);
219 // this is equivalent to "AB|ABAB|"
220 csv_stream << "ABABACABABABAC";
221 csv_stream.close();
222
223 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR, c VARCHAR);"));
224 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC';");
225 REQUIRE(CHECK_COLUMN(result, 0, {1}));
226
227 result = con.Query("SELECT * FROM abac_tbl");
228 REQUIRE(CHECK_COLUMN(result, 0, {"AB"}));
229 REQUIRE(CHECK_COLUMN(result, 1, {"ABAB"}));
230 REQUIRE(CHECK_COLUMN(result, 2, {Value()}));
231
232 // do the same but with a large unused quote specifier
233 REQUIRE_NO_FAIL(con.Query("DELETE FROM abac_tbl;"));
234 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC' QUOTE 'ABABABABABAB';");
235 REQUIRE(CHECK_COLUMN(result, 0, {1}));
236
237 result = con.Query("SELECT * FROM abac_tbl");
238 REQUIRE(CHECK_COLUMN(result, 0, {"AB"}));
239 REQUIRE(CHECK_COLUMN(result, 1, {"ABAB"}));
240 REQUIRE(CHECK_COLUMN(result, 2, {Value()}));
241 }
242 SECTION("Mix of complex quotes/delimiters/escapes") {
243 ofstream csv_stream(csv_path);
244 // quote -> "ABAB"
245 // escape -> "ABAC"
246 // delimiter -> "ABAD"
247 // first value is an escaped quote (ABAB)
248 // second value is a quoted delimiter followed by an escaped quote
249 // third value is an escape outside of a set of quotes (interpreted as a literal value)
250 csv_stream << "ABABABACABABABABABADABABABADABACABABABABABADABAC";
251 csv_stream.close();
252
253 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR, c VARCHAR);"));
254 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAD' QUOTE 'ABAB' ESCAPE 'ABAC';");
255 REQUIRE(CHECK_COLUMN(result, 0, {1}));
256
257 result = con.Query("SELECT * FROM abac_tbl");
258 REQUIRE(CHECK_COLUMN(result, 0, {"ABAB"}));
259 REQUIRE(CHECK_COLUMN(result, 1, {"ABADABAB"}));
260 REQUIRE(CHECK_COLUMN(result, 2, {"ABAC"}));
261 }
262 SECTION("CSV terminates in the middle of quote parsing") {
263 ofstream csv_stream(csv_path);
264 csv_stream << "ABAB";
265 csv_stream.close();
266
267 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
268 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABABABAB';");
269 REQUIRE(CHECK_COLUMN(result, 0, {1}));
270
271 result = con.Query("SELECT * FROM abac_tbl");
272 REQUIRE(CHECK_COLUMN(result, 0, {"ABAB"}));
273 }
274 SECTION("Newline in the middle of quote parsing") {
275 ofstream csv_stream(csv_path);
276 csv_stream << "ABAB\nABAB";
277 csv_stream.close();
278
279 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
280 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABABABAB';");
281 REQUIRE(CHECK_COLUMN(result, 0, {2}));
282
283 result = con.Query("SELECT * FROM abac_tbl");
284 REQUIRE(CHECK_COLUMN(result, 0, {"ABAB", "ABAB"}));
285 }
286 SECTION("Simple quote terminates immediately results in error") {
287 ofstream csv_stream(csv_path);
288 csv_stream << "\"";
289 csv_stream.close();
290
291 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
292 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"';"));
293 }
294 SECTION("File ends in quoted value (simple)") {
295 ofstream csv_stream(csv_path);
296 csv_stream << "\"\"";
297 csv_stream.close();
298
299 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
300 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"';");
301 REQUIRE(CHECK_COLUMN(result, 0, {1}));
302
303 result = con.Query("SELECT * FROM abac_tbl");
304 REQUIRE(CHECK_COLUMN(result, 0, {Value()}));
305 }
306 SECTION("File ends in quoted value (complex)") {
307 ofstream csv_stream(csv_path);
308 csv_stream << "\"\"";
309 csv_stream.close();
310
311 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
312 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' DELIMITER 'AAAB';");
313 REQUIRE(CHECK_COLUMN(result, 0, {1}));
314
315 result = con.Query("SELECT * FROM abac_tbl");
316 REQUIRE(CHECK_COLUMN(result, 0, {Value()}));
317 }
318 SECTION("Simple quote terminates after escape results in error") {
319 ofstream csv_stream(csv_path);
320 csv_stream << "\"\\\"";
321 csv_stream.close();
322
323 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
324 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\\';"));
325 }
326 SECTION("Simple quote terminates after quote escape results in error") {
327 ofstream csv_stream(csv_path);
328 csv_stream << "\"\"\"";
329 csv_stream.close();
330
331 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
332 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\"';"));
333 }
334 SECTION("Simple quote terminates after escape results in error") {
335 ofstream csv_stream(csv_path);
336 csv_stream << "\"\\";
337 csv_stream.close();
338
339 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
340 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\\';"));
341 }
342 SECTION("Multi-byte quote terminates immediately results in error") {
343 ofstream csv_stream(csv_path);
344 csv_stream << "ABABAC";
345 csv_stream.close();
346
347 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
348 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABAC';"));
349 }
350 SECTION("Quote followed by incomplete multi-byte delimiter") {
351 ofstream csv_stream(csv_path);
352 csv_stream << "\"\"AB";
353 csv_stream.close();
354
355 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
356 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC';"));
357 REQUIRE_NO_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'AB';"));
358 }
359 SECTION("Multi-byte quote terminates after escape results in error") {
360 ofstream csv_stream(csv_path);
361 csv_stream << "ABACABABABAC";
362 csv_stream.close();
363
364 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
365 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAB';"));
366 }
367 SECTION("Multi-byte quote terminates after quote escape results in error") {
368 ofstream csv_stream(csv_path);
369 csv_stream << "ABACABACABAC";
370 csv_stream.close();
371
372 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
373 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAC';"));
374 }
375 SECTION("Multi-byte quote terminates after escape results in error") {
376 ofstream csv_stream(csv_path);
377 csv_stream << "ABACABAB";
378 csv_stream.close();
379
380 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
381 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAB';"));
382 }
383 SECTION("Delimiter, quote and escape have a maximum size of 255 bytes") {
384 ofstream csv_stream(csv_path);
385 csv_stream << "ABAB";
386 csv_stream.close();
387
388 string long_string(1000, 'a');
389
390 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
391 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '" + long_string + "';"));
392 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' ESCAPE '" + long_string + "';"));
393 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '" + long_string + "';"));
394 REQUIRE_NO_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'BLABLABLA';"));
395 }
396 SECTION("Test \r newline with multi-byte delimiter") {
397 ofstream csv_stream(csv_path);
398 csv_stream << "ABAB\rABAC\r";
399 csv_stream.close();
400
401 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);"));
402 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';");
403 REQUIRE(CHECK_COLUMN(result, 0, {2}));
404
405 result = con.Query("SELECT * FROM abac_tbl");
406 REQUIRE(CHECK_COLUMN(result, 0, {"A", "A"}));
407 REQUIRE(CHECK_COLUMN(result, 1, {"B", "C"}));
408 }
409 SECTION("Test \r\n newline with multi-byte delimiter") {
410 ofstream csv_stream(csv_path);
411 csv_stream << "ABAB\r\nABAC\r\n";
412 csv_stream.close();
413
414 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);"));
415 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';");
416 REQUIRE(CHECK_COLUMN(result, 0, {2}));
417
418 result = con.Query("SELECT * FROM abac_tbl");
419 REQUIRE(CHECK_COLUMN(result, 0, {"A", "A"}));
420 REQUIRE(CHECK_COLUMN(result, 1, {"B", "C"}));
421 }
422 SECTION("Test unterminated quotes with multi-line delimiter") {
423 ofstream csv_stream(csv_path);
424 csv_stream << "\"AAA";
425 csv_stream.close();
426
427 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
428 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';"));
429 }
430 SECTION("Test unquote not followed by delimiter") {
431 ofstream csv_stream(csv_path);
432 csv_stream << "\"AAA\"BB";
433 csv_stream.close();
434
435 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);"));
436 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';"));
437 }
438 SECTION("Test escape followed by non-quote and non-escape (single-byte)") {
439 ofstream csv_stream(csv_path);
440 csv_stream << "\"AAA\\BB\"|A";
441 csv_stream.close();
442
443 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);"));
444 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '|' ESCAPE '\\';"));
445 }
446 SECTION("Test escape followed by non-quote and non-escape (multi-byte)") {
447 ofstream csv_stream(csv_path);
448 csv_stream << "\"AAA\\BB\"BAA";
449 csv_stream.close();
450
451 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);"));
452 REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA' ESCAPE '\\';"));
453 }
454 SECTION("Test file end after delimiter with multi-byte delimiter") {
455 ofstream csv_stream(csv_path);
456 csv_stream << "AAABA";
457 csv_stream.close();
458
459 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
460 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';");
461 REQUIRE(CHECK_COLUMN(result, 0, {1}));
462
463 result = con.Query("SELECT * FROM abac_tbl");
464 REQUIRE(CHECK_COLUMN(result, 0, {"AAA"}));
465 }
466 SECTION("Test file end after delimiter with single-byte delimiter") {
467 ofstream csv_stream(csv_path);
468 csv_stream << "AAA|";
469 csv_stream.close();
470
471 REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);"));
472 result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '|';");
473 REQUIRE(CHECK_COLUMN(result, 0, {1}));
474
475 result = con.Query("SELECT * FROM abac_tbl");
476 REQUIRE(CHECK_COLUMN(result, 0, {"AAA"}));
477 }
478}
479
480TEST_CASE("Test long value with escapes", "[copy]") {
481 FileSystem fs;
482 unique_ptr<QueryResult> result;
483 DuckDB db(nullptr);
484 Connection con(db);
485
486 auto csv_path = GetCSVPath();
487
488 string value = string(10000, 'a') + "\"\"" + string(20000, 'b');
489 string expected_value = string(10000, 'a') + "\"" + string(20000, 'b');
490
491 // long value with escape and simple delimiter
492 ofstream long_escaped_value(fs.JoinPath(csv_path, "long_escaped_value.csv"));
493 long_escaped_value << 1 << "🦆" << 2 << "🦆"
494 << "\"" << value << "\"" << endl;
495 long_escaped_value.close();
496
497 // load CSV file into a table
498 REQUIRE_NO_FAIL(con.Query("CREATE TABLE long_escaped_value (a INTEGER, b INTEGER, c VARCHAR);"));
499 result = con.Query("COPY long_escaped_value FROM '" + fs.JoinPath(csv_path, "long_escaped_value.csv") +
500 "' DELIMITER '🦆';");
501 REQUIRE(CHECK_COLUMN(result, 0, {1}));
502
503 result = con.Query("SELECT * FROM long_escaped_value");
504 REQUIRE(CHECK_COLUMN(result, 0, {1}));
505 REQUIRE(CHECK_COLUMN(result, 1, {2}));
506 REQUIRE(CHECK_COLUMN(result, 2, {expected_value}));
507
508 // long value with escape and complex delimiter
509 ofstream long_escaped_value_unicode(fs.JoinPath(csv_path, "long_escaped_value_unicode.csv"));
510 long_escaped_value_unicode << 1 << "," << 2 << ","
511 << "\"" << value << "\"" << endl;
512 long_escaped_value_unicode.close();
513
514 // load CSV file into a table
515 REQUIRE_NO_FAIL(con.Query("CREATE TABLE long_escaped_value_unicode (a INTEGER, b INTEGER, c VARCHAR);"));
516 result = con.Query("COPY long_escaped_value_unicode FROM '" +
517 fs.JoinPath(csv_path, "long_escaped_value_unicode.csv") + "';");
518 REQUIRE(CHECK_COLUMN(result, 0, {1}));
519
520 result = con.Query("SELECT * FROM long_escaped_value_unicode");
521 REQUIRE(CHECK_COLUMN(result, 0, {1}));
522 REQUIRE(CHECK_COLUMN(result, 1, {2}));
523 REQUIRE(CHECK_COLUMN(result, 2, {expected_value}));
524}
525
526TEST_CASE("Test NULL option of copy statement", "[copy]") {
527 FileSystem fs;
528 unique_ptr<QueryResult> result;
529 DuckDB db(nullptr);
530 Connection con(db);
531
532 auto csv_path = GetCSVPath();
533
534 // generate CSV file with default delimiter
535 ofstream from_csv_file(fs.JoinPath(csv_path, "test_null_option.csv"));
536 for (int i = 0; i < 3; i++) {
537 from_csv_file << i << ",,\"test\",null" << endl;
538 }
539 from_csv_file.close();
540
541 // create a table
542 REQUIRE_NO_FAIL(con.Query(
543 "CREATE TABLE test_null_option (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));"));
544
545 // test COPY ... FROM ...
546
547 // implicitly using default NULL value
548 result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") + "';");
549 REQUIRE(CHECK_COLUMN(result, 0, {3}));
550 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
551 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
552 REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()}));
553 REQUIRE(CHECK_COLUMN(result, 2, {"test", "test", "test"}));
554 REQUIRE(CHECK_COLUMN(result, 3, {"null", "null", "null"}));
555 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;"));
556
557 // explicitly using default NULL value
558 result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") + "' (NULL '');");
559 REQUIRE(CHECK_COLUMN(result, 0, {3}));
560 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
561 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
562 REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()}));
563 REQUIRE(CHECK_COLUMN(result, 2, {"test", "test", "test"}));
564 REQUIRE(CHECK_COLUMN(result, 3, {"null", "null", "null"}));
565 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;"));
566
567 // make sure a quoted null string is interpreted as a null value
568 result =
569 con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") + "' (NULL 'test');");
570 REQUIRE(CHECK_COLUMN(result, 0, {3}));
571 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
572 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
573 REQUIRE(CHECK_COLUMN(result, 1, {"", "", ""}));
574 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()}));
575 REQUIRE(CHECK_COLUMN(result, 3, {"null", "null", "null"}));
576 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;"));
577
578 // setting specific NULL value
579 result =
580 con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") + "' (NULL 'null');");
581 REQUIRE(CHECK_COLUMN(result, 0, {3}));
582 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
583 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
584 REQUIRE(CHECK_COLUMN(result, 1, {"", "", ""}));
585 REQUIRE(CHECK_COLUMN(result, 2, {"test", "test", "test"}));
586 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
587
588 // invalid parameter type
589 REQUIRE_FAIL(
590 con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") + "' (NULL null);"));
591
592 // delimiter must not appear in the NULL specification
593 REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") +
594 "' (NULL 'null,');"));
595 REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") +
596 "' (DELIMITER 'null', NULL 'null');"));
597 REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") +
598 "' (DELIMITER 'null', NULL 'nu');"));
599
600 // no parameter type
601 REQUIRE_FAIL(
602 con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") + "' (NULL);"));
603
604 // empty integer column with non-default NULL string
605 REQUIRE_NO_FAIL(con.Query(
606 "CREATE TABLE test_null_option_2 (col_a INTEGER, col_b INTEGER, col_c VARCHAR(10), col_d VARCHAR(10));"));
607 REQUIRE_FAIL(con.Query("COPY test_null_option_2 FROM '" + fs.JoinPath(csv_path, "test_null_option.csv") +
608 "' (NULL 'null');"));
609
610 // test COPY ... TO ...
611
612 // implicitly using default NULL value
613 result = con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_2.csv") + "';");
614 REQUIRE(CHECK_COLUMN(result, 0, {3}));
615 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;"));
616 result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_2.csv") + "';");
617 REQUIRE(CHECK_COLUMN(result, 0, {3}));
618 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
619 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
620 REQUIRE(CHECK_COLUMN(result, 1, {"", "", ""}));
621 REQUIRE(CHECK_COLUMN(result, 2, {"test", "test", "test"}));
622 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
623
624 // explicitly using default NULL value
625 result = con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_3.csv") + "' (NULL '');");
626 REQUIRE(CHECK_COLUMN(result, 0, {3}));
627 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;"));
628 result =
629 con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_3.csv") + "' (NULL '');");
630 REQUIRE(CHECK_COLUMN(result, 0, {3}));
631 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
632 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
633 REQUIRE(CHECK_COLUMN(result, 1, {"", "", ""}));
634 REQUIRE(CHECK_COLUMN(result, 2, {"test", "test", "test"}));
635 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
636
637 // setting specific NULL value
638 result =
639 con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_4.csv") + "' (NULL 'null');");
640 REQUIRE(CHECK_COLUMN(result, 0, {3}));
641 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;"));
642 result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_4.csv") +
643 "' (NULL 'null');");
644 REQUIRE(CHECK_COLUMN(result, 0, {3}));
645 result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;");
646 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
647 REQUIRE(CHECK_COLUMN(result, 1, {"", "", ""}));
648 REQUIRE(CHECK_COLUMN(result, 2, {"test", "test", "test"}));
649 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
650}
651
652TEST_CASE("Test force_quote and force_not_null", "[copy]") {
653 FileSystem fs;
654 unique_ptr<QueryResult> result;
655 DuckDB db(nullptr);
656 Connection con(db);
657
658 auto csv_path = GetCSVPath();
659
660 // generate CSV file with default delimiter
661 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
662 from_csv_file << 8 << ",test,tea" << endl;
663 for (int i = 0; i < 2; i++) {
664 from_csv_file << i << ",,test" << endl;
665 }
666 from_csv_file.close();
667
668 // generate another CSV file
669 ofstream from_csv_file_2(fs.JoinPath(csv_path, "test_2.csv"));
670 from_csv_file_2 << ",test,tea" << endl;
671 for (int i = 0; i < 2; i++) {
672 from_csv_file_2 << i << ",,test" << endl;
673 }
674 from_csv_file_2.close();
675
676 // create a table
677 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10));"));
678
679 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
680 REQUIRE(CHECK_COLUMN(result, 0, {3}));
681
682 // test FORCE_QUOTE *
683 result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_star.csv") + "' (FORCE_QUOTE *);");
684 REQUIRE(CHECK_COLUMN(result, 0, {3}));
685
686 vector<string> lines;
687 string line;
688 ifstream test_star_file(fs.JoinPath(csv_path, "test_star.csv"));
689 if (test_star_file.is_open()) {
690 while (getline(test_star_file, line)) {
691 lines.push_back(line);
692 }
693 test_star_file.close();
694 } else {
695 throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_star.csv"));
696 };
697 REQUIRE(lines[0] == "\"8\",\"test\",\"tea\"");
698 REQUIRE(lines[1] == "\"0\",,\"test\"");
699 REQUIRE(lines[2] == "\"1\",,\"test\"");
700
701 // test FORCE_QUOTE with specific columns and non-default quote character and non-default null character
702 result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_chosen_columns.csv") +
703 "' (FORCE_QUOTE (col_a, col_c), QUOTE 't', NULL 'tea');");
704 REQUIRE(CHECK_COLUMN(result, 0, {3}));
705
706 ifstream test_columns_file(fs.JoinPath(csv_path, "test_chosen_columns.csv"));
707 if (test_columns_file.is_open()) {
708 while (getline(test_columns_file, line)) {
709 lines.push_back(line);
710 }
711 test_columns_file.close();
712 } else {
713 throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_chosen_columns.csv"));
714 };
715 REQUIRE(lines[3] == "t8t,tttesttt,ttteat");
716 REQUIRE(lines[4] == "t0t,tea,tttesttt");
717 REQUIRE(lines[5] == "t1t,tea,tttesttt");
718
719 // test FORCE_QUOTE with reordered columns
720 result = con.Query("COPY test (col_b, col_c, col_a) TO '" + fs.JoinPath(csv_path, "test_reorder.csv") +
721 "' (FORCE_QUOTE (col_c, col_b), NULL 'test');");
722 REQUIRE(CHECK_COLUMN(result, 0, {3}));
723
724 ifstream test_reorder_file(fs.JoinPath(csv_path, "test_reorder.csv"));
725 if (test_reorder_file.is_open()) {
726 while (getline(test_reorder_file, line)) {
727 lines.push_back(line);
728 }
729 test_reorder_file.close();
730 } else {
731 throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_reorder.csv"));
732 };
733 REQUIRE(lines[6] == "\"test\",\"tea\",8");
734 REQUIRE(lines[7] == "test,\"test\",0");
735 REQUIRE(lines[8] == "test,\"test\",1");
736
737 // test using a column in FORCE_QUOTE that is not set as output, but that is a column of the table
738 REQUIRE_FAIL(con.Query("COPY test (col_b, col_a) TO '" + fs.JoinPath(csv_path, "test_reorder.csv") +
739 "' (FORCE_QUOTE (col_c, col_b));"));
740 // test using a column in FORCE_QUOTE that is not a column of the table
741 REQUIRE_FAIL(
742 con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv") + "' (FORCE_QUOTE (col_c, col_d));"));
743 // FORCE_QUOTE is only supported in COPY ... TO ...
744 REQUIRE_FAIL(
745 con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_reorder.csv") + "' (FORCE_QUOTE (col_c, col_d));"));
746 // FORCE_QUOTE must not be empty and must have the correct parameter type
747 REQUIRE_FAIL(con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv") + "' (FORCE_QUOTE);"));
748 REQUIRE_FAIL(con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv") + "' (FORCE_QUOTE 42);"));
749
750 REQUIRE_NO_FAIL(con.Query("DELETE FROM test;"));
751
752 // test FORCE_NOT_NULL
753
754 // test if null value is correctly converted into string
755 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv") +
756 "' (FORCE_NOT_NULL (col_b), NULL 'test');");
757 REQUIRE(CHECK_COLUMN(result, 0, {3}));
758 result = con.Query("SELECT * FROM test ORDER BY 1;");
759 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 8}));
760 REQUIRE(CHECK_COLUMN(result, 1, {"", "", "test"}));
761 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), "tea"}));
762 REQUIRE_NO_FAIL(con.Query("DELETE FROM test;"));
763
764 // test if null value is correctly converted into string if explicit columns are used
765 result = con.Query("COPY test (col_a, col_b, col_c) FROM '" + fs.JoinPath(csv_path, "test_star.csv") +
766 "' (FORCE_NOT_NULL (col_b), NULL 'test');");
767 REQUIRE(CHECK_COLUMN(result, 0, {3}));
768 result = con.Query("SELECT * FROM test ORDER BY 1;");
769 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 8}));
770 REQUIRE(CHECK_COLUMN(result, 1, {"", "", "test"}));
771 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), "tea"}));
772
773 // FORCE_NOT_NULL is only supported in COPY ... FROM ...
774 REQUIRE_FAIL(result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_star.csv") +
775 "' (FORCE_NOT_NULL (col_b), NULL 'test');"));
776 // FORCE_NOT_NULL must not be empty and must have the correct parameter type
777 REQUIRE_FAIL(result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv") +
778 "' (FORCE_NOT_NULL, NULL 'test');"));
779 REQUIRE_FAIL(result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv") +
780 "' (FORCE_NOT_NULL 42, NULL 'test');"));
781 // test using a column in FORCE_NOT_NULL that is not set as output, but that is a column of the table
782 REQUIRE_FAIL(con.Query("COPY test (col_b, col_a) FROM '" + fs.JoinPath(csv_path, "test_reorder.csv") +
783 "' (FORCE_NOT_NULL (col_c, col_b));"));
784 // test using a column in FORCE_NOT_NULL that is not a column of the table
785 REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_reorder.csv") +
786 "' (FORCE_NOT_NULL (col_c, col_d));"));
787
788 // FORCE_NOT_NULL fails on integer columns
789 REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_2.csv") + "' (FORCE_NOT_NULL (col_a));"));
790}
791
792TEST_CASE("Test copy statement with unicode delimiter/quote/escape", "[copy]") {
793 FileSystem fs;
794 unique_ptr<QueryResult> result;
795 DuckDB db(nullptr);
796 Connection con(db);
797
798 auto csv_path = GetCSVPath();
799
800 // generate CSV file with unicode (> one-byte) delimiter/quote/escape
801 ofstream from_csv_file1(fs.JoinPath(csv_path, "multi_char.csv"));
802 from_csv_file1 << 0 << "🦆ˮdu˧˧🦆ckˮ🦆ˮd˧ˮ˧ˮu🦆ckˮ🦆duck" << endl;
803 from_csv_file1 << 1 << "🦆ˮdou˧ˮbleˮ🦆🦆duck" << endl;
804 from_csv_file1 << 2 << "🦆🦆🦆" << endl;
805 from_csv_file1 << 3 << "🦆duck inv˧asion🦆🦆" << endl;
806 from_csv_file1.close();
807
808 // generate CSV file with unicode (> one-byte) delimiter/quote/escape that exceeds the buffer size a few times
809 ofstream from_csv_file2(fs.JoinPath(csv_path, "multi_char_buffer_exhausted.csv"));
810 int64_t sum = 0;
811 for (int i = 0; i < 16384; i++) {
812 if (i % 2 == 0) {
813 from_csv_file2 << i << "🦆ˮ🦆dˮ🦆ˮd˧ˮ🦆ˮ🦆d˧" << endl;
814 } else {
815 from_csv_file2 << i << "🦆ˮ˧ˮ˧ˮ˧ˮˮ🦆˧˧🦆 test test 🦆" << endl;
816 }
817 sum += i;
818 }
819 from_csv_file2.close();
820
821 // generate CSV file with one-byte delimiter/quote/escape
822 ofstream from_csv_file3(fs.JoinPath(csv_path, "one_byte_char.csv"));
823 for (int i = 0; i < 3; i++) {
824 from_csv_file3 << i << ",'du''ck','''''du,ck',duck" << endl;
825 }
826 from_csv_file3.close();
827
828 // generate CSV file with unterminated quotes
829 ofstream from_csv_file4(fs.JoinPath(csv_path, "unterminated_quotes.csv"));
830 for (int i = 0; i < 3; i++) {
831 from_csv_file4 << i << ",duck,\"duck" << endl;
832 }
833 from_csv_file4.close();
834
835 // generate CSV file with quotes that start midway in the value
836 ofstream from_csv_file5(fs.JoinPath(csv_path, "unterminated_quotes_2.csv"));
837 for (int i = 0; i < 3; i++) {
838 from_csv_file5 << i << ",du\"ck,duck" << endl;
839 }
840 from_csv_file5.close();
841
842 // generate a CSV file with a very long string exceeding the buffer midway in an escape sequence (delimiter and
843 // escape share substrings)
844 ofstream from_csv_file6(fs.JoinPath(csv_path, "shared_substrings.csv"));
845 string big_string_a(16370, 'a');
846 from_csv_file6 << big_string_a << "AAA\"aaaaaaaaAAB\"\"" << endl;
847 from_csv_file6.close();
848
849 // create three tables for testing
850 REQUIRE_NO_FAIL(con.Query(
851 "CREATE TABLE test_unicode_1 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));"));
852 REQUIRE_NO_FAIL(con.Query(
853 "CREATE TABLE test_unicode_2 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));"));
854 REQUIRE_NO_FAIL(con.Query(
855 "CREATE TABLE test_unicode_3 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));"));
856 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test_unicode_4 (col_a VARCHAR, col_b VARCHAR);"));
857
858 // throw error if unterminated quotes are detected
859 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "unterminated_quotes.csv") + "';"));
860 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "unterminated_quotes_2.csv") + "';"));
861
862 // test COPY ... FROM ...
863
864 // test unicode delimiter/quote/escape
865 result = con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "multi_char.csv") +
866 "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');");
867 REQUIRE(CHECK_COLUMN(result, 0, {4}));
868 result = con.Query("SELECT * FROM test_unicode_1 ORDER BY 1 LIMIT 4;");
869 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3}));
870 REQUIRE(CHECK_COLUMN(result, 1, {"du˧🦆ck", "douˮble", Value(), "duck inv˧asion"}));
871 REQUIRE(CHECK_COLUMN(result, 2, {"dˮˮu🦆ck", Value(), Value(), Value()}));
872 REQUIRE(CHECK_COLUMN(result, 3, {"duck", "duck", Value(), Value()}));
873
874 // test unicode delimiter/quote/escape that exceeds the buffer size a few times
875 result = con.Query("COPY test_unicode_2 FROM '" + fs.JoinPath(csv_path, "multi_char_buffer_exhausted.csv") +
876 "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');");
877 REQUIRE(CHECK_COLUMN(result, 0, {16384}));
878 result = con.Query("SELECT * FROM test_unicode_2 ORDER BY 1 LIMIT 4;");
879 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3}));
880 REQUIRE(CHECK_COLUMN(result, 1, {"🦆d", "ˮˮˮ", "🦆d", "ˮˮˮ"}));
881 REQUIRE(CHECK_COLUMN(result, 2, {"dˮ🦆", "˧˧", "dˮ🦆", "˧˧"}));
882 REQUIRE(CHECK_COLUMN(result, 3, {"d˧", " test test ", "d˧", " test test "}));
883 result = con.Query("SELECT SUM(col_a) FROM test_unicode_2;");
884 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum)}));
885
886 // test one-byte delimiter/quote/escape
887 result = con.Query("COPY test_unicode_3 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") + "' (QUOTE '''');");
888 REQUIRE(CHECK_COLUMN(result, 0, {3}));
889 result = con.Query("SELECT * FROM test_unicode_3 ORDER BY 1 LIMIT 3;");
890 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
891 REQUIRE(CHECK_COLUMN(result, 1, {"du'ck", "du'ck", "du'ck"}));
892 REQUIRE(CHECK_COLUMN(result, 2, {"''du,ck", "''du,ck", "''du,ck"}));
893 REQUIRE(CHECK_COLUMN(result, 3, {"duck", "duck", "duck"}));
894
895 // test correct shared substring behavior at buffer borders
896 result = con.Query("COPY test_unicode_4 FROM '" + fs.JoinPath(csv_path, "shared_substrings.csv") +
897 "' (DELIMITER 'AAA', ESCAPE 'AAB');");
898 REQUIRE(CHECK_COLUMN(result, 0, {1}));
899 result = con.Query("SELECT * FROM test_unicode_4;");
900 REQUIRE(CHECK_COLUMN(result, 0, {big_string_a}));
901 REQUIRE(CHECK_COLUMN(result, 1, {"aaaaaaaa\""}));
902
903 // quote and escape must not be empty
904 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
905 "' (DELIMITER '🦆', QUOTE '');"));
906 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
907 "' (DELIMITER '🦆', ESCAPE '');"));
908
909 // test same string for delimiter and quote
910 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
911 "' (DELIMITER '🦆', QUOTE '🦆');"));
912
913 // escape and quote cannot be substrings of each other
914 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
915 "' (ESCAPE 'du', QUOTE 'duck');"));
916 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
917 "' (ESCAPE 'duck', QUOTE 'du');"));
918
919 // delimiter and quote cannot be substrings of each other
920 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
921 "' (DELIMITER 'du', QUOTE 'duck');"));
922 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
923 "' (DELIMITER 'duck', QUOTE 'du');"));
924
925 // delimiter and escape cannot be substrings of each other
926 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
927 "' (DELIMITER 'AA', ESCAPE 'AAAA');"));
928 REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv") +
929 "' (DELIMITER 'AAAA', ESCAPE 'AA');"));
930
931 // COPY ... TO ...
932
933 // test unicode delimiter/quote/escape
934 result = con.Query("COPY test_unicode_1 TO '" + fs.JoinPath(csv_path, "test_unicode_1.csv") +
935 "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');");
936 REQUIRE(CHECK_COLUMN(result, 0, {4}));
937 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_1;"));
938 result = con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "test_unicode_1.csv") +
939 "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');");
940 REQUIRE(CHECK_COLUMN(result, 0, {4}));
941 result = con.Query("SELECT * FROM test_unicode_1 ORDER BY 1 LIMIT 4;");
942 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3}));
943 REQUIRE(CHECK_COLUMN(result, 1, {"du˧🦆ck", "douˮble", Value(), "duck inv˧asion"}));
944 REQUIRE(CHECK_COLUMN(result, 2, {"dˮˮu🦆ck", Value(), Value(), Value()}));
945 REQUIRE(CHECK_COLUMN(result, 3, {"duck", "duck", Value(), Value()}));
946
947 // test unicode delimiter/quote/escape
948 result = con.Query("COPY test_unicode_2 TO '" + fs.JoinPath(csv_path, "test_unicode_2.csv") +
949 "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');");
950 REQUIRE(CHECK_COLUMN(result, 0, {16384}));
951 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_2;"));
952 result = con.Query("COPY test_unicode_2 FROM '" + fs.JoinPath(csv_path, "test_unicode_2.csv") +
953 "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');");
954 REQUIRE(CHECK_COLUMN(result, 0, {16384}));
955 result = con.Query("SELECT * FROM test_unicode_2 ORDER BY 1 LIMIT 4;");
956 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3}));
957 REQUIRE(CHECK_COLUMN(result, 1, {"🦆d", "ˮˮˮ", "🦆d", "ˮˮˮ"}));
958 REQUIRE(CHECK_COLUMN(result, 2, {"dˮ🦆", "˧˧", "dˮ🦆", "˧˧"}));
959 REQUIRE(CHECK_COLUMN(result, 3, {"d˧", " test test ", "d˧", " test test "}));
960 result = con.Query("SELECT SUM(col_a) FROM test_unicode_2;");
961 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum)}));
962
963 // test one-byte delimiter/quote/escape
964 result = con.Query("COPY test_unicode_3 TO '" + fs.JoinPath(csv_path, "test_unicode_3.csv") + "' (QUOTE '''');");
965 REQUIRE(CHECK_COLUMN(result, 0, {3}));
966 REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_3;"));
967 result = con.Query("COPY test_unicode_3 FROM '" + fs.JoinPath(csv_path, "test_unicode_3.csv") + "' (QUOTE '''');");
968 REQUIRE(CHECK_COLUMN(result, 0, {3}));
969 result = con.Query("SELECT * FROM test_unicode_3 ORDER BY 1 LIMIT 3;");
970 REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2}));
971 REQUIRE(CHECK_COLUMN(result, 1, {"du'ck", "du'ck", "du'ck"}));
972 REQUIRE(CHECK_COLUMN(result, 2, {"''du,ck", "''du,ck", "''du,ck"}));
973 REQUIRE(CHECK_COLUMN(result, 3, {"duck", "duck", "duck"}));
974}
975
976TEST_CASE("Test copy statement with file overwrite", "[copy]") {
977 FileSystem fs;
978 unique_ptr<QueryResult> result;
979 DuckDB db(nullptr);
980 Connection con(db);
981
982 auto csv_path = GetCSVPath();
983
984 // create a table and insert some values
985 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR(10));"));
986 REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (1, 'hello'), (2, 'world '), (3, ' xx');"));
987
988 result = con.Query("SELECT * FROM test ORDER BY 1;");
989 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
990 REQUIRE(CHECK_COLUMN(result, 1, {"hello", "world ", " xx"}));
991
992 // copy to the CSV file
993 result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test.csv") + "';");
994 REQUIRE(CHECK_COLUMN(result, 0, {3}));
995
996 // now copy to the file again
997 result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test.csv") + "';");
998 REQUIRE(CHECK_COLUMN(result, 0, {3}));
999
1000 // reload the data from the file: it should only have three rows
1001 REQUIRE_NO_FAIL(con.Query("DELETE FROM test;"));
1002
1003 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1004 REQUIRE(CHECK_COLUMN(result, 0, {3}));
1005
1006 result = con.Query("SELECT * FROM test ORDER BY 1;");
1007 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
1008 REQUIRE(CHECK_COLUMN(result, 1, {"hello", "world ", " xx"}));
1009}
1010
1011TEST_CASE("Test copy statement with default values", "[copy]") {
1012 FileSystem fs;
1013 unique_ptr<QueryResult> result;
1014 DuckDB db(nullptr);
1015 Connection con(db);
1016
1017 auto csv_path = GetCSVPath();
1018
1019 // create a file only consisting of integers
1020 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1021 int64_t expected_sum_a = 0;
1022 int64_t expected_sum_c = 0;
1023 for (int i = 0; i < 5000; i++) {
1024 from_csv_file << i << endl;
1025
1026 expected_sum_a += i;
1027 expected_sum_c += i + 7;
1028 }
1029 from_csv_file.close();
1030
1031 // load CSV file into a table
1032 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR DEFAULT('hello'), c INTEGER DEFAULT(3+4));"));
1033 result = con.Query("COPY test (a) FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1034 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
1035 result = con.Query("COPY test (c) FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1036 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
1037
1038 result =
1039 con.Query("SELECT COUNT(a), COUNT(b), COUNT(c), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(a), SUM(c) FROM test;");
1040 REQUIRE(CHECK_COLUMN(result, 0, {5000}));
1041 REQUIRE(CHECK_COLUMN(result, 1, {10000}));
1042 REQUIRE(CHECK_COLUMN(result, 2, {10000}));
1043 REQUIRE(CHECK_COLUMN(result, 3, {5}));
1044 REQUIRE(CHECK_COLUMN(result, 4, {5}));
1045 REQUIRE(CHECK_COLUMN(result, 5, {Value::BIGINT(expected_sum_a)}));
1046 REQUIRE(CHECK_COLUMN(result, 6, {Value::BIGINT(expected_sum_c)}));
1047}
1048
1049TEST_CASE("Test copy statement with long lines", "[copy]") {
1050 FileSystem fs;
1051 unique_ptr<QueryResult> result;
1052 DuckDB db(nullptr);
1053 Connection con(db);
1054
1055 auto csv_path = GetCSVPath();
1056
1057 // generate a CSV file with a very long string
1058 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1059 string big_string_a(100000, 'a');
1060 string big_string_b(200000, 'b');
1061 from_csv_file << 10 << "," << big_string_a << "," << 20 << endl;
1062 from_csv_file << 20 << "," << big_string_b << "," << 30 << endl;
1063 from_csv_file.close();
1064
1065 // load CSV file into a table
1066 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);"));
1067 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1068 REQUIRE(CHECK_COLUMN(result, 0, {2}));
1069
1070 result = con.Query("SELECT LENGTH(b) FROM test ORDER BY a;");
1071 REQUIRE(CHECK_COLUMN(result, 0, {100000, 200000}));
1072
1073 result = con.Query("SELECT SUM(a), SUM(c) FROM test;");
1074 REQUIRE(CHECK_COLUMN(result, 0, {30}));
1075 REQUIRE(CHECK_COLUMN(result, 1, {50}));
1076}
1077
1078TEST_CASE("Test copy statement with quotes and newlines", "[copy]") {
1079 FileSystem fs;
1080 unique_ptr<QueryResult> result;
1081 DuckDB db(nullptr);
1082 Connection con(db);
1083
1084 auto csv_path = GetCSVPath();
1085
1086 // generate a CSV file with newlines enclosed by quotes
1087 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1088 from_csv_file << "\"hello\\nworld\",\"5\"" << endl;
1089 from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl;
1090 from_csv_file.close();
1091
1092 // load CSV file into a table
1093 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);"));
1094 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1095 REQUIRE(CHECK_COLUMN(result, 0, {2}));
1096
1097 result = con.Query("SELECT SUM(b) FROM test;");
1098 REQUIRE(CHECK_COLUMN(result, 0, {11}));
1099
1100 result = con.Query("SELECT a FROM test ORDER BY a;");
1101 REQUIRE(CHECK_COLUMN(result, 0, {"hello\\nworld", "what,\\n brings, you here\\n, today"}));
1102 REQUIRE_NO_FAIL(con.Query("DROP TABLE test;"));
1103
1104 // quotes in the middle of a quoted string cause an exception if they are not escaped
1105 from_csv_file.open(fs.JoinPath(csv_path, "test.csv"));
1106 from_csv_file << "\"hello\\n\"w\"o\"rld\",\"5\"" << endl;
1107 from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl;
1108 from_csv_file.close();
1109
1110 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);"));
1111 REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';"));
1112
1113 // now the same quotes are escaped
1114 from_csv_file.open(fs.JoinPath(csv_path, "test.csv"));
1115 from_csv_file << "\"hello\\n\"\"w\"\"o\"\"rld\",\"5\"" << endl;
1116 from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl;
1117 from_csv_file.close();
1118
1119 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1120 REQUIRE(CHECK_COLUMN(result, 0, {2}));
1121
1122 result = con.Query("SELECT SUM(b) FROM test;");
1123 REQUIRE(CHECK_COLUMN(result, 0, {11}));
1124 result = con.Query("SELECT a,b FROM test ORDER BY b;");
1125 REQUIRE(CHECK_COLUMN(result, 0, {"hello\\n\"w\"o\"rld", "what,\\n brings, you here\\n, today"}));
1126 REQUIRE(CHECK_COLUMN(result, 1, {5, 6}));
1127
1128 REQUIRE_NO_FAIL(con.Query("DROP TABLE test;"));
1129
1130 // not escaped escape string in quotes throws an exception
1131 from_csv_file.open(fs.JoinPath(csv_path, "test.csv"));
1132 from_csv_file << "\"\\\"escaped\\\",\"5\"" << endl;
1133 from_csv_file << "yea,6" << endl;
1134 from_csv_file.close();
1135
1136 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);"));
1137 REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "' (ESCAPE '\\');"));
1138}
1139
1140TEST_CASE("Test copy statement with many empty lines", "[copy]") {
1141 FileSystem fs;
1142 unique_ptr<QueryResult> result;
1143 DuckDB db(nullptr);
1144 Connection con(db);
1145
1146 auto csv_path = GetCSVPath();
1147
1148 // generate CSV file with a very long string
1149 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1150 from_csv_file << "1\n";
1151 for (idx_t i = 0; i < 19999; i++) {
1152 from_csv_file << "\n";
1153 }
1154 from_csv_file.close();
1155
1156 // load CSV file into a table
1157 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER);"));
1158 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1159 REQUIRE(CHECK_COLUMN(result, 0, {20000}));
1160
1161 result = con.Query("SELECT SUM(a) FROM test;");
1162 REQUIRE(CHECK_COLUMN(result, 0, {1}));
1163}
1164
1165TEST_CASE("Test different line endings", "[copy]") {
1166 FileSystem fs;
1167 unique_ptr<QueryResult> result;
1168 DuckDB db(nullptr);
1169 Connection con(db);
1170
1171 auto csv_path = GetCSVPath();
1172
1173 // generate CSV file with different line endings
1174 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1175 from_csv_file << 10 << ","
1176 << "hello"
1177 << "," << 20 << "\r\n";
1178 from_csv_file << 20 << ","
1179 << "world"
1180 << "," << 30 << '\n';
1181 from_csv_file << 30 << ","
1182 << "test"
1183 << "," << 30 << '\r';
1184 from_csv_file.close();
1185
1186 // load CSV file into a table
1187 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);"));
1188 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1189 REQUIRE(CHECK_COLUMN(result, 0, {3}));
1190
1191 result = con.Query("SELECT LENGTH(b) FROM test ORDER BY a;");
1192 REQUIRE(CHECK_COLUMN(result, 0, {5, 5, 4}));
1193
1194 result = con.Query("SELECT SUM(a), SUM(c) FROM test;");
1195 REQUIRE(CHECK_COLUMN(result, 0, {60}));
1196 REQUIRE(CHECK_COLUMN(result, 1, {80}));
1197}
1198
1199TEST_CASE("Test Windows Newlines with a long file", "[copy]") {
1200 FileSystem fs;
1201 unique_ptr<QueryResult> result;
1202 DuckDB db(nullptr);
1203 Connection con(db);
1204
1205 auto csv_path = GetCSVPath();
1206
1207 idx_t line_count = 20000;
1208 int64_t sum_a = 0, sum_c = 0;
1209
1210 // generate a CSV file with many strings
1211 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1212 for (idx_t i = 0; i < line_count; i++) {
1213 from_csv_file << i << ","
1214 << "hello"
1215 << "," << i + 2 << "\r\n";
1216
1217 sum_a += i;
1218 sum_c += i + 2;
1219 }
1220 from_csv_file.close();
1221
1222 // load CSV file into a table
1223 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);"));
1224 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';");
1225 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)}));
1226
1227 result = con.Query("SELECT SUM(a), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(LENGTH(b)), SUM(c) FROM test;");
1228 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum_a)}));
1229 REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(5)}));
1230 REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(5)}));
1231 REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(5 * line_count)}));
1232 REQUIRE(CHECK_COLUMN(result, 4, {Value::BIGINT(sum_c)}));
1233
1234 REQUIRE_NO_FAIL(con.Query("DELETE FROM test;"));
1235 // now do the same with a multi-byte quote that is not actually used
1236 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "' QUOTE 'BLABLABLA';");
1237 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)}));
1238
1239 result = con.Query("SELECT SUM(a), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(LENGTH(b)), SUM(c) FROM test;");
1240 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum_a)}));
1241 REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(5)}));
1242 REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(5)}));
1243 REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(5 * line_count)}));
1244 REQUIRE(CHECK_COLUMN(result, 4, {Value::BIGINT(sum_c)}));
1245
1246 REQUIRE_NO_FAIL(con.Query("DROP TABLE test;"));
1247
1248 // generate a csv file with one value and many empty values
1249 ofstream from_csv_file_empty(fs.JoinPath(csv_path, "test2.csv"));
1250 from_csv_file_empty << 1 << "\r\n";
1251 for (idx_t i = 0; i < line_count - 1; i++) {
1252 from_csv_file_empty << "\r\n";
1253 }
1254 from_csv_file_empty.close();
1255
1256 // load CSV file into a table
1257 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER);"));
1258 result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test2.csv") + "';");
1259 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)}));
1260
1261 result = con.Query("SELECT SUM(a) FROM test;");
1262 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(1)}));
1263}
1264
1265TEST_CASE("Test lines that exceed the maximum line size", "[copy]") {
1266 FileSystem fs;
1267 unique_ptr<QueryResult> result;
1268 DuckDB db(nullptr);
1269 Connection con(db);
1270
1271 auto csv_path = GetCSVPath();
1272
1273 // generate CSV file with 20 MB string
1274 ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv"));
1275 string big_string(2048576, 'a');
1276 from_csv_file << 10 << "," << big_string << "," << 20 << endl;
1277 from_csv_file.close();
1278
1279 // value is too big for loading
1280 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);"));
1281 REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv") + "';"));
1282}
1283
1284TEST_CASE("Test copy from/to on-time dataset", "[copy]") {
1285 FileSystem fs;
1286 unique_ptr<QueryResult> result;
1287 DuckDB db(nullptr);
1288 Connection con(db);
1289
1290 auto csv_path = GetCSVPath();
1291 auto ontime_csv = fs.JoinPath(csv_path, "ontime.csv");
1292 WriteBinary(ontime_csv, ontime_sample, sizeof(ontime_sample));
1293
1294 REQUIRE_NO_FAIL(con.Query(
1295 "CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, "
1296 "flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), "
1297 "flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, "
1298 "origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), "
1299 "originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, "
1300 "destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips "
1301 "VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), "
1302 "depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups "
1303 "DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), "
1304 "taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes "
1305 "DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled "
1306 "SMALLINT, cancellationcode CHAR(1), diverted SMALLINT, crselapsedtime DECIMAL(8,2), actualelapsedtime "
1307 "DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup SMALLINT, "
1308 "carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), "
1309 "lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime "
1310 "VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), "
1311 "divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, "
1312 "div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), "
1313 "div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, "
1314 "div2airportseqid INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), "
1315 "div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, "
1316 "div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), "
1317 "div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, "
1318 "div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), "
1319 "div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, "
1320 "div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), "
1321 "div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));"));
1322
1323 result = con.Query("COPY ontime FROM '" + ontime_csv + "' DELIMITER ',' HEADER;");
1324 REQUIRE(CHECK_COLUMN(result, 0, {9}));
1325
1326 result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;");
1327 REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988}));
1328 REQUIRE(CHECK_COLUMN(result, 1, {"AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"}));
1329 REQUIRE(CHECK_COLUMN(result, 2, {"JFK", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK"}));
1330 REQUIRE(CHECK_COLUMN(result, 3,
1331 {"New York, NY", "New York, NY", "New York, NY", "New York, NY", "New York, NY",
1332 "New York, NY", "New York, NY", "New York, NY", "New York, NY"}));
1333 REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()}));
1334
1335 result = con.Query("COPY ontime TO '" + ontime_csv + "' DELIMITER ',' HEADER;");
1336 REQUIRE(CHECK_COLUMN(result, 0, {9}));
1337 REQUIRE_NO_FAIL(con.Query("DELETE FROM ontime;"));
1338 result = con.Query("COPY ontime FROM '" + ontime_csv + "' DELIMITER ',' HEADER;");
1339 REQUIRE(CHECK_COLUMN(result, 0, {9}));
1340
1341 result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;");
1342 REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988}));
1343 REQUIRE(CHECK_COLUMN(result, 1, {"AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"}));
1344 REQUIRE(CHECK_COLUMN(result, 2, {"JFK", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK"}));
1345 REQUIRE(CHECK_COLUMN(result, 3,
1346 {"New York, NY", "New York, NY", "New York, NY", "New York, NY", "New York, NY",
1347 "New York, NY", "New York, NY", "New York, NY", "New York, NY"}));
1348 REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()}));
1349}
1350
1351TEST_CASE("Test copy from/to lineitem csv", "[copy]") {
1352 FileSystem fs;
1353 unique_ptr<QueryResult> result;
1354 DuckDB db(nullptr);
1355 Connection con(db);
1356
1357 auto csv_path = GetCSVPath();
1358 auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv");
1359 WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample));
1360
1361 REQUIRE_NO_FAIL(con.Query(
1362 "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber "
1363 "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) "
1364 "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, "
1365 "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) "
1366 "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);"));
1367 result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' DELIMITER '|'");
1368 REQUIRE(CHECK_COLUMN(result, 0, {10}));
1369
1370 result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;");
1371 REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564}));
1372 REQUIRE(
1373 CHECK_COLUMN(result, 1,
1374 {"egular courts above the", "ly final dependencies: slyly bold ", "riously. regular, express dep",
1375 "lites. fluffily even de", " pending foxes. slyly re", "arefully slyly ex"}));
1376
1377 // test COPY ... TO ... with HEADER
1378 result = con.Query("COPY lineitem TO '" + lineitem_csv + "' (DELIMITER ' ', HEADER);");
1379 REQUIRE(CHECK_COLUMN(result, 0, {10}));
1380
1381 // clear the table
1382 REQUIRE_NO_FAIL(con.Query("DELETE FROM lineitem;"));
1383 result = con.Query("SELECT * FROM lineitem;");
1384 REQUIRE(CHECK_COLUMN(result, 0, {}));
1385
1386 // now copy back into the table
1387 result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' DELIMITER ' ' HEADER;");
1388 REQUIRE(CHECK_COLUMN(result, 0, {10}));
1389
1390 result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;");
1391 REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564}));
1392 REQUIRE(
1393 CHECK_COLUMN(result, 1,
1394 {"egular courts above the", "ly final dependencies: slyly bold ", "riously. regular, express dep",
1395 "lites. fluffily even de", " pending foxes. slyly re", "arefully slyly ex"}));
1396}
1397
1398TEST_CASE("Test copy from web_page csv", "[copy]") {
1399 FileSystem fs;
1400 unique_ptr<QueryResult> result;
1401 DuckDB db(nullptr);
1402 Connection con(db);
1403
1404 auto csv_path = GetCSVPath();
1405 auto webpage_csv = fs.JoinPath(csv_path, "web_page.csv");
1406 WriteBinary(webpage_csv, web_page, sizeof(web_page));
1407
1408 REQUIRE_NO_FAIL(con.Query(
1409 "CREATE TABLE web_page(wp_web_page_sk integer not null, wp_web_page_id char(16) not null, wp_rec_start_date "
1410 "date, wp_rec_end_date date, wp_creation_date_sk integer, wp_access_date_sk integer, wp_autogen_flag char(1), "
1411 "wp_customer_sk integer, wp_url varchar(100), wp_type char(50), wp_char_count integer, wp_link_count integer, "
1412 "wp_image_count integer, wp_max_ad_count integer, primary key (wp_web_page_sk));"));
1413
1414 result = con.Query("COPY web_page FROM '" + webpage_csv + "' DELIMITER '|';");
1415 REQUIRE(CHECK_COLUMN(result, 0, {60}));
1416
1417 result = con.Query("SELECT * FROM web_page ORDER BY wp_web_page_sk LIMIT 3;");
1418 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
1419 REQUIRE(CHECK_COLUMN(result, 1, {"AAAAAAAABAAAAAAA", "AAAAAAAACAAAAAAA", "AAAAAAAACAAAAAAA"}));
1420 REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(1997, 9, 3), Value::DATE(1997, 9, 3), Value::DATE(2000, 9, 3)}));
1421 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value::DATE(2000, 9, 2), Value()}));
1422 REQUIRE(CHECK_COLUMN(result, 4, {2450810, 2450814, 2450814}));
1423 REQUIRE(CHECK_COLUMN(result, 5, {2452620, 2452580, 2452611}));
1424 REQUIRE(CHECK_COLUMN(result, 6, {"Y", "N", "N"}));
1425 REQUIRE(CHECK_COLUMN(result, 7, {98539, Value(), Value()}));
1426 REQUIRE(CHECK_COLUMN(result, 8, {"http://www.foo.com", "http://www.foo.com", "http://www.foo.com"}));
1427 REQUIRE(CHECK_COLUMN(result, 9, {"welcome", "protected", "feedback"}));
1428 REQUIRE(CHECK_COLUMN(result, 10, {2531, 1564, 1564}));
1429 REQUIRE(CHECK_COLUMN(result, 11, {8, 4, 4}));
1430 REQUIRE(CHECK_COLUMN(result, 12, {3, 3, 3}));
1431 REQUIRE(CHECK_COLUMN(result, 13, {4, 1, 4}));
1432}
1433
1434TEST_CASE("Test copy from greek-utf8 csv", "[copy]") {
1435 FileSystem fs;
1436 unique_ptr<QueryResult> result;
1437 DuckDB db(nullptr);
1438 Connection con(db);
1439
1440 auto csv_path = GetCSVPath();
1441 auto csv_file = fs.JoinPath(csv_path, "greek_utf8.csv");
1442 WriteBinary(csv_file, greek_utf8, sizeof(greek_utf8));
1443
1444 REQUIRE_NO_FAIL(con.Query("CREATE TABLE greek_utf8(i INTEGER, j VARCHAR, k INTEGER);"));
1445
1446 result = con.Query("COPY greek_utf8 FROM '" + csv_file + "' DELIMITER '|';");
1447 REQUIRE(CHECK_COLUMN(result, 0, {8}));
1448
1449 result = con.Query("SELECT * FROM greek_utf8 ORDER BY 1;");
1450 REQUIRE(CHECK_COLUMN(result, 0, {1689, 1690, 41561, 45804, 51981, 171067, 182773, 607808}));
1451 REQUIRE(CHECK_COLUMN(result, 1,
1452 {"\x30\x30\x69\\047\x6d", "\x30\x30\x69\\047\x76", "\x32\x30\x31\x35\xe2\x80\x8e",
1453 "\x32\x31\xcf\x80", "\x32\x34\x68\x6f\x75\x72\x73\xe2\x80\xac",
1454 "\x61\x72\x64\x65\xcc\x80\x63\x68", "\x61\xef\xac\x81",
1455 "\x70\x6f\x76\x65\x72\x74\x79\xe2\x80\xaa"}));
1456 REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 1, 1, 1, 2, 1, 1}));
1457}
1458
1459TEST_CASE("Test copy from ncvoter csv", "[copy]") {
1460 FileSystem fs;
1461 unique_ptr<QueryResult> result;
1462 DuckDB db(nullptr);
1463 Connection con(db);
1464
1465 auto csv_path = GetCSVPath();
1466 auto ncvoter_csv = fs.JoinPath(csv_path, "ncvoter.csv");
1467 WriteBinary(ncvoter_csv, ncvoter, sizeof(ncvoter));
1468
1469 REQUIRE_NO_FAIL(con.Query(
1470 "CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd "
1471 "STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, "
1472 "name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, "
1473 "full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, "
1474 "street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, "
1475 "res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, "
1476 "mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state "
1477 "STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number "
1478 "STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, "
1479 "party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, "
1480 "registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc "
1481 "STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv "
1482 "STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, "
1483 "nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, "
1484 "county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, "
1485 "school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, "
1486 "water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, "
1487 "sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, "
1488 "munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, "
1489 "confidential_ind STRING, age STRING, ncid STRING, vtd_abbrv STRING, vtd_desc STRING);"));
1490 result = con.Query("COPY ncvoters FROM '" + ncvoter_csv + "' DELIMITER '\t';");
1491 REQUIRE(CHECK_COLUMN(result, 0, {10}));
1492
1493 result = con.Query("SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;");
1494 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}));
1495 REQUIRE(CHECK_COLUMN(result, 1,
1496 {"ALAMANCE", "ALAMANCE", "ALAMANCE", "ALAMANCE", "ALAMANCE", "ALAMANCE", "ALAMANCE",
1497 "ALAMANCE", "ALAMANCE", "ALAMANCE"}));
1498 REQUIRE(CHECK_COLUMN(result, 2, {"09S", "09S", "03W", "09S", "1210", "035", "124", "06E", "035", "064"}));
1499 REQUIRE(CHECK_COLUMN(result, 3,
1500 {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()}));
1501}
1502
1503TEST_CASE("Test date copy", "[copy]") {
1504 FileSystem fs;
1505 unique_ptr<QueryResult> result;
1506 DuckDB db(nullptr);
1507 Connection con(db);
1508
1509 REQUIRE_NO_FAIL(con.Query("CREATE TABLE date_test(d date);"));
1510
1511 auto csv_path = GetCSVPath();
1512 auto date_csv = fs.JoinPath(csv_path, "date.csv");
1513 WriteCSV(date_csv, "2019-06-05\n");
1514
1515 result = con.Query("COPY date_test FROM '" + date_csv + "';");
1516 REQUIRE(CHECK_COLUMN(result, 0, {1}));
1517
1518 result = con.Query("SELECT cast(d as string) FROM date_test;");
1519 REQUIRE(CHECK_COLUMN(result, 0, {"2019-06-05"}));
1520}
1521
1522TEST_CASE("Test cranlogs broken gzip copy and temp table", "[copy][.]") {
1523 FileSystem fs;
1524 unique_ptr<QueryResult> result;
1525 DuckDB db(nullptr);
1526 Connection con(db);
1527
1528 auto csv_path = GetCSVPath();
1529 auto cranlogs_csv = fs.JoinPath(csv_path, "cranlogs.csv.gz");
1530 WriteBinary(cranlogs_csv, tmp2013_06_15, sizeof(tmp2013_06_15));
1531
1532 REQUIRE_NO_FAIL(con.Query("CREATE TEMPORARY TABLE cranlogs (date date,time string,size int,r_version string,r_arch "
1533 "string,r_os string,package string,version string,country string,ip_id int)"));
1534
1535 result = con.Query("COPY cranlogs FROM '" + cranlogs_csv + "' DELIMITER ',' HEADER;");
1536 REQUIRE(CHECK_COLUMN(result, 0, {37459}));
1537}
1538
1539TEST_CASE("Test imdb escapes", "[copy]") {
1540 FileSystem fs;
1541 unique_ptr<QueryResult> result;
1542 DuckDB db(nullptr);
1543 Connection con(db);
1544
1545 auto csv_path = GetCSVPath();
1546 auto imdb_movie_info = fs.JoinPath(csv_path, "imdb_movie_info.csv");
1547 WriteBinary(imdb_movie_info, imdb_movie_info_escaped, sizeof(imdb_movie_info_escaped));
1548
1549 REQUIRE_NO_FAIL(con.Query("CREATE TABLE movie_info (id integer NOT NULL PRIMARY KEY, movie_id integer NOT NULL, "
1550 "info_type_id integer NOT NULL, info text NOT NULL, note text);"));
1551
1552 result = con.Query("COPY movie_info FROM '" + imdb_movie_info + "' DELIMITER ',' ESCAPE '\\';");
1553 REQUIRE(result->success);
1554 REQUIRE(CHECK_COLUMN(result, 0, {201}));
1555
1556 // TODO: actually check results
1557 result = con.Query("SELECT * FROM movie_info;");
1558}
1559
1560TEST_CASE("Test read CSV function with lineitem", "[copy]") {
1561 FileSystem fs;
1562 unique_ptr<QueryResult> result;
1563 DuckDB db(nullptr);
1564 Connection con(db);
1565
1566 auto csv_path = GetCSVPath();
1567 auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv");
1568 WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample));
1569
1570 // create a view using the read_csv function
1571 REQUIRE_NO_FAIL(con.Query(
1572 "CREATE VIEW lineitem AS SELECT * FROM read_csv('" + lineitem_csv +
1573 "', '|', STRUCT_PACK(l_orderkey := 'INT', l_partkey := 'INT', l_suppkey := 'INT', l_linenumber := 'INT', "
1574 "l_quantity := 'INTEGER', l_extendedprice := 'DOUBLE', l_discount := 'DOUBLE', l_tax := 'DOUBLE', l_returnflag "
1575 ":= 'VARCHAR', l_linestatus := 'VARCHAR', l_shipdate := 'DATE', l_commitdate := 'DATE', l_receiptdate := "
1576 "'DATE', l_shipinstruct := 'VARCHAR', l_shipmode := 'VARCHAR', l_comment := 'VARCHAR'));"));
1577
1578 // each of these will read the CSV again through the view
1579 result = con.Query("SELECT COUNT(*) FROM lineitem");
1580 REQUIRE(CHECK_COLUMN(result, 0, {10}));
1581
1582 result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;");
1583 REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564}));
1584 REQUIRE(
1585 CHECK_COLUMN(result, 1,
1586 {"egular courts above the", "ly final dependencies: slyly bold ", "riously. regular, express dep",
1587 "lites. fluffily even de", " pending foxes. slyly re", "arefully slyly ex"}));
1588
1589 // test incorrect usage of read_csv function
1590 // wrong argument type
1591 REQUIRE_FAIL(con.Query("SELECT * FROM read_csv('" + lineitem_csv + "', '|', STRUCT_PACK(l_orderkey := 5))"));
1592}
1593
1594TEST_CASE("Test CSV with UTF8 NFC Normalization", "[copy]") {
1595 FileSystem fs;
1596 unique_ptr<QueryResult> result;
1597 DuckDB db(nullptr);
1598 Connection con(db);
1599
1600 auto csv_path = GetCSVPath();
1601 auto nfc_csv = fs.JoinPath(csv_path, "nfc.csv");
1602
1603 const char *nfc_content = "\xc3\xbc\n\x75\xcc\x88";
1604
1605 WriteBinary(nfc_csv, (const uint8_t *)nfc_content, strlen(nfc_content));
1606
1607 REQUIRE_NO_FAIL(con.Query("CREATE TABLE nfcstrings (s STRING);"));
1608 REQUIRE_NO_FAIL(con.Query("COPY nfcstrings FROM '" + nfc_csv + "';"));
1609
1610 result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE s = '\xc3\xbc'");
1611 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(2)}));
1612}
1613
1614// http://www.unicode.org/Public/UCD/latest/ucd/NormalizationTest.txt
1615TEST_CASE("Test CSV with Unicode NFC Normalization test suite", "[copy]") {
1616 FileSystem fs;
1617 unique_ptr<QueryResult> result;
1618 DuckDB db(nullptr);
1619 Connection con(db);
1620
1621 auto csv_path = GetCSVPath();
1622 auto nfc_csv = fs.JoinPath(csv_path, "nfc_test_suite.csv");
1623
1624 WriteBinary(nfc_csv, nfc_normalization, sizeof(nfc_normalization));
1625
1626 REQUIRE_NO_FAIL(con.Query("CREATE TABLE nfcstrings (source STRING, nfc STRING, nfd STRING);"));
1627 REQUIRE_NO_FAIL(con.Query("COPY nfcstrings FROM '" + nfc_csv + "' DELIMITER '|';"));
1628
1629 result = con.Query("SELECT COUNT(*) FROM nfcstrings");
1630 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)}));
1631
1632 result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE source=nfc");
1633 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)}));
1634
1635 result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE nfc=nfd");
1636 REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)}));
1637}
1638
1639TEST_CASE("Test CSV reading/writing from relations", "[relation_api]") {
1640 DuckDB db(nullptr);
1641 Connection con(db);
1642 unique_ptr<QueryResult> result;
1643
1644 // write a bunch of values to a CSV
1645 auto csv_file = TestCreatePath("relationtest.csv");
1646
1647 con.Values("(1), (2), (3)", {"i"})->WriteCSV(csv_file);
1648
1649 // now scan the CSV file
1650 auto csv_scan = con.ReadCSV(csv_file, {"i INTEGER"});
1651 result = csv_scan->Execute();
1652 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
1653
1654 REQUIRE_THROWS(con.ReadCSV(csv_file, {"i INTEGER); SELECT 42;--"}));
1655}
1656