1 | #include "catch.hpp" |
2 | #include "duckdb/common/file_system.hpp" |
3 | #include "duckdb/common/types/date.hpp" |
4 | #include "test_csv_header.hpp" |
5 | #include "test_helpers.hpp" |
6 | |
7 | #include <fstream> |
8 | |
9 | using namespace duckdb; |
10 | using namespace std; |
11 | |
12 | TEST_CASE("Test copy statement" , "[copy]" ) { |
13 | FileSystem fs; |
14 | unique_ptr<QueryResult> result; |
15 | DuckDB db(nullptr); |
16 | Connection con(db); |
17 | |
18 | auto csv_path = GetCSVPath(); |
19 | |
20 | // generate CSV file with ',' as delimiter and complex strings |
21 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
22 | for (int i = 0; i < 5000; i++) { |
23 | from_csv_file << i << "," << i << ", test" << endl; |
24 | } |
25 | from_csv_file.close(); |
26 | |
27 | // load CSV file into a table |
28 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
29 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
30 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
31 | |
32 | result = con.Query("SELECT COUNT(a), SUM(a) FROM test;" ); |
33 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
34 | REQUIRE(CHECK_COLUMN(result, 1, {12497500})); |
35 | |
36 | result = con.Query("SELECT * FROM test ORDER BY 1 LIMIT 3;" ); |
37 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
38 | REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2})); |
39 | REQUIRE(CHECK_COLUMN(result, 2, {" test" , " test" , " test" })); |
40 | |
41 | // create CSV file from table |
42 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" ); |
43 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
44 | // load the same CSV file back again |
45 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test2 (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
46 | result = con.Query("COPY test2 FROM '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" ); |
47 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
48 | result = con.Query("SELECT * FROM test2 ORDER BY 1 LIMIT 3;" ); |
49 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
50 | REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2})); |
51 | REQUIRE(CHECK_COLUMN(result, 2, {" test" , " test" , " test" })); |
52 | |
53 | // test too few rows |
54 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test_too_few_rows (a INTEGER, b INTEGER, c VARCHAR, d INTEGER);" )); |
55 | REQUIRE_FAIL(con.Query("COPY test_too_few_rows FROM '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" )); |
56 | |
57 | // create CSV file from query |
58 | result = con.Query("COPY (SELECT a,b FROM test WHERE a < 4000) TO '" + fs.JoinPath(csv_path, "test3.csv" ) + "';" ); |
59 | REQUIRE(CHECK_COLUMN(result, 0, {4000})); |
60 | // load the same CSV file back again |
61 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test3 (a INTEGER, b INTEGER);" )); |
62 | result = con.Query("COPY test3 FROM '" + fs.JoinPath(csv_path, "test3.csv" ) + "';" ); |
63 | REQUIRE(CHECK_COLUMN(result, 0, {4000})); |
64 | result = con.Query("SELECT * FROM test3 ORDER BY 1 LIMIT 3;" ); |
65 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
66 | REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2})); |
67 | |
68 | // export selected columns from a table to a CSV file |
69 | result = |
70 | con.Query("COPY test (a,c) TO '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (DELIMITER ',', HEADER false);" ); |
71 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
72 | |
73 | // import selected columns from CSV file |
74 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test4 (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
75 | result = con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (DELIM ',', HEADER 0);" ); |
76 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
77 | result = con.Query("SELECT * FROM test4 ORDER BY 1 LIMIT 3;" ); |
78 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
79 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()})); |
80 | REQUIRE(CHECK_COLUMN(result, 2, {" test" , " test" , " test" })); |
81 | |
82 | // unsupported type for HEADER |
83 | REQUIRE_FAIL( |
84 | con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (SEP ',', HEADER 0.2);" )); |
85 | // empty delimiter |
86 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (SEP);" )); |
87 | // number as delimiter |
88 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (SEP 1);" )); |
89 | // multiple format options |
90 | REQUIRE_FAIL( |
91 | con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (FORMAT 'csv', FORMAT 'json');" )); |
92 | // number as escape string |
93 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ESCAPE 1);" )); |
94 | // no escape string |
95 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ESCAPE);" )); |
96 | // number as quote string |
97 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (QUOTE 1);" )); |
98 | // no quote string |
99 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (QUOTE);" )); |
100 | // no format string |
101 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (FORMAT);" )); |
102 | // encoding must not be empty and must have the correct parameter type and value |
103 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ENCODING);" )); |
104 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ENCODING 42);" )); |
105 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ENCODING 'utf-42');" )); |
106 | // don't allow for non-existant copy options |
107 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (MAGIC '42');" )); |
108 | |
109 | // use a different delimiter |
110 | auto pipe_csv = fs.JoinPath(csv_path, "test_pipe.csv" ); |
111 | ofstream from_csv_file_pipe(pipe_csv); |
112 | for (int i = 0; i < 10; i++) { |
113 | from_csv_file_pipe << i << "|" << i << "|test" << endl; |
114 | } |
115 | from_csv_file_pipe.close(); |
116 | |
117 | // create new table |
118 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
119 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
120 | result = con.Query("COPY test FROM '" + pipe_csv + "' (SEPARATOR '|');" ); |
121 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
122 | |
123 | // throw exception if a line contains too many values |
124 | ofstream csv_too_many_values_file(fs.JoinPath(csv_path, "too_many_values.csv" )); |
125 | csv_too_many_values_file << "1,2,3,4" << endl; |
126 | csv_too_many_values_file.close(); |
127 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "too_many_values.csv" ) + "';" )); |
128 | |
129 | // test default null string |
130 | auto null_csv = fs.JoinPath(csv_path, "null.csv" ); |
131 | ofstream from_csv_file_null(null_csv); |
132 | for (int i = 0; i < 1; i++) |
133 | from_csv_file_null << i << "||test" << endl; |
134 | from_csv_file_null.close(); |
135 | result = con.Query("COPY test FROM '" + null_csv + "' DELIMITER '|';" ); |
136 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
137 | |
138 | // test invalid UTF-8 |
139 | auto invalid_utf_csv = fs.JoinPath(csv_path, "invalid_utf.csv" ); |
140 | ofstream from_csv_file_utf(invalid_utf_csv); |
141 | for (int i = 0; i < 1; i++) |
142 | from_csv_file_utf << i << "42|42|\xe2\x82\x28" << endl; |
143 | from_csv_file_utf.close(); |
144 | REQUIRE_FAIL(con.Query("COPY test FROM '" + invalid_utf_csv + "' DELIMITER '|';" )); |
145 | |
146 | // empty file |
147 | ofstream empty_file(fs.JoinPath(csv_path, "empty.csv" )); |
148 | empty_file.close(); |
149 | |
150 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE empty_table (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
151 | result = con.Query("COPY empty_table FROM '" + fs.JoinPath(csv_path, "empty.csv" ) + "';" ); |
152 | REQUIRE(CHECK_COLUMN(result, 0, {0})); |
153 | |
154 | // unterminated quotes |
155 | ofstream unterminated_quotes_file(fs.JoinPath(csv_path, "unterminated.csv" )); |
156 | unterminated_quotes_file << "\"hello\n\n world\n" ; |
157 | unterminated_quotes_file.close(); |
158 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE unterminated (a VARCHAR);" )); |
159 | REQUIRE_FAIL(con.Query("COPY unterminated FROM '" + fs.JoinPath(csv_path, "unterminated.csv" ) + "';" )); |
160 | |
161 | // 1024 rows (vector size) |
162 | ofstream csv_vector_size(fs.JoinPath(csv_path, "vsize.csv" )); |
163 | for (int i = 0; i < STANDARD_VECTOR_SIZE; i++) { |
164 | csv_vector_size << i << "," << i << ", test" << endl; |
165 | } |
166 | csv_vector_size.close(); |
167 | |
168 | // load CSV file into a table |
169 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE vsize (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
170 | result = con.Query("COPY vsize FROM '" + fs.JoinPath(csv_path, "vsize.csv" ) + "';" ); |
171 | REQUIRE(CHECK_COLUMN(result, 0, {STANDARD_VECTOR_SIZE})); |
172 | } |
173 | |
174 | TEST_CASE("Test CSV file without trailing newline" , "[copy]" ) { |
175 | FileSystem fs; |
176 | unique_ptr<QueryResult> result; |
177 | DuckDB db(nullptr); |
178 | Connection con(db); |
179 | |
180 | auto csv_path = GetCSVPath(); |
181 | |
182 | // no newline at end of file with simple delimiter |
183 | ofstream csv_no_newline(fs.JoinPath(csv_path, "no_newline.csv" )); |
184 | for (int i = 0; i < 1024; i++) { |
185 | csv_no_newline << i << "," << i << ", test" << (i + 1 < 1024 ? "\n" : "" ); |
186 | } |
187 | csv_no_newline.close(); |
188 | |
189 | // load CSV file into a table |
190 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE no_newline (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
191 | result = con.Query("COPY no_newline FROM '" + fs.JoinPath(csv_path, "no_newline.csv" ) + "';" ); |
192 | REQUIRE(CHECK_COLUMN(result, 0, {1024})); |
193 | |
194 | // no newline at end of file with unicode delimiter |
195 | ofstream csv_no_newline_unicode(fs.JoinPath(csv_path, "no_newline_unicode.csv" )); |
196 | for (int i = 0; i < 1024; i++) { |
197 | csv_no_newline_unicode << i << "🦆" << i << "🦆 test" << (i + 1 < 1024 ? "\n" : "" ); |
198 | } |
199 | csv_no_newline_unicode.close(); |
200 | |
201 | // load CSV file into a table |
202 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE no_newline_unicode (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
203 | result = con.Query("COPY no_newline_unicode FROM '" + fs.JoinPath(csv_path, "no_newline_unicode.csv" ) + |
204 | "' DELIMITER '🦆';" ); |
205 | REQUIRE(CHECK_COLUMN(result, 0, {1024})); |
206 | } |
207 | |
208 | TEST_CASE("Test CSVs with repeating patterns in delimiter/escape/quote" , "[copy]" ) { |
209 | FileSystem fs; |
210 | unique_ptr<QueryResult> result; |
211 | DuckDB db(nullptr); |
212 | Connection con(db); |
213 | |
214 | auto csv_dir = GetCSVPath(); |
215 | auto csv_path = fs.JoinPath(csv_dir, "abac.csv" ); |
216 | |
217 | SECTION("ABAC delimiter" ) { |
218 | ofstream csv_stream(csv_path); |
219 | // this is equivalent to "AB|ABAB|" |
220 | csv_stream << "ABABACABABABAC" ; |
221 | csv_stream.close(); |
222 | |
223 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR, c VARCHAR);" )); |
224 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC';" ); |
225 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
226 | |
227 | result = con.Query("SELECT * FROM abac_tbl" ); |
228 | REQUIRE(CHECK_COLUMN(result, 0, {"AB" })); |
229 | REQUIRE(CHECK_COLUMN(result, 1, {"ABAB" })); |
230 | REQUIRE(CHECK_COLUMN(result, 2, {Value()})); |
231 | |
232 | // do the same but with a large unused quote specifier |
233 | REQUIRE_NO_FAIL(con.Query("DELETE FROM abac_tbl;" )); |
234 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC' QUOTE 'ABABABABABAB';" ); |
235 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
236 | |
237 | result = con.Query("SELECT * FROM abac_tbl" ); |
238 | REQUIRE(CHECK_COLUMN(result, 0, {"AB" })); |
239 | REQUIRE(CHECK_COLUMN(result, 1, {"ABAB" })); |
240 | REQUIRE(CHECK_COLUMN(result, 2, {Value()})); |
241 | } |
242 | SECTION("Mix of complex quotes/delimiters/escapes" ) { |
243 | ofstream csv_stream(csv_path); |
244 | // quote -> "ABAB" |
245 | // escape -> "ABAC" |
246 | // delimiter -> "ABAD" |
247 | // first value is an escaped quote (ABAB) |
248 | // second value is a quoted delimiter followed by an escaped quote |
249 | // third value is an escape outside of a set of quotes (interpreted as a literal value) |
250 | csv_stream << "ABABABACABABABABABADABABABADABACABABABABABADABAC" ; |
251 | csv_stream.close(); |
252 | |
253 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR, c VARCHAR);" )); |
254 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAD' QUOTE 'ABAB' ESCAPE 'ABAC';" ); |
255 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
256 | |
257 | result = con.Query("SELECT * FROM abac_tbl" ); |
258 | REQUIRE(CHECK_COLUMN(result, 0, {"ABAB" })); |
259 | REQUIRE(CHECK_COLUMN(result, 1, {"ABADABAB" })); |
260 | REQUIRE(CHECK_COLUMN(result, 2, {"ABAC" })); |
261 | } |
262 | SECTION("CSV terminates in the middle of quote parsing" ) { |
263 | ofstream csv_stream(csv_path); |
264 | csv_stream << "ABAB" ; |
265 | csv_stream.close(); |
266 | |
267 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
268 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABABABAB';" ); |
269 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
270 | |
271 | result = con.Query("SELECT * FROM abac_tbl" ); |
272 | REQUIRE(CHECK_COLUMN(result, 0, {"ABAB" })); |
273 | } |
274 | SECTION("Newline in the middle of quote parsing" ) { |
275 | ofstream csv_stream(csv_path); |
276 | csv_stream << "ABAB\nABAB" ; |
277 | csv_stream.close(); |
278 | |
279 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
280 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABABABAB';" ); |
281 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
282 | |
283 | result = con.Query("SELECT * FROM abac_tbl" ); |
284 | REQUIRE(CHECK_COLUMN(result, 0, {"ABAB" , "ABAB" })); |
285 | } |
286 | SECTION("Simple quote terminates immediately results in error" ) { |
287 | ofstream csv_stream(csv_path); |
288 | csv_stream << "\"" ; |
289 | csv_stream.close(); |
290 | |
291 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
292 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"';" )); |
293 | } |
294 | SECTION("File ends in quoted value (simple)" ) { |
295 | ofstream csv_stream(csv_path); |
296 | csv_stream << "\"\"" ; |
297 | csv_stream.close(); |
298 | |
299 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
300 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"';" ); |
301 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
302 | |
303 | result = con.Query("SELECT * FROM abac_tbl" ); |
304 | REQUIRE(CHECK_COLUMN(result, 0, {Value()})); |
305 | } |
306 | SECTION("File ends in quoted value (complex)" ) { |
307 | ofstream csv_stream(csv_path); |
308 | csv_stream << "\"\"" ; |
309 | csv_stream.close(); |
310 | |
311 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
312 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' DELIMITER 'AAAB';" ); |
313 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
314 | |
315 | result = con.Query("SELECT * FROM abac_tbl" ); |
316 | REQUIRE(CHECK_COLUMN(result, 0, {Value()})); |
317 | } |
318 | SECTION("Simple quote terminates after escape results in error" ) { |
319 | ofstream csv_stream(csv_path); |
320 | csv_stream << "\"\\\"" ; |
321 | csv_stream.close(); |
322 | |
323 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
324 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\\';" )); |
325 | } |
326 | SECTION("Simple quote terminates after quote escape results in error" ) { |
327 | ofstream csv_stream(csv_path); |
328 | csv_stream << "\"\"\"" ; |
329 | csv_stream.close(); |
330 | |
331 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
332 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\"';" )); |
333 | } |
334 | SECTION("Simple quote terminates after escape results in error" ) { |
335 | ofstream csv_stream(csv_path); |
336 | csv_stream << "\"\\" ; |
337 | csv_stream.close(); |
338 | |
339 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
340 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\\';" )); |
341 | } |
342 | SECTION("Multi-byte quote terminates immediately results in error" ) { |
343 | ofstream csv_stream(csv_path); |
344 | csv_stream << "ABABAC" ; |
345 | csv_stream.close(); |
346 | |
347 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
348 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABAC';" )); |
349 | } |
350 | SECTION("Quote followed by incomplete multi-byte delimiter" ) { |
351 | ofstream csv_stream(csv_path); |
352 | csv_stream << "\"\"AB" ; |
353 | csv_stream.close(); |
354 | |
355 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
356 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC';" )); |
357 | REQUIRE_NO_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'AB';" )); |
358 | } |
359 | SECTION("Multi-byte quote terminates after escape results in error" ) { |
360 | ofstream csv_stream(csv_path); |
361 | csv_stream << "ABACABABABAC" ; |
362 | csv_stream.close(); |
363 | |
364 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
365 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAB';" )); |
366 | } |
367 | SECTION("Multi-byte quote terminates after quote escape results in error" ) { |
368 | ofstream csv_stream(csv_path); |
369 | csv_stream << "ABACABACABAC" ; |
370 | csv_stream.close(); |
371 | |
372 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
373 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAC';" )); |
374 | } |
375 | SECTION("Multi-byte quote terminates after escape results in error" ) { |
376 | ofstream csv_stream(csv_path); |
377 | csv_stream << "ABACABAB" ; |
378 | csv_stream.close(); |
379 | |
380 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
381 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAB';" )); |
382 | } |
383 | SECTION("Delimiter, quote and escape have a maximum size of 255 bytes" ) { |
384 | ofstream csv_stream(csv_path); |
385 | csv_stream << "ABAB" ; |
386 | csv_stream.close(); |
387 | |
388 | string long_string(1000, 'a'); |
389 | |
390 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
391 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '" + long_string + "';" )); |
392 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' ESCAPE '" + long_string + "';" )); |
393 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '" + long_string + "';" )); |
394 | REQUIRE_NO_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'BLABLABLA';" )); |
395 | } |
396 | SECTION("Test \r newline with multi-byte delimiter" ) { |
397 | ofstream csv_stream(csv_path); |
398 | csv_stream << "ABAB\rABAC\r" ; |
399 | csv_stream.close(); |
400 | |
401 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
402 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" ); |
403 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
404 | |
405 | result = con.Query("SELECT * FROM abac_tbl" ); |
406 | REQUIRE(CHECK_COLUMN(result, 0, {"A" , "A" })); |
407 | REQUIRE(CHECK_COLUMN(result, 1, {"B" , "C" })); |
408 | } |
409 | SECTION("Test \r\n newline with multi-byte delimiter" ) { |
410 | ofstream csv_stream(csv_path); |
411 | csv_stream << "ABAB\r\nABAC\r\n" ; |
412 | csv_stream.close(); |
413 | |
414 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
415 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" ); |
416 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
417 | |
418 | result = con.Query("SELECT * FROM abac_tbl" ); |
419 | REQUIRE(CHECK_COLUMN(result, 0, {"A" , "A" })); |
420 | REQUIRE(CHECK_COLUMN(result, 1, {"B" , "C" })); |
421 | } |
422 | SECTION("Test unterminated quotes with multi-line delimiter" ) { |
423 | ofstream csv_stream(csv_path); |
424 | csv_stream << "\"AAA" ; |
425 | csv_stream.close(); |
426 | |
427 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
428 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" )); |
429 | } |
430 | SECTION("Test unquote not followed by delimiter" ) { |
431 | ofstream csv_stream(csv_path); |
432 | csv_stream << "\"AAA\"BB" ; |
433 | csv_stream.close(); |
434 | |
435 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
436 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" )); |
437 | } |
438 | SECTION("Test escape followed by non-quote and non-escape (single-byte)" ) { |
439 | ofstream csv_stream(csv_path); |
440 | csv_stream << "\"AAA\\BB\"|A" ; |
441 | csv_stream.close(); |
442 | |
443 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
444 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '|' ESCAPE '\\';" )); |
445 | } |
446 | SECTION("Test escape followed by non-quote and non-escape (multi-byte)" ) { |
447 | ofstream csv_stream(csv_path); |
448 | csv_stream << "\"AAA\\BB\"BAA" ; |
449 | csv_stream.close(); |
450 | |
451 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
452 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA' ESCAPE '\\';" )); |
453 | } |
454 | SECTION("Test file end after delimiter with multi-byte delimiter" ) { |
455 | ofstream csv_stream(csv_path); |
456 | csv_stream << "AAABA" ; |
457 | csv_stream.close(); |
458 | |
459 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
460 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" ); |
461 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
462 | |
463 | result = con.Query("SELECT * FROM abac_tbl" ); |
464 | REQUIRE(CHECK_COLUMN(result, 0, {"AAA" })); |
465 | } |
466 | SECTION("Test file end after delimiter with single-byte delimiter" ) { |
467 | ofstream csv_stream(csv_path); |
468 | csv_stream << "AAA|" ; |
469 | csv_stream.close(); |
470 | |
471 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
472 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '|';" ); |
473 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
474 | |
475 | result = con.Query("SELECT * FROM abac_tbl" ); |
476 | REQUIRE(CHECK_COLUMN(result, 0, {"AAA" })); |
477 | } |
478 | } |
479 | |
480 | TEST_CASE("Test long value with escapes" , "[copy]" ) { |
481 | FileSystem fs; |
482 | unique_ptr<QueryResult> result; |
483 | DuckDB db(nullptr); |
484 | Connection con(db); |
485 | |
486 | auto csv_path = GetCSVPath(); |
487 | |
488 | string value = string(10000, 'a') + "\"\"" + string(20000, 'b'); |
489 | string expected_value = string(10000, 'a') + "\"" + string(20000, 'b'); |
490 | |
491 | // long value with escape and simple delimiter |
492 | ofstream long_escaped_value(fs.JoinPath(csv_path, "long_escaped_value.csv" )); |
493 | long_escaped_value << 1 << "🦆" << 2 << "🦆" |
494 | << "\"" << value << "\"" << endl; |
495 | long_escaped_value.close(); |
496 | |
497 | // load CSV file into a table |
498 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE long_escaped_value (a INTEGER, b INTEGER, c VARCHAR);" )); |
499 | result = con.Query("COPY long_escaped_value FROM '" + fs.JoinPath(csv_path, "long_escaped_value.csv" ) + |
500 | "' DELIMITER '🦆';" ); |
501 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
502 | |
503 | result = con.Query("SELECT * FROM long_escaped_value" ); |
504 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
505 | REQUIRE(CHECK_COLUMN(result, 1, {2})); |
506 | REQUIRE(CHECK_COLUMN(result, 2, {expected_value})); |
507 | |
508 | // long value with escape and complex delimiter |
509 | ofstream long_escaped_value_unicode(fs.JoinPath(csv_path, "long_escaped_value_unicode.csv" )); |
510 | long_escaped_value_unicode << 1 << "," << 2 << "," |
511 | << "\"" << value << "\"" << endl; |
512 | long_escaped_value_unicode.close(); |
513 | |
514 | // load CSV file into a table |
515 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE long_escaped_value_unicode (a INTEGER, b INTEGER, c VARCHAR);" )); |
516 | result = con.Query("COPY long_escaped_value_unicode FROM '" + |
517 | fs.JoinPath(csv_path, "long_escaped_value_unicode.csv" ) + "';" ); |
518 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
519 | |
520 | result = con.Query("SELECT * FROM long_escaped_value_unicode" ); |
521 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
522 | REQUIRE(CHECK_COLUMN(result, 1, {2})); |
523 | REQUIRE(CHECK_COLUMN(result, 2, {expected_value})); |
524 | } |
525 | |
526 | TEST_CASE("Test NULL option of copy statement" , "[copy]" ) { |
527 | FileSystem fs; |
528 | unique_ptr<QueryResult> result; |
529 | DuckDB db(nullptr); |
530 | Connection con(db); |
531 | |
532 | auto csv_path = GetCSVPath(); |
533 | |
534 | // generate CSV file with default delimiter |
535 | ofstream from_csv_file(fs.JoinPath(csv_path, "test_null_option.csv" )); |
536 | for (int i = 0; i < 3; i++) { |
537 | from_csv_file << i << ",,\"test\",null" << endl; |
538 | } |
539 | from_csv_file.close(); |
540 | |
541 | // create a table |
542 | REQUIRE_NO_FAIL(con.Query( |
543 | "CREATE TABLE test_null_option (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
544 | |
545 | // test COPY ... FROM ... |
546 | |
547 | // implicitly using default NULL value |
548 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "';" ); |
549 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
550 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
551 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
552 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()})); |
553 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
554 | REQUIRE(CHECK_COLUMN(result, 3, {"null" , "null" , "null" })); |
555 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
556 | |
557 | // explicitly using default NULL value |
558 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL '');" ); |
559 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
560 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
561 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
562 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()})); |
563 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
564 | REQUIRE(CHECK_COLUMN(result, 3, {"null" , "null" , "null" })); |
565 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
566 | |
567 | // make sure a quoted null string is interpreted as a null value |
568 | result = |
569 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL 'test');" ); |
570 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
571 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
572 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
573 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
574 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()})); |
575 | REQUIRE(CHECK_COLUMN(result, 3, {"null" , "null" , "null" })); |
576 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
577 | |
578 | // setting specific NULL value |
579 | result = |
580 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL 'null');" ); |
581 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
582 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
583 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
584 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
585 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
586 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
587 | |
588 | // invalid parameter type |
589 | REQUIRE_FAIL( |
590 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL null);" )); |
591 | |
592 | // delimiter must not appear in the NULL specification |
593 | REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
594 | "' (NULL 'null,');" )); |
595 | REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
596 | "' (DELIMITER 'null', NULL 'null');" )); |
597 | REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
598 | "' (DELIMITER 'null', NULL 'nu');" )); |
599 | |
600 | // no parameter type |
601 | REQUIRE_FAIL( |
602 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL);" )); |
603 | |
604 | // empty integer column with non-default NULL string |
605 | REQUIRE_NO_FAIL(con.Query( |
606 | "CREATE TABLE test_null_option_2 (col_a INTEGER, col_b INTEGER, col_c VARCHAR(10), col_d VARCHAR(10));" )); |
607 | REQUIRE_FAIL(con.Query("COPY test_null_option_2 FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
608 | "' (NULL 'null');" )); |
609 | |
610 | // test COPY ... TO ... |
611 | |
612 | // implicitly using default NULL value |
613 | result = con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_2.csv" ) + "';" ); |
614 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
615 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
616 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_2.csv" ) + "';" ); |
617 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
618 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
619 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
620 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
621 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
622 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
623 | |
624 | // explicitly using default NULL value |
625 | result = con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_3.csv" ) + "' (NULL '');" ); |
626 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
627 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
628 | result = |
629 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_3.csv" ) + "' (NULL '');" ); |
630 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
631 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
632 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
633 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
634 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
635 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
636 | |
637 | // setting specific NULL value |
638 | result = |
639 | con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_4.csv" ) + "' (NULL 'null');" ); |
640 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
641 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
642 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_4.csv" ) + |
643 | "' (NULL 'null');" ); |
644 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
645 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
646 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
647 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
648 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
649 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
650 | } |
651 | |
652 | TEST_CASE("Test force_quote and force_not_null" , "[copy]" ) { |
653 | FileSystem fs; |
654 | unique_ptr<QueryResult> result; |
655 | DuckDB db(nullptr); |
656 | Connection con(db); |
657 | |
658 | auto csv_path = GetCSVPath(); |
659 | |
660 | // generate CSV file with default delimiter |
661 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
662 | from_csv_file << 8 << ",test,tea" << endl; |
663 | for (int i = 0; i < 2; i++) { |
664 | from_csv_file << i << ",,test" << endl; |
665 | } |
666 | from_csv_file.close(); |
667 | |
668 | // generate another CSV file |
669 | ofstream from_csv_file_2(fs.JoinPath(csv_path, "test_2.csv" )); |
670 | from_csv_file_2 << ",test,tea" << endl; |
671 | for (int i = 0; i < 2; i++) { |
672 | from_csv_file_2 << i << ",,test" << endl; |
673 | } |
674 | from_csv_file_2.close(); |
675 | |
676 | // create a table |
677 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10));" )); |
678 | |
679 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
680 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
681 | |
682 | // test FORCE_QUOTE * |
683 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_star.csv" ) + "' (FORCE_QUOTE *);" ); |
684 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
685 | |
686 | vector<string> lines; |
687 | string line; |
688 | ifstream test_star_file(fs.JoinPath(csv_path, "test_star.csv" )); |
689 | if (test_star_file.is_open()) { |
690 | while (getline(test_star_file, line)) { |
691 | lines.push_back(line); |
692 | } |
693 | test_star_file.close(); |
694 | } else { |
695 | throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_star.csv" )); |
696 | }; |
697 | REQUIRE(lines[0] == "\"8\",\"test\",\"tea\"" ); |
698 | REQUIRE(lines[1] == "\"0\",,\"test\"" ); |
699 | REQUIRE(lines[2] == "\"1\",,\"test\"" ); |
700 | |
701 | // test FORCE_QUOTE with specific columns and non-default quote character and non-default null character |
702 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_chosen_columns.csv" ) + |
703 | "' (FORCE_QUOTE (col_a, col_c), QUOTE 't', NULL 'tea');" ); |
704 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
705 | |
706 | ifstream test_columns_file(fs.JoinPath(csv_path, "test_chosen_columns.csv" )); |
707 | if (test_columns_file.is_open()) { |
708 | while (getline(test_columns_file, line)) { |
709 | lines.push_back(line); |
710 | } |
711 | test_columns_file.close(); |
712 | } else { |
713 | throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_chosen_columns.csv" )); |
714 | }; |
715 | REQUIRE(lines[3] == "t8t,tttesttt,ttteat" ); |
716 | REQUIRE(lines[4] == "t0t,tea,tttesttt" ); |
717 | REQUIRE(lines[5] == "t1t,tea,tttesttt" ); |
718 | |
719 | // test FORCE_QUOTE with reordered columns |
720 | result = con.Query("COPY test (col_b, col_c, col_a) TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
721 | "' (FORCE_QUOTE (col_c, col_b), NULL 'test');" ); |
722 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
723 | |
724 | ifstream test_reorder_file(fs.JoinPath(csv_path, "test_reorder.csv" )); |
725 | if (test_reorder_file.is_open()) { |
726 | while (getline(test_reorder_file, line)) { |
727 | lines.push_back(line); |
728 | } |
729 | test_reorder_file.close(); |
730 | } else { |
731 | throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_reorder.csv" )); |
732 | }; |
733 | REQUIRE(lines[6] == "\"test\",\"tea\",8" ); |
734 | REQUIRE(lines[7] == "test,\"test\",0" ); |
735 | REQUIRE(lines[8] == "test,\"test\",1" ); |
736 | |
737 | // test using a column in FORCE_QUOTE that is not set as output, but that is a column of the table |
738 | REQUIRE_FAIL(con.Query("COPY test (col_b, col_a) TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
739 | "' (FORCE_QUOTE (col_c, col_b));" )); |
740 | // test using a column in FORCE_QUOTE that is not a column of the table |
741 | REQUIRE_FAIL( |
742 | con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE (col_c, col_d));" )); |
743 | // FORCE_QUOTE is only supported in COPY ... TO ... |
744 | REQUIRE_FAIL( |
745 | con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE (col_c, col_d));" )); |
746 | // FORCE_QUOTE must not be empty and must have the correct parameter type |
747 | REQUIRE_FAIL(con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE);" )); |
748 | REQUIRE_FAIL(con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE 42);" )); |
749 | |
750 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
751 | |
752 | // test FORCE_NOT_NULL |
753 | |
754 | // test if null value is correctly converted into string |
755 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
756 | "' (FORCE_NOT_NULL (col_b), NULL 'test');" ); |
757 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
758 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
759 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 8})); |
760 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "test" })); |
761 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), "tea" })); |
762 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
763 | |
764 | // test if null value is correctly converted into string if explicit columns are used |
765 | result = con.Query("COPY test (col_a, col_b, col_c) FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
766 | "' (FORCE_NOT_NULL (col_b), NULL 'test');" ); |
767 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
768 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
769 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 8})); |
770 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "test" })); |
771 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), "tea" })); |
772 | |
773 | // FORCE_NOT_NULL is only supported in COPY ... FROM ... |
774 | REQUIRE_FAIL(result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
775 | "' (FORCE_NOT_NULL (col_b), NULL 'test');" )); |
776 | // FORCE_NOT_NULL must not be empty and must have the correct parameter type |
777 | REQUIRE_FAIL(result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
778 | "' (FORCE_NOT_NULL, NULL 'test');" )); |
779 | REQUIRE_FAIL(result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
780 | "' (FORCE_NOT_NULL 42, NULL 'test');" )); |
781 | // test using a column in FORCE_NOT_NULL that is not set as output, but that is a column of the table |
782 | REQUIRE_FAIL(con.Query("COPY test (col_b, col_a) FROM '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
783 | "' (FORCE_NOT_NULL (col_c, col_b));" )); |
784 | // test using a column in FORCE_NOT_NULL that is not a column of the table |
785 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
786 | "' (FORCE_NOT_NULL (col_c, col_d));" )); |
787 | |
788 | // FORCE_NOT_NULL fails on integer columns |
789 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_2.csv" ) + "' (FORCE_NOT_NULL (col_a));" )); |
790 | } |
791 | |
792 | TEST_CASE("Test copy statement with unicode delimiter/quote/escape" , "[copy]" ) { |
793 | FileSystem fs; |
794 | unique_ptr<QueryResult> result; |
795 | DuckDB db(nullptr); |
796 | Connection con(db); |
797 | |
798 | auto csv_path = GetCSVPath(); |
799 | |
800 | // generate CSV file with unicode (> one-byte) delimiter/quote/escape |
801 | ofstream from_csv_file1(fs.JoinPath(csv_path, "multi_char.csv" )); |
802 | from_csv_file1 << 0 << "🦆ˮdu˧˧🦆ckˮ🦆ˮd˧ˮ˧ˮu🦆ckˮ🦆duck" << endl; |
803 | from_csv_file1 << 1 << "🦆ˮdou˧ˮbleˮ🦆🦆duck" << endl; |
804 | from_csv_file1 << 2 << "🦆🦆🦆" << endl; |
805 | from_csv_file1 << 3 << "🦆duck inv˧asion🦆🦆" << endl; |
806 | from_csv_file1.close(); |
807 | |
808 | // generate CSV file with unicode (> one-byte) delimiter/quote/escape that exceeds the buffer size a few times |
809 | ofstream from_csv_file2(fs.JoinPath(csv_path, "multi_char_buffer_exhausted.csv" )); |
810 | int64_t sum = 0; |
811 | for (int i = 0; i < 16384; i++) { |
812 | if (i % 2 == 0) { |
813 | from_csv_file2 << i << "🦆ˮ🦆dˮ🦆ˮd˧ˮ🦆ˮ🦆d˧" << endl; |
814 | } else { |
815 | from_csv_file2 << i << "🦆ˮ˧ˮ˧ˮ˧ˮˮ🦆˧˧🦆 test test 🦆" << endl; |
816 | } |
817 | sum += i; |
818 | } |
819 | from_csv_file2.close(); |
820 | |
821 | // generate CSV file with one-byte delimiter/quote/escape |
822 | ofstream from_csv_file3(fs.JoinPath(csv_path, "one_byte_char.csv" )); |
823 | for (int i = 0; i < 3; i++) { |
824 | from_csv_file3 << i << ",'du''ck','''''du,ck',duck" << endl; |
825 | } |
826 | from_csv_file3.close(); |
827 | |
828 | // generate CSV file with unterminated quotes |
829 | ofstream from_csv_file4(fs.JoinPath(csv_path, "unterminated_quotes.csv" )); |
830 | for (int i = 0; i < 3; i++) { |
831 | from_csv_file4 << i << ",duck,\"duck" << endl; |
832 | } |
833 | from_csv_file4.close(); |
834 | |
835 | // generate CSV file with quotes that start midway in the value |
836 | ofstream from_csv_file5(fs.JoinPath(csv_path, "unterminated_quotes_2.csv" )); |
837 | for (int i = 0; i < 3; i++) { |
838 | from_csv_file5 << i << ",du\"ck,duck" << endl; |
839 | } |
840 | from_csv_file5.close(); |
841 | |
842 | // generate a CSV file with a very long string exceeding the buffer midway in an escape sequence (delimiter and |
843 | // escape share substrings) |
844 | ofstream from_csv_file6(fs.JoinPath(csv_path, "shared_substrings.csv" )); |
845 | string big_string_a(16370, 'a'); |
846 | from_csv_file6 << big_string_a << "AAA\"aaaaaaaaAAB\"\"" << endl; |
847 | from_csv_file6.close(); |
848 | |
849 | // create three tables for testing |
850 | REQUIRE_NO_FAIL(con.Query( |
851 | "CREATE TABLE test_unicode_1 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
852 | REQUIRE_NO_FAIL(con.Query( |
853 | "CREATE TABLE test_unicode_2 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
854 | REQUIRE_NO_FAIL(con.Query( |
855 | "CREATE TABLE test_unicode_3 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
856 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test_unicode_4 (col_a VARCHAR, col_b VARCHAR);" )); |
857 | |
858 | // throw error if unterminated quotes are detected |
859 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "unterminated_quotes.csv" ) + "';" )); |
860 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "unterminated_quotes_2.csv" ) + "';" )); |
861 | |
862 | // test COPY ... FROM ... |
863 | |
864 | // test unicode delimiter/quote/escape |
865 | result = con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "multi_char.csv" ) + |
866 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
867 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
868 | result = con.Query("SELECT * FROM test_unicode_1 ORDER BY 1 LIMIT 4;" ); |
869 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
870 | REQUIRE(CHECK_COLUMN(result, 1, {"du˧🦆ck" , "douˮble" , Value(), "duck inv˧asion" })); |
871 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮˮu🦆ck" , Value(), Value(), Value()})); |
872 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , Value(), Value()})); |
873 | |
874 | // test unicode delimiter/quote/escape that exceeds the buffer size a few times |
875 | result = con.Query("COPY test_unicode_2 FROM '" + fs.JoinPath(csv_path, "multi_char_buffer_exhausted.csv" ) + |
876 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
877 | REQUIRE(CHECK_COLUMN(result, 0, {16384})); |
878 | result = con.Query("SELECT * FROM test_unicode_2 ORDER BY 1 LIMIT 4;" ); |
879 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
880 | REQUIRE(CHECK_COLUMN(result, 1, {"🦆d" , "ˮˮˮ" , "🦆d" , "ˮˮˮ" })); |
881 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮ🦆" , "˧˧" , "dˮ🦆" , "˧˧" })); |
882 | REQUIRE(CHECK_COLUMN(result, 3, {"d˧" , " test test " , "d˧" , " test test " })); |
883 | result = con.Query("SELECT SUM(col_a) FROM test_unicode_2;" ); |
884 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum)})); |
885 | |
886 | // test one-byte delimiter/quote/escape |
887 | result = con.Query("COPY test_unicode_3 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + "' (QUOTE '''');" ); |
888 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
889 | result = con.Query("SELECT * FROM test_unicode_3 ORDER BY 1 LIMIT 3;" ); |
890 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
891 | REQUIRE(CHECK_COLUMN(result, 1, {"du'ck" , "du'ck" , "du'ck" })); |
892 | REQUIRE(CHECK_COLUMN(result, 2, {"''du,ck" , "''du,ck" , "''du,ck" })); |
893 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , "duck" })); |
894 | |
895 | // test correct shared substring behavior at buffer borders |
896 | result = con.Query("COPY test_unicode_4 FROM '" + fs.JoinPath(csv_path, "shared_substrings.csv" ) + |
897 | "' (DELIMITER 'AAA', ESCAPE 'AAB');" ); |
898 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
899 | result = con.Query("SELECT * FROM test_unicode_4;" ); |
900 | REQUIRE(CHECK_COLUMN(result, 0, {big_string_a})); |
901 | REQUIRE(CHECK_COLUMN(result, 1, {"aaaaaaaa\"" })); |
902 | |
903 | // quote and escape must not be empty |
904 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
905 | "' (DELIMITER '🦆', QUOTE '');" )); |
906 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
907 | "' (DELIMITER '🦆', ESCAPE '');" )); |
908 | |
909 | // test same string for delimiter and quote |
910 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
911 | "' (DELIMITER '🦆', QUOTE '🦆');" )); |
912 | |
913 | // escape and quote cannot be substrings of each other |
914 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
915 | "' (ESCAPE 'du', QUOTE 'duck');" )); |
916 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
917 | "' (ESCAPE 'duck', QUOTE 'du');" )); |
918 | |
919 | // delimiter and quote cannot be substrings of each other |
920 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
921 | "' (DELIMITER 'du', QUOTE 'duck');" )); |
922 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
923 | "' (DELIMITER 'duck', QUOTE 'du');" )); |
924 | |
925 | // delimiter and escape cannot be substrings of each other |
926 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
927 | "' (DELIMITER 'AA', ESCAPE 'AAAA');" )); |
928 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
929 | "' (DELIMITER 'AAAA', ESCAPE 'AA');" )); |
930 | |
931 | // COPY ... TO ... |
932 | |
933 | // test unicode delimiter/quote/escape |
934 | result = con.Query("COPY test_unicode_1 TO '" + fs.JoinPath(csv_path, "test_unicode_1.csv" ) + |
935 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
936 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
937 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_1;" )); |
938 | result = con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "test_unicode_1.csv" ) + |
939 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
940 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
941 | result = con.Query("SELECT * FROM test_unicode_1 ORDER BY 1 LIMIT 4;" ); |
942 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
943 | REQUIRE(CHECK_COLUMN(result, 1, {"du˧🦆ck" , "douˮble" , Value(), "duck inv˧asion" })); |
944 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮˮu🦆ck" , Value(), Value(), Value()})); |
945 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , Value(), Value()})); |
946 | |
947 | // test unicode delimiter/quote/escape |
948 | result = con.Query("COPY test_unicode_2 TO '" + fs.JoinPath(csv_path, "test_unicode_2.csv" ) + |
949 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
950 | REQUIRE(CHECK_COLUMN(result, 0, {16384})); |
951 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_2;" )); |
952 | result = con.Query("COPY test_unicode_2 FROM '" + fs.JoinPath(csv_path, "test_unicode_2.csv" ) + |
953 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
954 | REQUIRE(CHECK_COLUMN(result, 0, {16384})); |
955 | result = con.Query("SELECT * FROM test_unicode_2 ORDER BY 1 LIMIT 4;" ); |
956 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
957 | REQUIRE(CHECK_COLUMN(result, 1, {"🦆d" , "ˮˮˮ" , "🦆d" , "ˮˮˮ" })); |
958 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮ🦆" , "˧˧" , "dˮ🦆" , "˧˧" })); |
959 | REQUIRE(CHECK_COLUMN(result, 3, {"d˧" , " test test " , "d˧" , " test test " })); |
960 | result = con.Query("SELECT SUM(col_a) FROM test_unicode_2;" ); |
961 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum)})); |
962 | |
963 | // test one-byte delimiter/quote/escape |
964 | result = con.Query("COPY test_unicode_3 TO '" + fs.JoinPath(csv_path, "test_unicode_3.csv" ) + "' (QUOTE '''');" ); |
965 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
966 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_3;" )); |
967 | result = con.Query("COPY test_unicode_3 FROM '" + fs.JoinPath(csv_path, "test_unicode_3.csv" ) + "' (QUOTE '''');" ); |
968 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
969 | result = con.Query("SELECT * FROM test_unicode_3 ORDER BY 1 LIMIT 3;" ); |
970 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
971 | REQUIRE(CHECK_COLUMN(result, 1, {"du'ck" , "du'ck" , "du'ck" })); |
972 | REQUIRE(CHECK_COLUMN(result, 2, {"''du,ck" , "''du,ck" , "''du,ck" })); |
973 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , "duck" })); |
974 | } |
975 | |
976 | TEST_CASE("Test copy statement with file overwrite" , "[copy]" ) { |
977 | FileSystem fs; |
978 | unique_ptr<QueryResult> result; |
979 | DuckDB db(nullptr); |
980 | Connection con(db); |
981 | |
982 | auto csv_path = GetCSVPath(); |
983 | |
984 | // create a table and insert some values |
985 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR(10));" )); |
986 | REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (1, 'hello'), (2, 'world '), (3, ' xx');" )); |
987 | |
988 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
989 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
990 | REQUIRE(CHECK_COLUMN(result, 1, {"hello" , "world " , " xx" })); |
991 | |
992 | // copy to the CSV file |
993 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
994 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
995 | |
996 | // now copy to the file again |
997 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
998 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
999 | |
1000 | // reload the data from the file: it should only have three rows |
1001 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
1002 | |
1003 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1004 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
1005 | |
1006 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
1007 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
1008 | REQUIRE(CHECK_COLUMN(result, 1, {"hello" , "world " , " xx" })); |
1009 | } |
1010 | |
1011 | TEST_CASE("Test copy statement with default values" , "[copy]" ) { |
1012 | FileSystem fs; |
1013 | unique_ptr<QueryResult> result; |
1014 | DuckDB db(nullptr); |
1015 | Connection con(db); |
1016 | |
1017 | auto csv_path = GetCSVPath(); |
1018 | |
1019 | // create a file only consisting of integers |
1020 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1021 | int64_t expected_sum_a = 0; |
1022 | int64_t expected_sum_c = 0; |
1023 | for (int i = 0; i < 5000; i++) { |
1024 | from_csv_file << i << endl; |
1025 | |
1026 | expected_sum_a += i; |
1027 | expected_sum_c += i + 7; |
1028 | } |
1029 | from_csv_file.close(); |
1030 | |
1031 | // load CSV file into a table |
1032 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR DEFAULT('hello'), c INTEGER DEFAULT(3+4));" )); |
1033 | result = con.Query("COPY test (a) FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1034 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
1035 | result = con.Query("COPY test (c) FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1036 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
1037 | |
1038 | result = |
1039 | con.Query("SELECT COUNT(a), COUNT(b), COUNT(c), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(a), SUM(c) FROM test;" ); |
1040 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
1041 | REQUIRE(CHECK_COLUMN(result, 1, {10000})); |
1042 | REQUIRE(CHECK_COLUMN(result, 2, {10000})); |
1043 | REQUIRE(CHECK_COLUMN(result, 3, {5})); |
1044 | REQUIRE(CHECK_COLUMN(result, 4, {5})); |
1045 | REQUIRE(CHECK_COLUMN(result, 5, {Value::BIGINT(expected_sum_a)})); |
1046 | REQUIRE(CHECK_COLUMN(result, 6, {Value::BIGINT(expected_sum_c)})); |
1047 | } |
1048 | |
1049 | TEST_CASE("Test copy statement with long lines" , "[copy]" ) { |
1050 | FileSystem fs; |
1051 | unique_ptr<QueryResult> result; |
1052 | DuckDB db(nullptr); |
1053 | Connection con(db); |
1054 | |
1055 | auto csv_path = GetCSVPath(); |
1056 | |
1057 | // generate a CSV file with a very long string |
1058 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1059 | string big_string_a(100000, 'a'); |
1060 | string big_string_b(200000, 'b'); |
1061 | from_csv_file << 10 << "," << big_string_a << "," << 20 << endl; |
1062 | from_csv_file << 20 << "," << big_string_b << "," << 30 << endl; |
1063 | from_csv_file.close(); |
1064 | |
1065 | // load CSV file into a table |
1066 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
1067 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1068 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
1069 | |
1070 | result = con.Query("SELECT LENGTH(b) FROM test ORDER BY a;" ); |
1071 | REQUIRE(CHECK_COLUMN(result, 0, {100000, 200000})); |
1072 | |
1073 | result = con.Query("SELECT SUM(a), SUM(c) FROM test;" ); |
1074 | REQUIRE(CHECK_COLUMN(result, 0, {30})); |
1075 | REQUIRE(CHECK_COLUMN(result, 1, {50})); |
1076 | } |
1077 | |
1078 | TEST_CASE("Test copy statement with quotes and newlines" , "[copy]" ) { |
1079 | FileSystem fs; |
1080 | unique_ptr<QueryResult> result; |
1081 | DuckDB db(nullptr); |
1082 | Connection con(db); |
1083 | |
1084 | auto csv_path = GetCSVPath(); |
1085 | |
1086 | // generate a CSV file with newlines enclosed by quotes |
1087 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1088 | from_csv_file << "\"hello\\nworld\",\"5\"" << endl; |
1089 | from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl; |
1090 | from_csv_file.close(); |
1091 | |
1092 | // load CSV file into a table |
1093 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
1094 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1095 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
1096 | |
1097 | result = con.Query("SELECT SUM(b) FROM test;" ); |
1098 | REQUIRE(CHECK_COLUMN(result, 0, {11})); |
1099 | |
1100 | result = con.Query("SELECT a FROM test ORDER BY a;" ); |
1101 | REQUIRE(CHECK_COLUMN(result, 0, {"hello\\nworld" , "what,\\n brings, you here\\n, today" })); |
1102 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
1103 | |
1104 | // quotes in the middle of a quoted string cause an exception if they are not escaped |
1105 | from_csv_file.open(fs.JoinPath(csv_path, "test.csv" )); |
1106 | from_csv_file << "\"hello\\n\"w\"o\"rld\",\"5\"" << endl; |
1107 | from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl; |
1108 | from_csv_file.close(); |
1109 | |
1110 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
1111 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" )); |
1112 | |
1113 | // now the same quotes are escaped |
1114 | from_csv_file.open(fs.JoinPath(csv_path, "test.csv" )); |
1115 | from_csv_file << "\"hello\\n\"\"w\"\"o\"\"rld\",\"5\"" << endl; |
1116 | from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl; |
1117 | from_csv_file.close(); |
1118 | |
1119 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1120 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
1121 | |
1122 | result = con.Query("SELECT SUM(b) FROM test;" ); |
1123 | REQUIRE(CHECK_COLUMN(result, 0, {11})); |
1124 | result = con.Query("SELECT a,b FROM test ORDER BY b;" ); |
1125 | REQUIRE(CHECK_COLUMN(result, 0, {"hello\\n\"w\"o\"rld" , "what,\\n brings, you here\\n, today" })); |
1126 | REQUIRE(CHECK_COLUMN(result, 1, {5, 6})); |
1127 | |
1128 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
1129 | |
1130 | // not escaped escape string in quotes throws an exception |
1131 | from_csv_file.open(fs.JoinPath(csv_path, "test.csv" )); |
1132 | from_csv_file << "\"\\\"escaped\\\",\"5\"" << endl; |
1133 | from_csv_file << "yea,6" << endl; |
1134 | from_csv_file.close(); |
1135 | |
1136 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
1137 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "' (ESCAPE '\\');" )); |
1138 | } |
1139 | |
1140 | TEST_CASE("Test copy statement with many empty lines" , "[copy]" ) { |
1141 | FileSystem fs; |
1142 | unique_ptr<QueryResult> result; |
1143 | DuckDB db(nullptr); |
1144 | Connection con(db); |
1145 | |
1146 | auto csv_path = GetCSVPath(); |
1147 | |
1148 | // generate CSV file with a very long string |
1149 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1150 | from_csv_file << "1\n" ; |
1151 | for (idx_t i = 0; i < 19999; i++) { |
1152 | from_csv_file << "\n" ; |
1153 | } |
1154 | from_csv_file.close(); |
1155 | |
1156 | // load CSV file into a table |
1157 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER);" )); |
1158 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1159 | REQUIRE(CHECK_COLUMN(result, 0, {20000})); |
1160 | |
1161 | result = con.Query("SELECT SUM(a) FROM test;" ); |
1162 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
1163 | } |
1164 | |
1165 | TEST_CASE("Test different line endings" , "[copy]" ) { |
1166 | FileSystem fs; |
1167 | unique_ptr<QueryResult> result; |
1168 | DuckDB db(nullptr); |
1169 | Connection con(db); |
1170 | |
1171 | auto csv_path = GetCSVPath(); |
1172 | |
1173 | // generate CSV file with different line endings |
1174 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1175 | from_csv_file << 10 << "," |
1176 | << "hello" |
1177 | << "," << 20 << "\r\n" ; |
1178 | from_csv_file << 20 << "," |
1179 | << "world" |
1180 | << "," << 30 << '\n'; |
1181 | from_csv_file << 30 << "," |
1182 | << "test" |
1183 | << "," << 30 << '\r'; |
1184 | from_csv_file.close(); |
1185 | |
1186 | // load CSV file into a table |
1187 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
1188 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1189 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
1190 | |
1191 | result = con.Query("SELECT LENGTH(b) FROM test ORDER BY a;" ); |
1192 | REQUIRE(CHECK_COLUMN(result, 0, {5, 5, 4})); |
1193 | |
1194 | result = con.Query("SELECT SUM(a), SUM(c) FROM test;" ); |
1195 | REQUIRE(CHECK_COLUMN(result, 0, {60})); |
1196 | REQUIRE(CHECK_COLUMN(result, 1, {80})); |
1197 | } |
1198 | |
1199 | TEST_CASE("Test Windows Newlines with a long file" , "[copy]" ) { |
1200 | FileSystem fs; |
1201 | unique_ptr<QueryResult> result; |
1202 | DuckDB db(nullptr); |
1203 | Connection con(db); |
1204 | |
1205 | auto csv_path = GetCSVPath(); |
1206 | |
1207 | idx_t line_count = 20000; |
1208 | int64_t sum_a = 0, sum_c = 0; |
1209 | |
1210 | // generate a CSV file with many strings |
1211 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1212 | for (idx_t i = 0; i < line_count; i++) { |
1213 | from_csv_file << i << "," |
1214 | << "hello" |
1215 | << "," << i + 2 << "\r\n" ; |
1216 | |
1217 | sum_a += i; |
1218 | sum_c += i + 2; |
1219 | } |
1220 | from_csv_file.close(); |
1221 | |
1222 | // load CSV file into a table |
1223 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
1224 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
1225 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)})); |
1226 | |
1227 | result = con.Query("SELECT SUM(a), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(LENGTH(b)), SUM(c) FROM test;" ); |
1228 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum_a)})); |
1229 | REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(5)})); |
1230 | REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(5)})); |
1231 | REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(5 * line_count)})); |
1232 | REQUIRE(CHECK_COLUMN(result, 4, {Value::BIGINT(sum_c)})); |
1233 | |
1234 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
1235 | // now do the same with a multi-byte quote that is not actually used |
1236 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "' QUOTE 'BLABLABLA';" ); |
1237 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)})); |
1238 | |
1239 | result = con.Query("SELECT SUM(a), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(LENGTH(b)), SUM(c) FROM test;" ); |
1240 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum_a)})); |
1241 | REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(5)})); |
1242 | REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(5)})); |
1243 | REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(5 * line_count)})); |
1244 | REQUIRE(CHECK_COLUMN(result, 4, {Value::BIGINT(sum_c)})); |
1245 | |
1246 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
1247 | |
1248 | // generate a csv file with one value and many empty values |
1249 | ofstream from_csv_file_empty(fs.JoinPath(csv_path, "test2.csv" )); |
1250 | from_csv_file_empty << 1 << "\r\n" ; |
1251 | for (idx_t i = 0; i < line_count - 1; i++) { |
1252 | from_csv_file_empty << "\r\n" ; |
1253 | } |
1254 | from_csv_file_empty.close(); |
1255 | |
1256 | // load CSV file into a table |
1257 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER);" )); |
1258 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" ); |
1259 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)})); |
1260 | |
1261 | result = con.Query("SELECT SUM(a) FROM test;" ); |
1262 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(1)})); |
1263 | } |
1264 | |
1265 | TEST_CASE("Test lines that exceed the maximum line size" , "[copy]" ) { |
1266 | FileSystem fs; |
1267 | unique_ptr<QueryResult> result; |
1268 | DuckDB db(nullptr); |
1269 | Connection con(db); |
1270 | |
1271 | auto csv_path = GetCSVPath(); |
1272 | |
1273 | // generate CSV file with 20 MB string |
1274 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
1275 | string big_string(2048576, 'a'); |
1276 | from_csv_file << 10 << "," << big_string << "," << 20 << endl; |
1277 | from_csv_file.close(); |
1278 | |
1279 | // value is too big for loading |
1280 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
1281 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" )); |
1282 | } |
1283 | |
1284 | TEST_CASE("Test copy from/to on-time dataset" , "[copy]" ) { |
1285 | FileSystem fs; |
1286 | unique_ptr<QueryResult> result; |
1287 | DuckDB db(nullptr); |
1288 | Connection con(db); |
1289 | |
1290 | auto csv_path = GetCSVPath(); |
1291 | auto ontime_csv = fs.JoinPath(csv_path, "ontime.csv" ); |
1292 | WriteBinary(ontime_csv, ontime_sample, sizeof(ontime_sample)); |
1293 | |
1294 | REQUIRE_NO_FAIL(con.Query( |
1295 | "CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, " |
1296 | "flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), " |
1297 | "flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, " |
1298 | "origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), " |
1299 | "originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, " |
1300 | "destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips " |
1301 | "VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), " |
1302 | "depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups " |
1303 | "DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), " |
1304 | "taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes " |
1305 | "DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled " |
1306 | "SMALLINT, cancellationcode CHAR(1), diverted SMALLINT, crselapsedtime DECIMAL(8,2), actualelapsedtime " |
1307 | "DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup SMALLINT, " |
1308 | "carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), " |
1309 | "lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime " |
1310 | "VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), " |
1311 | "divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, " |
1312 | "div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), " |
1313 | "div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, " |
1314 | "div2airportseqid INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), " |
1315 | "div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, " |
1316 | "div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), " |
1317 | "div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, " |
1318 | "div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), " |
1319 | "div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, " |
1320 | "div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), " |
1321 | "div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));" )); |
1322 | |
1323 | result = con.Query("COPY ontime FROM '" + ontime_csv + "' DELIMITER ',' HEADER;" ); |
1324 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
1325 | |
1326 | result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;" ); |
1327 | REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988})); |
1328 | REQUIRE(CHECK_COLUMN(result, 1, {"AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" })); |
1329 | REQUIRE(CHECK_COLUMN(result, 2, {"JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" })); |
1330 | REQUIRE(CHECK_COLUMN(result, 3, |
1331 | {"New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , |
1332 | "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" })); |
1333 | REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
1334 | |
1335 | result = con.Query("COPY ontime TO '" + ontime_csv + "' DELIMITER ',' HEADER;" ); |
1336 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
1337 | REQUIRE_NO_FAIL(con.Query("DELETE FROM ontime;" )); |
1338 | result = con.Query("COPY ontime FROM '" + ontime_csv + "' DELIMITER ',' HEADER;" ); |
1339 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
1340 | |
1341 | result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;" ); |
1342 | REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988})); |
1343 | REQUIRE(CHECK_COLUMN(result, 1, {"AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" })); |
1344 | REQUIRE(CHECK_COLUMN(result, 2, {"JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" })); |
1345 | REQUIRE(CHECK_COLUMN(result, 3, |
1346 | {"New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , |
1347 | "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" })); |
1348 | REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
1349 | } |
1350 | |
1351 | TEST_CASE("Test copy from/to lineitem csv" , "[copy]" ) { |
1352 | FileSystem fs; |
1353 | unique_ptr<QueryResult> result; |
1354 | DuckDB db(nullptr); |
1355 | Connection con(db); |
1356 | |
1357 | auto csv_path = GetCSVPath(); |
1358 | auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv" ); |
1359 | WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample)); |
1360 | |
1361 | REQUIRE_NO_FAIL(con.Query( |
1362 | "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber " |
1363 | "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) " |
1364 | "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, " |
1365 | "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) " |
1366 | "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);" )); |
1367 | result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' DELIMITER '|'" ); |
1368 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
1369 | |
1370 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
1371 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
1372 | REQUIRE( |
1373 | CHECK_COLUMN(result, 1, |
1374 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
1375 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
1376 | |
1377 | // test COPY ... TO ... with HEADER |
1378 | result = con.Query("COPY lineitem TO '" + lineitem_csv + "' (DELIMITER ' ', HEADER);" ); |
1379 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
1380 | |
1381 | // clear the table |
1382 | REQUIRE_NO_FAIL(con.Query("DELETE FROM lineitem;" )); |
1383 | result = con.Query("SELECT * FROM lineitem;" ); |
1384 | REQUIRE(CHECK_COLUMN(result, 0, {})); |
1385 | |
1386 | // now copy back into the table |
1387 | result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' DELIMITER ' ' HEADER;" ); |
1388 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
1389 | |
1390 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
1391 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
1392 | REQUIRE( |
1393 | CHECK_COLUMN(result, 1, |
1394 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
1395 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
1396 | } |
1397 | |
1398 | TEST_CASE("Test copy from web_page csv" , "[copy]" ) { |
1399 | FileSystem fs; |
1400 | unique_ptr<QueryResult> result; |
1401 | DuckDB db(nullptr); |
1402 | Connection con(db); |
1403 | |
1404 | auto csv_path = GetCSVPath(); |
1405 | auto webpage_csv = fs.JoinPath(csv_path, "web_page.csv" ); |
1406 | WriteBinary(webpage_csv, web_page, sizeof(web_page)); |
1407 | |
1408 | REQUIRE_NO_FAIL(con.Query( |
1409 | "CREATE TABLE web_page(wp_web_page_sk integer not null, wp_web_page_id char(16) not null, wp_rec_start_date " |
1410 | "date, wp_rec_end_date date, wp_creation_date_sk integer, wp_access_date_sk integer, wp_autogen_flag char(1), " |
1411 | "wp_customer_sk integer, wp_url varchar(100), wp_type char(50), wp_char_count integer, wp_link_count integer, " |
1412 | "wp_image_count integer, wp_max_ad_count integer, primary key (wp_web_page_sk));" )); |
1413 | |
1414 | result = con.Query("COPY web_page FROM '" + webpage_csv + "' DELIMITER '|';" ); |
1415 | REQUIRE(CHECK_COLUMN(result, 0, {60})); |
1416 | |
1417 | result = con.Query("SELECT * FROM web_page ORDER BY wp_web_page_sk LIMIT 3;" ); |
1418 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
1419 | REQUIRE(CHECK_COLUMN(result, 1, {"AAAAAAAABAAAAAAA" , "AAAAAAAACAAAAAAA" , "AAAAAAAACAAAAAAA" })); |
1420 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(1997, 9, 3), Value::DATE(1997, 9, 3), Value::DATE(2000, 9, 3)})); |
1421 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value::DATE(2000, 9, 2), Value()})); |
1422 | REQUIRE(CHECK_COLUMN(result, 4, {2450810, 2450814, 2450814})); |
1423 | REQUIRE(CHECK_COLUMN(result, 5, {2452620, 2452580, 2452611})); |
1424 | REQUIRE(CHECK_COLUMN(result, 6, {"Y" , "N" , "N" })); |
1425 | REQUIRE(CHECK_COLUMN(result, 7, {98539, Value(), Value()})); |
1426 | REQUIRE(CHECK_COLUMN(result, 8, {"http://www.foo.com" , "http://www.foo.com" , "http://www.foo.com" })); |
1427 | REQUIRE(CHECK_COLUMN(result, 9, {"welcome" , "protected" , "feedback" })); |
1428 | REQUIRE(CHECK_COLUMN(result, 10, {2531, 1564, 1564})); |
1429 | REQUIRE(CHECK_COLUMN(result, 11, {8, 4, 4})); |
1430 | REQUIRE(CHECK_COLUMN(result, 12, {3, 3, 3})); |
1431 | REQUIRE(CHECK_COLUMN(result, 13, {4, 1, 4})); |
1432 | } |
1433 | |
1434 | TEST_CASE("Test copy from greek-utf8 csv" , "[copy]" ) { |
1435 | FileSystem fs; |
1436 | unique_ptr<QueryResult> result; |
1437 | DuckDB db(nullptr); |
1438 | Connection con(db); |
1439 | |
1440 | auto csv_path = GetCSVPath(); |
1441 | auto csv_file = fs.JoinPath(csv_path, "greek_utf8.csv" ); |
1442 | WriteBinary(csv_file, greek_utf8, sizeof(greek_utf8)); |
1443 | |
1444 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE greek_utf8(i INTEGER, j VARCHAR, k INTEGER);" )); |
1445 | |
1446 | result = con.Query("COPY greek_utf8 FROM '" + csv_file + "' DELIMITER '|';" ); |
1447 | REQUIRE(CHECK_COLUMN(result, 0, {8})); |
1448 | |
1449 | result = con.Query("SELECT * FROM greek_utf8 ORDER BY 1;" ); |
1450 | REQUIRE(CHECK_COLUMN(result, 0, {1689, 1690, 41561, 45804, 51981, 171067, 182773, 607808})); |
1451 | REQUIRE(CHECK_COLUMN(result, 1, |
1452 | {"\x30\x30\x69\\047\x6d" , "\x30\x30\x69\\047\x76" , "\x32\x30\x31\x35\xe2\x80\x8e" , |
1453 | "\x32\x31\xcf\x80" , "\x32\x34\x68\x6f\x75\x72\x73\xe2\x80\xac" , |
1454 | "\x61\x72\x64\x65\xcc\x80\x63\x68" , "\x61\xef\xac\x81" , |
1455 | "\x70\x6f\x76\x65\x72\x74\x79\xe2\x80\xaa" })); |
1456 | REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 1, 1, 1, 2, 1, 1})); |
1457 | } |
1458 | |
1459 | TEST_CASE("Test copy from ncvoter csv" , "[copy]" ) { |
1460 | FileSystem fs; |
1461 | unique_ptr<QueryResult> result; |
1462 | DuckDB db(nullptr); |
1463 | Connection con(db); |
1464 | |
1465 | auto csv_path = GetCSVPath(); |
1466 | auto ncvoter_csv = fs.JoinPath(csv_path, "ncvoter.csv" ); |
1467 | WriteBinary(ncvoter_csv, ncvoter, sizeof(ncvoter)); |
1468 | |
1469 | REQUIRE_NO_FAIL(con.Query( |
1470 | "CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd " |
1471 | "STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, " |
1472 | "name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, " |
1473 | "full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, " |
1474 | "street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, " |
1475 | "res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, " |
1476 | "mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state " |
1477 | "STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number " |
1478 | "STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, " |
1479 | "party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, " |
1480 | "registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc " |
1481 | "STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv " |
1482 | "STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, " |
1483 | "nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, " |
1484 | "county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, " |
1485 | "school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, " |
1486 | "water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, " |
1487 | "sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, " |
1488 | "munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, " |
1489 | "confidential_ind STRING, age STRING, ncid STRING, vtd_abbrv STRING, vtd_desc STRING);" )); |
1490 | result = con.Query("COPY ncvoters FROM '" + ncvoter_csv + "' DELIMITER '\t';" ); |
1491 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
1492 | |
1493 | result = con.Query("SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;" ); |
1494 | REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); |
1495 | REQUIRE(CHECK_COLUMN(result, 1, |
1496 | {"ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , |
1497 | "ALAMANCE" , "ALAMANCE" , "ALAMANCE" })); |
1498 | REQUIRE(CHECK_COLUMN(result, 2, {"09S" , "09S" , "03W" , "09S" , "1210" , "035" , "124" , "06E" , "035" , "064" })); |
1499 | REQUIRE(CHECK_COLUMN(result, 3, |
1500 | {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
1501 | } |
1502 | |
1503 | TEST_CASE("Test date copy" , "[copy]" ) { |
1504 | FileSystem fs; |
1505 | unique_ptr<QueryResult> result; |
1506 | DuckDB db(nullptr); |
1507 | Connection con(db); |
1508 | |
1509 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE date_test(d date);" )); |
1510 | |
1511 | auto csv_path = GetCSVPath(); |
1512 | auto date_csv = fs.JoinPath(csv_path, "date.csv" ); |
1513 | WriteCSV(date_csv, "2019-06-05\n" ); |
1514 | |
1515 | result = con.Query("COPY date_test FROM '" + date_csv + "';" ); |
1516 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
1517 | |
1518 | result = con.Query("SELECT cast(d as string) FROM date_test;" ); |
1519 | REQUIRE(CHECK_COLUMN(result, 0, {"2019-06-05" })); |
1520 | } |
1521 | |
1522 | TEST_CASE("Test cranlogs broken gzip copy and temp table" , "[copy][.]" ) { |
1523 | FileSystem fs; |
1524 | unique_ptr<QueryResult> result; |
1525 | DuckDB db(nullptr); |
1526 | Connection con(db); |
1527 | |
1528 | auto csv_path = GetCSVPath(); |
1529 | auto cranlogs_csv = fs.JoinPath(csv_path, "cranlogs.csv.gz" ); |
1530 | WriteBinary(cranlogs_csv, tmp2013_06_15, sizeof(tmp2013_06_15)); |
1531 | |
1532 | REQUIRE_NO_FAIL(con.Query("CREATE TEMPORARY TABLE cranlogs (date date,time string,size int,r_version string,r_arch " |
1533 | "string,r_os string,package string,version string,country string,ip_id int)" )); |
1534 | |
1535 | result = con.Query("COPY cranlogs FROM '" + cranlogs_csv + "' DELIMITER ',' HEADER;" ); |
1536 | REQUIRE(CHECK_COLUMN(result, 0, {37459})); |
1537 | } |
1538 | |
1539 | TEST_CASE("Test imdb escapes" , "[copy]" ) { |
1540 | FileSystem fs; |
1541 | unique_ptr<QueryResult> result; |
1542 | DuckDB db(nullptr); |
1543 | Connection con(db); |
1544 | |
1545 | auto csv_path = GetCSVPath(); |
1546 | auto imdb_movie_info = fs.JoinPath(csv_path, "imdb_movie_info.csv" ); |
1547 | WriteBinary(imdb_movie_info, imdb_movie_info_escaped, sizeof(imdb_movie_info_escaped)); |
1548 | |
1549 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE movie_info (id integer NOT NULL PRIMARY KEY, movie_id integer NOT NULL, " |
1550 | "info_type_id integer NOT NULL, info text NOT NULL, note text);" )); |
1551 | |
1552 | result = con.Query("COPY movie_info FROM '" + imdb_movie_info + "' DELIMITER ',' ESCAPE '\\';" ); |
1553 | REQUIRE(result->success); |
1554 | REQUIRE(CHECK_COLUMN(result, 0, {201})); |
1555 | |
1556 | // TODO: actually check results |
1557 | result = con.Query("SELECT * FROM movie_info;" ); |
1558 | } |
1559 | |
1560 | TEST_CASE("Test read CSV function with lineitem" , "[copy]" ) { |
1561 | FileSystem fs; |
1562 | unique_ptr<QueryResult> result; |
1563 | DuckDB db(nullptr); |
1564 | Connection con(db); |
1565 | |
1566 | auto csv_path = GetCSVPath(); |
1567 | auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv" ); |
1568 | WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample)); |
1569 | |
1570 | // create a view using the read_csv function |
1571 | REQUIRE_NO_FAIL(con.Query( |
1572 | "CREATE VIEW lineitem AS SELECT * FROM read_csv('" + lineitem_csv + |
1573 | "', '|', STRUCT_PACK(l_orderkey := 'INT', l_partkey := 'INT', l_suppkey := 'INT', l_linenumber := 'INT', " |
1574 | "l_quantity := 'INTEGER', l_extendedprice := 'DOUBLE', l_discount := 'DOUBLE', l_tax := 'DOUBLE', l_returnflag " |
1575 | ":= 'VARCHAR', l_linestatus := 'VARCHAR', l_shipdate := 'DATE', l_commitdate := 'DATE', l_receiptdate := " |
1576 | "'DATE', l_shipinstruct := 'VARCHAR', l_shipmode := 'VARCHAR', l_comment := 'VARCHAR'));" )); |
1577 | |
1578 | // each of these will read the CSV again through the view |
1579 | result = con.Query("SELECT COUNT(*) FROM lineitem" ); |
1580 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
1581 | |
1582 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
1583 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
1584 | REQUIRE( |
1585 | CHECK_COLUMN(result, 1, |
1586 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
1587 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
1588 | |
1589 | // test incorrect usage of read_csv function |
1590 | // wrong argument type |
1591 | REQUIRE_FAIL(con.Query("SELECT * FROM read_csv('" + lineitem_csv + "', '|', STRUCT_PACK(l_orderkey := 5))" )); |
1592 | } |
1593 | |
1594 | TEST_CASE("Test CSV with UTF8 NFC Normalization" , "[copy]" ) { |
1595 | FileSystem fs; |
1596 | unique_ptr<QueryResult> result; |
1597 | DuckDB db(nullptr); |
1598 | Connection con(db); |
1599 | |
1600 | auto csv_path = GetCSVPath(); |
1601 | auto nfc_csv = fs.JoinPath(csv_path, "nfc.csv" ); |
1602 | |
1603 | const char *nfc_content = "\xc3\xbc\n\x75\xcc\x88" ; |
1604 | |
1605 | WriteBinary(nfc_csv, (const uint8_t *)nfc_content, strlen(nfc_content)); |
1606 | |
1607 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE nfcstrings (s STRING);" )); |
1608 | REQUIRE_NO_FAIL(con.Query("COPY nfcstrings FROM '" + nfc_csv + "';" )); |
1609 | |
1610 | result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE s = '\xc3\xbc'" ); |
1611 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(2)})); |
1612 | } |
1613 | |
1614 | // http://www.unicode.org/Public/UCD/latest/ucd/NormalizationTest.txt |
1615 | TEST_CASE("Test CSV with Unicode NFC Normalization test suite" , "[copy]" ) { |
1616 | FileSystem fs; |
1617 | unique_ptr<QueryResult> result; |
1618 | DuckDB db(nullptr); |
1619 | Connection con(db); |
1620 | |
1621 | auto csv_path = GetCSVPath(); |
1622 | auto nfc_csv = fs.JoinPath(csv_path, "nfc_test_suite.csv" ); |
1623 | |
1624 | WriteBinary(nfc_csv, nfc_normalization, sizeof(nfc_normalization)); |
1625 | |
1626 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE nfcstrings (source STRING, nfc STRING, nfd STRING);" )); |
1627 | REQUIRE_NO_FAIL(con.Query("COPY nfcstrings FROM '" + nfc_csv + "' DELIMITER '|';" )); |
1628 | |
1629 | result = con.Query("SELECT COUNT(*) FROM nfcstrings" ); |
1630 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)})); |
1631 | |
1632 | result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE source=nfc" ); |
1633 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)})); |
1634 | |
1635 | result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE nfc=nfd" ); |
1636 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)})); |
1637 | } |
1638 | |
1639 | TEST_CASE("Test CSV reading/writing from relations" , "[relation_api]" ) { |
1640 | DuckDB db(nullptr); |
1641 | Connection con(db); |
1642 | unique_ptr<QueryResult> result; |
1643 | |
1644 | // write a bunch of values to a CSV |
1645 | auto csv_file = TestCreatePath("relationtest.csv" ); |
1646 | |
1647 | con.Values("(1), (2), (3)" , {"i" })->WriteCSV(csv_file); |
1648 | |
1649 | // now scan the CSV file |
1650 | auto csv_scan = con.ReadCSV(csv_file, {"i INTEGER" }); |
1651 | result = csv_scan->Execute(); |
1652 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
1653 | |
1654 | REQUIRE_THROWS(con.ReadCSV(csv_file, {"i INTEGER); SELECT 42;--" })); |
1655 | } |
1656 | |