| 1 | #include "catch.hpp" |
| 2 | #include "duckdb/common/file_system.hpp" |
| 3 | #include "duckdb/common/types/date.hpp" |
| 4 | #include "test_csv_header.hpp" |
| 5 | #include "test_helpers.hpp" |
| 6 | |
| 7 | #include <fstream> |
| 8 | |
| 9 | using namespace duckdb; |
| 10 | using namespace std; |
| 11 | |
| 12 | TEST_CASE("Test copy statement" , "[copy]" ) { |
| 13 | FileSystem fs; |
| 14 | unique_ptr<QueryResult> result; |
| 15 | DuckDB db(nullptr); |
| 16 | Connection con(db); |
| 17 | |
| 18 | auto csv_path = GetCSVPath(); |
| 19 | |
| 20 | // generate CSV file with ',' as delimiter and complex strings |
| 21 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 22 | for (int i = 0; i < 5000; i++) { |
| 23 | from_csv_file << i << "," << i << ", test" << endl; |
| 24 | } |
| 25 | from_csv_file.close(); |
| 26 | |
| 27 | // load CSV file into a table |
| 28 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 29 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 30 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 31 | |
| 32 | result = con.Query("SELECT COUNT(a), SUM(a) FROM test;" ); |
| 33 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 34 | REQUIRE(CHECK_COLUMN(result, 1, {12497500})); |
| 35 | |
| 36 | result = con.Query("SELECT * FROM test ORDER BY 1 LIMIT 3;" ); |
| 37 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 38 | REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2})); |
| 39 | REQUIRE(CHECK_COLUMN(result, 2, {" test" , " test" , " test" })); |
| 40 | |
| 41 | // create CSV file from table |
| 42 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" ); |
| 43 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 44 | // load the same CSV file back again |
| 45 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test2 (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 46 | result = con.Query("COPY test2 FROM '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" ); |
| 47 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 48 | result = con.Query("SELECT * FROM test2 ORDER BY 1 LIMIT 3;" ); |
| 49 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 50 | REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2})); |
| 51 | REQUIRE(CHECK_COLUMN(result, 2, {" test" , " test" , " test" })); |
| 52 | |
| 53 | // test too few rows |
| 54 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test_too_few_rows (a INTEGER, b INTEGER, c VARCHAR, d INTEGER);" )); |
| 55 | REQUIRE_FAIL(con.Query("COPY test_too_few_rows FROM '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" )); |
| 56 | |
| 57 | // create CSV file from query |
| 58 | result = con.Query("COPY (SELECT a,b FROM test WHERE a < 4000) TO '" + fs.JoinPath(csv_path, "test3.csv" ) + "';" ); |
| 59 | REQUIRE(CHECK_COLUMN(result, 0, {4000})); |
| 60 | // load the same CSV file back again |
| 61 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test3 (a INTEGER, b INTEGER);" )); |
| 62 | result = con.Query("COPY test3 FROM '" + fs.JoinPath(csv_path, "test3.csv" ) + "';" ); |
| 63 | REQUIRE(CHECK_COLUMN(result, 0, {4000})); |
| 64 | result = con.Query("SELECT * FROM test3 ORDER BY 1 LIMIT 3;" ); |
| 65 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 66 | REQUIRE(CHECK_COLUMN(result, 1, {0, 1, 2})); |
| 67 | |
| 68 | // export selected columns from a table to a CSV file |
| 69 | result = |
| 70 | con.Query("COPY test (a,c) TO '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (DELIMITER ',', HEADER false);" ); |
| 71 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 72 | |
| 73 | // import selected columns from CSV file |
| 74 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test4 (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 75 | result = con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (DELIM ',', HEADER 0);" ); |
| 76 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 77 | result = con.Query("SELECT * FROM test4 ORDER BY 1 LIMIT 3;" ); |
| 78 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 79 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()})); |
| 80 | REQUIRE(CHECK_COLUMN(result, 2, {" test" , " test" , " test" })); |
| 81 | |
| 82 | // unsupported type for HEADER |
| 83 | REQUIRE_FAIL( |
| 84 | con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (SEP ',', HEADER 0.2);" )); |
| 85 | // empty delimiter |
| 86 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (SEP);" )); |
| 87 | // number as delimiter |
| 88 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (SEP 1);" )); |
| 89 | // multiple format options |
| 90 | REQUIRE_FAIL( |
| 91 | con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (FORMAT 'csv', FORMAT 'json');" )); |
| 92 | // number as escape string |
| 93 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ESCAPE 1);" )); |
| 94 | // no escape string |
| 95 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ESCAPE);" )); |
| 96 | // number as quote string |
| 97 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (QUOTE 1);" )); |
| 98 | // no quote string |
| 99 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (QUOTE);" )); |
| 100 | // no format string |
| 101 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (FORMAT);" )); |
| 102 | // encoding must not be empty and must have the correct parameter type and value |
| 103 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ENCODING);" )); |
| 104 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ENCODING 42);" )); |
| 105 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (ENCODING 'utf-42');" )); |
| 106 | // don't allow for non-existant copy options |
| 107 | REQUIRE_FAIL(con.Query("COPY test4 (a,c) FROM '" + fs.JoinPath(csv_path, "test4.csv" ) + "' (MAGIC '42');" )); |
| 108 | |
| 109 | // use a different delimiter |
| 110 | auto pipe_csv = fs.JoinPath(csv_path, "test_pipe.csv" ); |
| 111 | ofstream from_csv_file_pipe(pipe_csv); |
| 112 | for (int i = 0; i < 10; i++) { |
| 113 | from_csv_file_pipe << i << "|" << i << "|test" << endl; |
| 114 | } |
| 115 | from_csv_file_pipe.close(); |
| 116 | |
| 117 | // create new table |
| 118 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 119 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 120 | result = con.Query("COPY test FROM '" + pipe_csv + "' (SEPARATOR '|');" ); |
| 121 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 122 | |
| 123 | // throw exception if a line contains too many values |
| 124 | ofstream csv_too_many_values_file(fs.JoinPath(csv_path, "too_many_values.csv" )); |
| 125 | csv_too_many_values_file << "1,2,3,4" << endl; |
| 126 | csv_too_many_values_file.close(); |
| 127 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "too_many_values.csv" ) + "';" )); |
| 128 | |
| 129 | // test default null string |
| 130 | auto null_csv = fs.JoinPath(csv_path, "null.csv" ); |
| 131 | ofstream from_csv_file_null(null_csv); |
| 132 | for (int i = 0; i < 1; i++) |
| 133 | from_csv_file_null << i << "||test" << endl; |
| 134 | from_csv_file_null.close(); |
| 135 | result = con.Query("COPY test FROM '" + null_csv + "' DELIMITER '|';" ); |
| 136 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 137 | |
| 138 | // test invalid UTF-8 |
| 139 | auto invalid_utf_csv = fs.JoinPath(csv_path, "invalid_utf.csv" ); |
| 140 | ofstream from_csv_file_utf(invalid_utf_csv); |
| 141 | for (int i = 0; i < 1; i++) |
| 142 | from_csv_file_utf << i << "42|42|\xe2\x82\x28" << endl; |
| 143 | from_csv_file_utf.close(); |
| 144 | REQUIRE_FAIL(con.Query("COPY test FROM '" + invalid_utf_csv + "' DELIMITER '|';" )); |
| 145 | |
| 146 | // empty file |
| 147 | ofstream empty_file(fs.JoinPath(csv_path, "empty.csv" )); |
| 148 | empty_file.close(); |
| 149 | |
| 150 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE empty_table (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 151 | result = con.Query("COPY empty_table FROM '" + fs.JoinPath(csv_path, "empty.csv" ) + "';" ); |
| 152 | REQUIRE(CHECK_COLUMN(result, 0, {0})); |
| 153 | |
| 154 | // unterminated quotes |
| 155 | ofstream unterminated_quotes_file(fs.JoinPath(csv_path, "unterminated.csv" )); |
| 156 | unterminated_quotes_file << "\"hello\n\n world\n" ; |
| 157 | unterminated_quotes_file.close(); |
| 158 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE unterminated (a VARCHAR);" )); |
| 159 | REQUIRE_FAIL(con.Query("COPY unterminated FROM '" + fs.JoinPath(csv_path, "unterminated.csv" ) + "';" )); |
| 160 | |
| 161 | // 1024 rows (vector size) |
| 162 | ofstream csv_vector_size(fs.JoinPath(csv_path, "vsize.csv" )); |
| 163 | for (int i = 0; i < STANDARD_VECTOR_SIZE; i++) { |
| 164 | csv_vector_size << i << "," << i << ", test" << endl; |
| 165 | } |
| 166 | csv_vector_size.close(); |
| 167 | |
| 168 | // load CSV file into a table |
| 169 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE vsize (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 170 | result = con.Query("COPY vsize FROM '" + fs.JoinPath(csv_path, "vsize.csv" ) + "';" ); |
| 171 | REQUIRE(CHECK_COLUMN(result, 0, {STANDARD_VECTOR_SIZE})); |
| 172 | } |
| 173 | |
| 174 | TEST_CASE("Test CSV file without trailing newline" , "[copy]" ) { |
| 175 | FileSystem fs; |
| 176 | unique_ptr<QueryResult> result; |
| 177 | DuckDB db(nullptr); |
| 178 | Connection con(db); |
| 179 | |
| 180 | auto csv_path = GetCSVPath(); |
| 181 | |
| 182 | // no newline at end of file with simple delimiter |
| 183 | ofstream csv_no_newline(fs.JoinPath(csv_path, "no_newline.csv" )); |
| 184 | for (int i = 0; i < 1024; i++) { |
| 185 | csv_no_newline << i << "," << i << ", test" << (i + 1 < 1024 ? "\n" : "" ); |
| 186 | } |
| 187 | csv_no_newline.close(); |
| 188 | |
| 189 | // load CSV file into a table |
| 190 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE no_newline (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 191 | result = con.Query("COPY no_newline FROM '" + fs.JoinPath(csv_path, "no_newline.csv" ) + "';" ); |
| 192 | REQUIRE(CHECK_COLUMN(result, 0, {1024})); |
| 193 | |
| 194 | // no newline at end of file with unicode delimiter |
| 195 | ofstream csv_no_newline_unicode(fs.JoinPath(csv_path, "no_newline_unicode.csv" )); |
| 196 | for (int i = 0; i < 1024; i++) { |
| 197 | csv_no_newline_unicode << i << "🦆" << i << "🦆 test" << (i + 1 < 1024 ? "\n" : "" ); |
| 198 | } |
| 199 | csv_no_newline_unicode.close(); |
| 200 | |
| 201 | // load CSV file into a table |
| 202 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE no_newline_unicode (a INTEGER, b INTEGER, c VARCHAR(10));" )); |
| 203 | result = con.Query("COPY no_newline_unicode FROM '" + fs.JoinPath(csv_path, "no_newline_unicode.csv" ) + |
| 204 | "' DELIMITER '🦆';" ); |
| 205 | REQUIRE(CHECK_COLUMN(result, 0, {1024})); |
| 206 | } |
| 207 | |
| 208 | TEST_CASE("Test CSVs with repeating patterns in delimiter/escape/quote" , "[copy]" ) { |
| 209 | FileSystem fs; |
| 210 | unique_ptr<QueryResult> result; |
| 211 | DuckDB db(nullptr); |
| 212 | Connection con(db); |
| 213 | |
| 214 | auto csv_dir = GetCSVPath(); |
| 215 | auto csv_path = fs.JoinPath(csv_dir, "abac.csv" ); |
| 216 | |
| 217 | SECTION("ABAC delimiter" ) { |
| 218 | ofstream csv_stream(csv_path); |
| 219 | // this is equivalent to "AB|ABAB|" |
| 220 | csv_stream << "ABABACABABABAC" ; |
| 221 | csv_stream.close(); |
| 222 | |
| 223 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR, c VARCHAR);" )); |
| 224 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC';" ); |
| 225 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 226 | |
| 227 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 228 | REQUIRE(CHECK_COLUMN(result, 0, {"AB" })); |
| 229 | REQUIRE(CHECK_COLUMN(result, 1, {"ABAB" })); |
| 230 | REQUIRE(CHECK_COLUMN(result, 2, {Value()})); |
| 231 | |
| 232 | // do the same but with a large unused quote specifier |
| 233 | REQUIRE_NO_FAIL(con.Query("DELETE FROM abac_tbl;" )); |
| 234 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC' QUOTE 'ABABABABABAB';" ); |
| 235 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 236 | |
| 237 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 238 | REQUIRE(CHECK_COLUMN(result, 0, {"AB" })); |
| 239 | REQUIRE(CHECK_COLUMN(result, 1, {"ABAB" })); |
| 240 | REQUIRE(CHECK_COLUMN(result, 2, {Value()})); |
| 241 | } |
| 242 | SECTION("Mix of complex quotes/delimiters/escapes" ) { |
| 243 | ofstream csv_stream(csv_path); |
| 244 | // quote -> "ABAB" |
| 245 | // escape -> "ABAC" |
| 246 | // delimiter -> "ABAD" |
| 247 | // first value is an escaped quote (ABAB) |
| 248 | // second value is a quoted delimiter followed by an escaped quote |
| 249 | // third value is an escape outside of a set of quotes (interpreted as a literal value) |
| 250 | csv_stream << "ABABABACABABABABABADABABABADABACABABABABABADABAC" ; |
| 251 | csv_stream.close(); |
| 252 | |
| 253 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR, c VARCHAR);" )); |
| 254 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAD' QUOTE 'ABAB' ESCAPE 'ABAC';" ); |
| 255 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 256 | |
| 257 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 258 | REQUIRE(CHECK_COLUMN(result, 0, {"ABAB" })); |
| 259 | REQUIRE(CHECK_COLUMN(result, 1, {"ABADABAB" })); |
| 260 | REQUIRE(CHECK_COLUMN(result, 2, {"ABAC" })); |
| 261 | } |
| 262 | SECTION("CSV terminates in the middle of quote parsing" ) { |
| 263 | ofstream csv_stream(csv_path); |
| 264 | csv_stream << "ABAB" ; |
| 265 | csv_stream.close(); |
| 266 | |
| 267 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 268 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABABABAB';" ); |
| 269 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 270 | |
| 271 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 272 | REQUIRE(CHECK_COLUMN(result, 0, {"ABAB" })); |
| 273 | } |
| 274 | SECTION("Newline in the middle of quote parsing" ) { |
| 275 | ofstream csv_stream(csv_path); |
| 276 | csv_stream << "ABAB\nABAB" ; |
| 277 | csv_stream.close(); |
| 278 | |
| 279 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 280 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABABABAB';" ); |
| 281 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
| 282 | |
| 283 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 284 | REQUIRE(CHECK_COLUMN(result, 0, {"ABAB" , "ABAB" })); |
| 285 | } |
| 286 | SECTION("Simple quote terminates immediately results in error" ) { |
| 287 | ofstream csv_stream(csv_path); |
| 288 | csv_stream << "\"" ; |
| 289 | csv_stream.close(); |
| 290 | |
| 291 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 292 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"';" )); |
| 293 | } |
| 294 | SECTION("File ends in quoted value (simple)" ) { |
| 295 | ofstream csv_stream(csv_path); |
| 296 | csv_stream << "\"\"" ; |
| 297 | csv_stream.close(); |
| 298 | |
| 299 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 300 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"';" ); |
| 301 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 302 | |
| 303 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 304 | REQUIRE(CHECK_COLUMN(result, 0, {Value()})); |
| 305 | } |
| 306 | SECTION("File ends in quoted value (complex)" ) { |
| 307 | ofstream csv_stream(csv_path); |
| 308 | csv_stream << "\"\"" ; |
| 309 | csv_stream.close(); |
| 310 | |
| 311 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 312 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' DELIMITER 'AAAB';" ); |
| 313 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 314 | |
| 315 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 316 | REQUIRE(CHECK_COLUMN(result, 0, {Value()})); |
| 317 | } |
| 318 | SECTION("Simple quote terminates after escape results in error" ) { |
| 319 | ofstream csv_stream(csv_path); |
| 320 | csv_stream << "\"\\\"" ; |
| 321 | csv_stream.close(); |
| 322 | |
| 323 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 324 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\\';" )); |
| 325 | } |
| 326 | SECTION("Simple quote terminates after quote escape results in error" ) { |
| 327 | ofstream csv_stream(csv_path); |
| 328 | csv_stream << "\"\"\"" ; |
| 329 | csv_stream.close(); |
| 330 | |
| 331 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 332 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\"';" )); |
| 333 | } |
| 334 | SECTION("Simple quote terminates after escape results in error" ) { |
| 335 | ofstream csv_stream(csv_path); |
| 336 | csv_stream << "\"\\" ; |
| 337 | csv_stream.close(); |
| 338 | |
| 339 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 340 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '\"' ESCAPE '\\';" )); |
| 341 | } |
| 342 | SECTION("Multi-byte quote terminates immediately results in error" ) { |
| 343 | ofstream csv_stream(csv_path); |
| 344 | csv_stream << "ABABAC" ; |
| 345 | csv_stream.close(); |
| 346 | |
| 347 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 348 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABABAC';" )); |
| 349 | } |
| 350 | SECTION("Quote followed by incomplete multi-byte delimiter" ) { |
| 351 | ofstream csv_stream(csv_path); |
| 352 | csv_stream << "\"\"AB" ; |
| 353 | csv_stream.close(); |
| 354 | |
| 355 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 356 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'ABAC';" )); |
| 357 | REQUIRE_NO_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'AB';" )); |
| 358 | } |
| 359 | SECTION("Multi-byte quote terminates after escape results in error" ) { |
| 360 | ofstream csv_stream(csv_path); |
| 361 | csv_stream << "ABACABABABAC" ; |
| 362 | csv_stream.close(); |
| 363 | |
| 364 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 365 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAB';" )); |
| 366 | } |
| 367 | SECTION("Multi-byte quote terminates after quote escape results in error" ) { |
| 368 | ofstream csv_stream(csv_path); |
| 369 | csv_stream << "ABACABACABAC" ; |
| 370 | csv_stream.close(); |
| 371 | |
| 372 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 373 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAC';" )); |
| 374 | } |
| 375 | SECTION("Multi-byte quote terminates after escape results in error" ) { |
| 376 | ofstream csv_stream(csv_path); |
| 377 | csv_stream << "ABACABAB" ; |
| 378 | csv_stream.close(); |
| 379 | |
| 380 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 381 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'ABAC' ESCAPE 'ABAB';" )); |
| 382 | } |
| 383 | SECTION("Delimiter, quote and escape have a maximum size of 255 bytes" ) { |
| 384 | ofstream csv_stream(csv_path); |
| 385 | csv_stream << "ABAB" ; |
| 386 | csv_stream.close(); |
| 387 | |
| 388 | string long_string(1000, 'a'); |
| 389 | |
| 390 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 391 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE '" + long_string + "';" )); |
| 392 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' ESCAPE '" + long_string + "';" )); |
| 393 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '" + long_string + "';" )); |
| 394 | REQUIRE_NO_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' QUOTE 'BLABLABLA';" )); |
| 395 | } |
| 396 | SECTION("Test \r newline with multi-byte delimiter" ) { |
| 397 | ofstream csv_stream(csv_path); |
| 398 | csv_stream << "ABAB\rABAC\r" ; |
| 399 | csv_stream.close(); |
| 400 | |
| 401 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
| 402 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" ); |
| 403 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
| 404 | |
| 405 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 406 | REQUIRE(CHECK_COLUMN(result, 0, {"A" , "A" })); |
| 407 | REQUIRE(CHECK_COLUMN(result, 1, {"B" , "C" })); |
| 408 | } |
| 409 | SECTION("Test \r\n newline with multi-byte delimiter" ) { |
| 410 | ofstream csv_stream(csv_path); |
| 411 | csv_stream << "ABAB\r\nABAC\r\n" ; |
| 412 | csv_stream.close(); |
| 413 | |
| 414 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
| 415 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" ); |
| 416 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
| 417 | |
| 418 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 419 | REQUIRE(CHECK_COLUMN(result, 0, {"A" , "A" })); |
| 420 | REQUIRE(CHECK_COLUMN(result, 1, {"B" , "C" })); |
| 421 | } |
| 422 | SECTION("Test unterminated quotes with multi-line delimiter" ) { |
| 423 | ofstream csv_stream(csv_path); |
| 424 | csv_stream << "\"AAA" ; |
| 425 | csv_stream.close(); |
| 426 | |
| 427 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 428 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" )); |
| 429 | } |
| 430 | SECTION("Test unquote not followed by delimiter" ) { |
| 431 | ofstream csv_stream(csv_path); |
| 432 | csv_stream << "\"AAA\"BB" ; |
| 433 | csv_stream.close(); |
| 434 | |
| 435 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
| 436 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" )); |
| 437 | } |
| 438 | SECTION("Test escape followed by non-quote and non-escape (single-byte)" ) { |
| 439 | ofstream csv_stream(csv_path); |
| 440 | csv_stream << "\"AAA\\BB\"|A" ; |
| 441 | csv_stream.close(); |
| 442 | |
| 443 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
| 444 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '|' ESCAPE '\\';" )); |
| 445 | } |
| 446 | SECTION("Test escape followed by non-quote and non-escape (multi-byte)" ) { |
| 447 | ofstream csv_stream(csv_path); |
| 448 | csv_stream << "\"AAA\\BB\"BAA" ; |
| 449 | csv_stream.close(); |
| 450 | |
| 451 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR, b VARCHAR);" )); |
| 452 | REQUIRE_FAIL(con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA' ESCAPE '\\';" )); |
| 453 | } |
| 454 | SECTION("Test file end after delimiter with multi-byte delimiter" ) { |
| 455 | ofstream csv_stream(csv_path); |
| 456 | csv_stream << "AAABA" ; |
| 457 | csv_stream.close(); |
| 458 | |
| 459 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 460 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER 'BA';" ); |
| 461 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 462 | |
| 463 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 464 | REQUIRE(CHECK_COLUMN(result, 0, {"AAA" })); |
| 465 | } |
| 466 | SECTION("Test file end after delimiter with single-byte delimiter" ) { |
| 467 | ofstream csv_stream(csv_path); |
| 468 | csv_stream << "AAA|" ; |
| 469 | csv_stream.close(); |
| 470 | |
| 471 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE abac_tbl (a VARCHAR);" )); |
| 472 | result = con.Query("COPY abac_tbl FROM '" + csv_path + "' DELIMITER '|';" ); |
| 473 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 474 | |
| 475 | result = con.Query("SELECT * FROM abac_tbl" ); |
| 476 | REQUIRE(CHECK_COLUMN(result, 0, {"AAA" })); |
| 477 | } |
| 478 | } |
| 479 | |
| 480 | TEST_CASE("Test long value with escapes" , "[copy]" ) { |
| 481 | FileSystem fs; |
| 482 | unique_ptr<QueryResult> result; |
| 483 | DuckDB db(nullptr); |
| 484 | Connection con(db); |
| 485 | |
| 486 | auto csv_path = GetCSVPath(); |
| 487 | |
| 488 | string value = string(10000, 'a') + "\"\"" + string(20000, 'b'); |
| 489 | string expected_value = string(10000, 'a') + "\"" + string(20000, 'b'); |
| 490 | |
| 491 | // long value with escape and simple delimiter |
| 492 | ofstream long_escaped_value(fs.JoinPath(csv_path, "long_escaped_value.csv" )); |
| 493 | long_escaped_value << 1 << "🦆" << 2 << "🦆" |
| 494 | << "\"" << value << "\"" << endl; |
| 495 | long_escaped_value.close(); |
| 496 | |
| 497 | // load CSV file into a table |
| 498 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE long_escaped_value (a INTEGER, b INTEGER, c VARCHAR);" )); |
| 499 | result = con.Query("COPY long_escaped_value FROM '" + fs.JoinPath(csv_path, "long_escaped_value.csv" ) + |
| 500 | "' DELIMITER '🦆';" ); |
| 501 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 502 | |
| 503 | result = con.Query("SELECT * FROM long_escaped_value" ); |
| 504 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 505 | REQUIRE(CHECK_COLUMN(result, 1, {2})); |
| 506 | REQUIRE(CHECK_COLUMN(result, 2, {expected_value})); |
| 507 | |
| 508 | // long value with escape and complex delimiter |
| 509 | ofstream long_escaped_value_unicode(fs.JoinPath(csv_path, "long_escaped_value_unicode.csv" )); |
| 510 | long_escaped_value_unicode << 1 << "," << 2 << "," |
| 511 | << "\"" << value << "\"" << endl; |
| 512 | long_escaped_value_unicode.close(); |
| 513 | |
| 514 | // load CSV file into a table |
| 515 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE long_escaped_value_unicode (a INTEGER, b INTEGER, c VARCHAR);" )); |
| 516 | result = con.Query("COPY long_escaped_value_unicode FROM '" + |
| 517 | fs.JoinPath(csv_path, "long_escaped_value_unicode.csv" ) + "';" ); |
| 518 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 519 | |
| 520 | result = con.Query("SELECT * FROM long_escaped_value_unicode" ); |
| 521 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 522 | REQUIRE(CHECK_COLUMN(result, 1, {2})); |
| 523 | REQUIRE(CHECK_COLUMN(result, 2, {expected_value})); |
| 524 | } |
| 525 | |
| 526 | TEST_CASE("Test NULL option of copy statement" , "[copy]" ) { |
| 527 | FileSystem fs; |
| 528 | unique_ptr<QueryResult> result; |
| 529 | DuckDB db(nullptr); |
| 530 | Connection con(db); |
| 531 | |
| 532 | auto csv_path = GetCSVPath(); |
| 533 | |
| 534 | // generate CSV file with default delimiter |
| 535 | ofstream from_csv_file(fs.JoinPath(csv_path, "test_null_option.csv" )); |
| 536 | for (int i = 0; i < 3; i++) { |
| 537 | from_csv_file << i << ",,\"test\",null" << endl; |
| 538 | } |
| 539 | from_csv_file.close(); |
| 540 | |
| 541 | // create a table |
| 542 | REQUIRE_NO_FAIL(con.Query( |
| 543 | "CREATE TABLE test_null_option (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
| 544 | |
| 545 | // test COPY ... FROM ... |
| 546 | |
| 547 | // implicitly using default NULL value |
| 548 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "';" ); |
| 549 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 550 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 551 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 552 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()})); |
| 553 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
| 554 | REQUIRE(CHECK_COLUMN(result, 3, {"null" , "null" , "null" })); |
| 555 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
| 556 | |
| 557 | // explicitly using default NULL value |
| 558 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL '');" ); |
| 559 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 560 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 561 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 562 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), Value(), Value()})); |
| 563 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
| 564 | REQUIRE(CHECK_COLUMN(result, 3, {"null" , "null" , "null" })); |
| 565 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
| 566 | |
| 567 | // make sure a quoted null string is interpreted as a null value |
| 568 | result = |
| 569 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL 'test');" ); |
| 570 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 571 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 572 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 573 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
| 574 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()})); |
| 575 | REQUIRE(CHECK_COLUMN(result, 3, {"null" , "null" , "null" })); |
| 576 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
| 577 | |
| 578 | // setting specific NULL value |
| 579 | result = |
| 580 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL 'null');" ); |
| 581 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 582 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 583 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 584 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
| 585 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
| 586 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
| 587 | |
| 588 | // invalid parameter type |
| 589 | REQUIRE_FAIL( |
| 590 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL null);" )); |
| 591 | |
| 592 | // delimiter must not appear in the NULL specification |
| 593 | REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
| 594 | "' (NULL 'null,');" )); |
| 595 | REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
| 596 | "' (DELIMITER 'null', NULL 'null');" )); |
| 597 | REQUIRE_FAIL(con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
| 598 | "' (DELIMITER 'null', NULL 'nu');" )); |
| 599 | |
| 600 | // no parameter type |
| 601 | REQUIRE_FAIL( |
| 602 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + "' (NULL);" )); |
| 603 | |
| 604 | // empty integer column with non-default NULL string |
| 605 | REQUIRE_NO_FAIL(con.Query( |
| 606 | "CREATE TABLE test_null_option_2 (col_a INTEGER, col_b INTEGER, col_c VARCHAR(10), col_d VARCHAR(10));" )); |
| 607 | REQUIRE_FAIL(con.Query("COPY test_null_option_2 FROM '" + fs.JoinPath(csv_path, "test_null_option.csv" ) + |
| 608 | "' (NULL 'null');" )); |
| 609 | |
| 610 | // test COPY ... TO ... |
| 611 | |
| 612 | // implicitly using default NULL value |
| 613 | result = con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_2.csv" ) + "';" ); |
| 614 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 615 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
| 616 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_2.csv" ) + "';" ); |
| 617 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 618 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 619 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 620 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
| 621 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
| 622 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
| 623 | |
| 624 | // explicitly using default NULL value |
| 625 | result = con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_3.csv" ) + "' (NULL '');" ); |
| 626 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 627 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
| 628 | result = |
| 629 | con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_3.csv" ) + "' (NULL '');" ); |
| 630 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 631 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 632 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 633 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
| 634 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
| 635 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
| 636 | |
| 637 | // setting specific NULL value |
| 638 | result = |
| 639 | con.Query("COPY test_null_option TO '" + fs.JoinPath(csv_path, "test_null_option_4.csv" ) + "' (NULL 'null');" ); |
| 640 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 641 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_null_option;" )); |
| 642 | result = con.Query("COPY test_null_option FROM '" + fs.JoinPath(csv_path, "test_null_option_4.csv" ) + |
| 643 | "' (NULL 'null');" ); |
| 644 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 645 | result = con.Query("SELECT * FROM test_null_option ORDER BY 1 LIMIT 3;" ); |
| 646 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 647 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "" })); |
| 648 | REQUIRE(CHECK_COLUMN(result, 2, {"test" , "test" , "test" })); |
| 649 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()})); |
| 650 | } |
| 651 | |
| 652 | TEST_CASE("Test force_quote and force_not_null" , "[copy]" ) { |
| 653 | FileSystem fs; |
| 654 | unique_ptr<QueryResult> result; |
| 655 | DuckDB db(nullptr); |
| 656 | Connection con(db); |
| 657 | |
| 658 | auto csv_path = GetCSVPath(); |
| 659 | |
| 660 | // generate CSV file with default delimiter |
| 661 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 662 | from_csv_file << 8 << ",test,tea" << endl; |
| 663 | for (int i = 0; i < 2; i++) { |
| 664 | from_csv_file << i << ",,test" << endl; |
| 665 | } |
| 666 | from_csv_file.close(); |
| 667 | |
| 668 | // generate another CSV file |
| 669 | ofstream from_csv_file_2(fs.JoinPath(csv_path, "test_2.csv" )); |
| 670 | from_csv_file_2 << ",test,tea" << endl; |
| 671 | for (int i = 0; i < 2; i++) { |
| 672 | from_csv_file_2 << i << ",,test" << endl; |
| 673 | } |
| 674 | from_csv_file_2.close(); |
| 675 | |
| 676 | // create a table |
| 677 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10));" )); |
| 678 | |
| 679 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 680 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 681 | |
| 682 | // test FORCE_QUOTE * |
| 683 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_star.csv" ) + "' (FORCE_QUOTE *);" ); |
| 684 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 685 | |
| 686 | vector<string> lines; |
| 687 | string line; |
| 688 | ifstream test_star_file(fs.JoinPath(csv_path, "test_star.csv" )); |
| 689 | if (test_star_file.is_open()) { |
| 690 | while (getline(test_star_file, line)) { |
| 691 | lines.push_back(line); |
| 692 | } |
| 693 | test_star_file.close(); |
| 694 | } else { |
| 695 | throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_star.csv" )); |
| 696 | }; |
| 697 | REQUIRE(lines[0] == "\"8\",\"test\",\"tea\"" ); |
| 698 | REQUIRE(lines[1] == "\"0\",,\"test\"" ); |
| 699 | REQUIRE(lines[2] == "\"1\",,\"test\"" ); |
| 700 | |
| 701 | // test FORCE_QUOTE with specific columns and non-default quote character and non-default null character |
| 702 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_chosen_columns.csv" ) + |
| 703 | "' (FORCE_QUOTE (col_a, col_c), QUOTE 't', NULL 'tea');" ); |
| 704 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 705 | |
| 706 | ifstream test_columns_file(fs.JoinPath(csv_path, "test_chosen_columns.csv" )); |
| 707 | if (test_columns_file.is_open()) { |
| 708 | while (getline(test_columns_file, line)) { |
| 709 | lines.push_back(line); |
| 710 | } |
| 711 | test_columns_file.close(); |
| 712 | } else { |
| 713 | throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_chosen_columns.csv" )); |
| 714 | }; |
| 715 | REQUIRE(lines[3] == "t8t,tttesttt,ttteat" ); |
| 716 | REQUIRE(lines[4] == "t0t,tea,tttesttt" ); |
| 717 | REQUIRE(lines[5] == "t1t,tea,tttesttt" ); |
| 718 | |
| 719 | // test FORCE_QUOTE with reordered columns |
| 720 | result = con.Query("COPY test (col_b, col_c, col_a) TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
| 721 | "' (FORCE_QUOTE (col_c, col_b), NULL 'test');" ); |
| 722 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 723 | |
| 724 | ifstream test_reorder_file(fs.JoinPath(csv_path, "test_reorder.csv" )); |
| 725 | if (test_reorder_file.is_open()) { |
| 726 | while (getline(test_reorder_file, line)) { |
| 727 | lines.push_back(line); |
| 728 | } |
| 729 | test_reorder_file.close(); |
| 730 | } else { |
| 731 | throw Exception("Unable to open file: " + fs.JoinPath(csv_path, "test_reorder.csv" )); |
| 732 | }; |
| 733 | REQUIRE(lines[6] == "\"test\",\"tea\",8" ); |
| 734 | REQUIRE(lines[7] == "test,\"test\",0" ); |
| 735 | REQUIRE(lines[8] == "test,\"test\",1" ); |
| 736 | |
| 737 | // test using a column in FORCE_QUOTE that is not set as output, but that is a column of the table |
| 738 | REQUIRE_FAIL(con.Query("COPY test (col_b, col_a) TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
| 739 | "' (FORCE_QUOTE (col_c, col_b));" )); |
| 740 | // test using a column in FORCE_QUOTE that is not a column of the table |
| 741 | REQUIRE_FAIL( |
| 742 | con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE (col_c, col_d));" )); |
| 743 | // FORCE_QUOTE is only supported in COPY ... TO ... |
| 744 | REQUIRE_FAIL( |
| 745 | con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE (col_c, col_d));" )); |
| 746 | // FORCE_QUOTE must not be empty and must have the correct parameter type |
| 747 | REQUIRE_FAIL(con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE);" )); |
| 748 | REQUIRE_FAIL(con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + "' (FORCE_QUOTE 42);" )); |
| 749 | |
| 750 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
| 751 | |
| 752 | // test FORCE_NOT_NULL |
| 753 | |
| 754 | // test if null value is correctly converted into string |
| 755 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
| 756 | "' (FORCE_NOT_NULL (col_b), NULL 'test');" ); |
| 757 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 758 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
| 759 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 8})); |
| 760 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "test" })); |
| 761 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), "tea" })); |
| 762 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
| 763 | |
| 764 | // test if null value is correctly converted into string if explicit columns are used |
| 765 | result = con.Query("COPY test (col_a, col_b, col_c) FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
| 766 | "' (FORCE_NOT_NULL (col_b), NULL 'test');" ); |
| 767 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 768 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
| 769 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 8})); |
| 770 | REQUIRE(CHECK_COLUMN(result, 1, {"" , "" , "test" })); |
| 771 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), "tea" })); |
| 772 | |
| 773 | // FORCE_NOT_NULL is only supported in COPY ... FROM ... |
| 774 | REQUIRE_FAIL(result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
| 775 | "' (FORCE_NOT_NULL (col_b), NULL 'test');" )); |
| 776 | // FORCE_NOT_NULL must not be empty and must have the correct parameter type |
| 777 | REQUIRE_FAIL(result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
| 778 | "' (FORCE_NOT_NULL, NULL 'test');" )); |
| 779 | REQUIRE_FAIL(result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_star.csv" ) + |
| 780 | "' (FORCE_NOT_NULL 42, NULL 'test');" )); |
| 781 | // test using a column in FORCE_NOT_NULL that is not set as output, but that is a column of the table |
| 782 | REQUIRE_FAIL(con.Query("COPY test (col_b, col_a) FROM '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
| 783 | "' (FORCE_NOT_NULL (col_c, col_b));" )); |
| 784 | // test using a column in FORCE_NOT_NULL that is not a column of the table |
| 785 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_reorder.csv" ) + |
| 786 | "' (FORCE_NOT_NULL (col_c, col_d));" )); |
| 787 | |
| 788 | // FORCE_NOT_NULL fails on integer columns |
| 789 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test_2.csv" ) + "' (FORCE_NOT_NULL (col_a));" )); |
| 790 | } |
| 791 | |
| 792 | TEST_CASE("Test copy statement with unicode delimiter/quote/escape" , "[copy]" ) { |
| 793 | FileSystem fs; |
| 794 | unique_ptr<QueryResult> result; |
| 795 | DuckDB db(nullptr); |
| 796 | Connection con(db); |
| 797 | |
| 798 | auto csv_path = GetCSVPath(); |
| 799 | |
| 800 | // generate CSV file with unicode (> one-byte) delimiter/quote/escape |
| 801 | ofstream from_csv_file1(fs.JoinPath(csv_path, "multi_char.csv" )); |
| 802 | from_csv_file1 << 0 << "🦆ˮdu˧˧🦆ckˮ🦆ˮd˧ˮ˧ˮu🦆ckˮ🦆duck" << endl; |
| 803 | from_csv_file1 << 1 << "🦆ˮdou˧ˮbleˮ🦆🦆duck" << endl; |
| 804 | from_csv_file1 << 2 << "🦆🦆🦆" << endl; |
| 805 | from_csv_file1 << 3 << "🦆duck inv˧asion🦆🦆" << endl; |
| 806 | from_csv_file1.close(); |
| 807 | |
| 808 | // generate CSV file with unicode (> one-byte) delimiter/quote/escape that exceeds the buffer size a few times |
| 809 | ofstream from_csv_file2(fs.JoinPath(csv_path, "multi_char_buffer_exhausted.csv" )); |
| 810 | int64_t sum = 0; |
| 811 | for (int i = 0; i < 16384; i++) { |
| 812 | if (i % 2 == 0) { |
| 813 | from_csv_file2 << i << "🦆ˮ🦆dˮ🦆ˮd˧ˮ🦆ˮ🦆d˧" << endl; |
| 814 | } else { |
| 815 | from_csv_file2 << i << "🦆ˮ˧ˮ˧ˮ˧ˮˮ🦆˧˧🦆 test test 🦆" << endl; |
| 816 | } |
| 817 | sum += i; |
| 818 | } |
| 819 | from_csv_file2.close(); |
| 820 | |
| 821 | // generate CSV file with one-byte delimiter/quote/escape |
| 822 | ofstream from_csv_file3(fs.JoinPath(csv_path, "one_byte_char.csv" )); |
| 823 | for (int i = 0; i < 3; i++) { |
| 824 | from_csv_file3 << i << ",'du''ck','''''du,ck',duck" << endl; |
| 825 | } |
| 826 | from_csv_file3.close(); |
| 827 | |
| 828 | // generate CSV file with unterminated quotes |
| 829 | ofstream from_csv_file4(fs.JoinPath(csv_path, "unterminated_quotes.csv" )); |
| 830 | for (int i = 0; i < 3; i++) { |
| 831 | from_csv_file4 << i << ",duck,\"duck" << endl; |
| 832 | } |
| 833 | from_csv_file4.close(); |
| 834 | |
| 835 | // generate CSV file with quotes that start midway in the value |
| 836 | ofstream from_csv_file5(fs.JoinPath(csv_path, "unterminated_quotes_2.csv" )); |
| 837 | for (int i = 0; i < 3; i++) { |
| 838 | from_csv_file5 << i << ",du\"ck,duck" << endl; |
| 839 | } |
| 840 | from_csv_file5.close(); |
| 841 | |
| 842 | // generate a CSV file with a very long string exceeding the buffer midway in an escape sequence (delimiter and |
| 843 | // escape share substrings) |
| 844 | ofstream from_csv_file6(fs.JoinPath(csv_path, "shared_substrings.csv" )); |
| 845 | string big_string_a(16370, 'a'); |
| 846 | from_csv_file6 << big_string_a << "AAA\"aaaaaaaaAAB\"\"" << endl; |
| 847 | from_csv_file6.close(); |
| 848 | |
| 849 | // create three tables for testing |
| 850 | REQUIRE_NO_FAIL(con.Query( |
| 851 | "CREATE TABLE test_unicode_1 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
| 852 | REQUIRE_NO_FAIL(con.Query( |
| 853 | "CREATE TABLE test_unicode_2 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
| 854 | REQUIRE_NO_FAIL(con.Query( |
| 855 | "CREATE TABLE test_unicode_3 (col_a INTEGER, col_b VARCHAR(10), col_c VARCHAR(10), col_d VARCHAR(10));" )); |
| 856 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test_unicode_4 (col_a VARCHAR, col_b VARCHAR);" )); |
| 857 | |
| 858 | // throw error if unterminated quotes are detected |
| 859 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "unterminated_quotes.csv" ) + "';" )); |
| 860 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "unterminated_quotes_2.csv" ) + "';" )); |
| 861 | |
| 862 | // test COPY ... FROM ... |
| 863 | |
| 864 | // test unicode delimiter/quote/escape |
| 865 | result = con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "multi_char.csv" ) + |
| 866 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
| 867 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
| 868 | result = con.Query("SELECT * FROM test_unicode_1 ORDER BY 1 LIMIT 4;" ); |
| 869 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
| 870 | REQUIRE(CHECK_COLUMN(result, 1, {"du˧🦆ck" , "douˮble" , Value(), "duck inv˧asion" })); |
| 871 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮˮu🦆ck" , Value(), Value(), Value()})); |
| 872 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , Value(), Value()})); |
| 873 | |
| 874 | // test unicode delimiter/quote/escape that exceeds the buffer size a few times |
| 875 | result = con.Query("COPY test_unicode_2 FROM '" + fs.JoinPath(csv_path, "multi_char_buffer_exhausted.csv" ) + |
| 876 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
| 877 | REQUIRE(CHECK_COLUMN(result, 0, {16384})); |
| 878 | result = con.Query("SELECT * FROM test_unicode_2 ORDER BY 1 LIMIT 4;" ); |
| 879 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
| 880 | REQUIRE(CHECK_COLUMN(result, 1, {"🦆d" , "ˮˮˮ" , "🦆d" , "ˮˮˮ" })); |
| 881 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮ🦆" , "˧˧" , "dˮ🦆" , "˧˧" })); |
| 882 | REQUIRE(CHECK_COLUMN(result, 3, {"d˧" , " test test " , "d˧" , " test test " })); |
| 883 | result = con.Query("SELECT SUM(col_a) FROM test_unicode_2;" ); |
| 884 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum)})); |
| 885 | |
| 886 | // test one-byte delimiter/quote/escape |
| 887 | result = con.Query("COPY test_unicode_3 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + "' (QUOTE '''');" ); |
| 888 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 889 | result = con.Query("SELECT * FROM test_unicode_3 ORDER BY 1 LIMIT 3;" ); |
| 890 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 891 | REQUIRE(CHECK_COLUMN(result, 1, {"du'ck" , "du'ck" , "du'ck" })); |
| 892 | REQUIRE(CHECK_COLUMN(result, 2, {"''du,ck" , "''du,ck" , "''du,ck" })); |
| 893 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , "duck" })); |
| 894 | |
| 895 | // test correct shared substring behavior at buffer borders |
| 896 | result = con.Query("COPY test_unicode_4 FROM '" + fs.JoinPath(csv_path, "shared_substrings.csv" ) + |
| 897 | "' (DELIMITER 'AAA', ESCAPE 'AAB');" ); |
| 898 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 899 | result = con.Query("SELECT * FROM test_unicode_4;" ); |
| 900 | REQUIRE(CHECK_COLUMN(result, 0, {big_string_a})); |
| 901 | REQUIRE(CHECK_COLUMN(result, 1, {"aaaaaaaa\"" })); |
| 902 | |
| 903 | // quote and escape must not be empty |
| 904 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 905 | "' (DELIMITER '🦆', QUOTE '');" )); |
| 906 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 907 | "' (DELIMITER '🦆', ESCAPE '');" )); |
| 908 | |
| 909 | // test same string for delimiter and quote |
| 910 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 911 | "' (DELIMITER '🦆', QUOTE '🦆');" )); |
| 912 | |
| 913 | // escape and quote cannot be substrings of each other |
| 914 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 915 | "' (ESCAPE 'du', QUOTE 'duck');" )); |
| 916 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 917 | "' (ESCAPE 'duck', QUOTE 'du');" )); |
| 918 | |
| 919 | // delimiter and quote cannot be substrings of each other |
| 920 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 921 | "' (DELIMITER 'du', QUOTE 'duck');" )); |
| 922 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 923 | "' (DELIMITER 'duck', QUOTE 'du');" )); |
| 924 | |
| 925 | // delimiter and escape cannot be substrings of each other |
| 926 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 927 | "' (DELIMITER 'AA', ESCAPE 'AAAA');" )); |
| 928 | REQUIRE_FAIL(con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "one_byte_char.csv" ) + |
| 929 | "' (DELIMITER 'AAAA', ESCAPE 'AA');" )); |
| 930 | |
| 931 | // COPY ... TO ... |
| 932 | |
| 933 | // test unicode delimiter/quote/escape |
| 934 | result = con.Query("COPY test_unicode_1 TO '" + fs.JoinPath(csv_path, "test_unicode_1.csv" ) + |
| 935 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
| 936 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
| 937 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_1;" )); |
| 938 | result = con.Query("COPY test_unicode_1 FROM '" + fs.JoinPath(csv_path, "test_unicode_1.csv" ) + |
| 939 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
| 940 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
| 941 | result = con.Query("SELECT * FROM test_unicode_1 ORDER BY 1 LIMIT 4;" ); |
| 942 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
| 943 | REQUIRE(CHECK_COLUMN(result, 1, {"du˧🦆ck" , "douˮble" , Value(), "duck inv˧asion" })); |
| 944 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮˮu🦆ck" , Value(), Value(), Value()})); |
| 945 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , Value(), Value()})); |
| 946 | |
| 947 | // test unicode delimiter/quote/escape |
| 948 | result = con.Query("COPY test_unicode_2 TO '" + fs.JoinPath(csv_path, "test_unicode_2.csv" ) + |
| 949 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
| 950 | REQUIRE(CHECK_COLUMN(result, 0, {16384})); |
| 951 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_2;" )); |
| 952 | result = con.Query("COPY test_unicode_2 FROM '" + fs.JoinPath(csv_path, "test_unicode_2.csv" ) + |
| 953 | "' (DELIMITER '🦆', QUOTE 'ˮ', ESCAPE '˧');" ); |
| 954 | REQUIRE(CHECK_COLUMN(result, 0, {16384})); |
| 955 | result = con.Query("SELECT * FROM test_unicode_2 ORDER BY 1 LIMIT 4;" ); |
| 956 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2, 3})); |
| 957 | REQUIRE(CHECK_COLUMN(result, 1, {"🦆d" , "ˮˮˮ" , "🦆d" , "ˮˮˮ" })); |
| 958 | REQUIRE(CHECK_COLUMN(result, 2, {"dˮ🦆" , "˧˧" , "dˮ🦆" , "˧˧" })); |
| 959 | REQUIRE(CHECK_COLUMN(result, 3, {"d˧" , " test test " , "d˧" , " test test " })); |
| 960 | result = con.Query("SELECT SUM(col_a) FROM test_unicode_2;" ); |
| 961 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum)})); |
| 962 | |
| 963 | // test one-byte delimiter/quote/escape |
| 964 | result = con.Query("COPY test_unicode_3 TO '" + fs.JoinPath(csv_path, "test_unicode_3.csv" ) + "' (QUOTE '''');" ); |
| 965 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 966 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test_unicode_3;" )); |
| 967 | result = con.Query("COPY test_unicode_3 FROM '" + fs.JoinPath(csv_path, "test_unicode_3.csv" ) + "' (QUOTE '''');" ); |
| 968 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 969 | result = con.Query("SELECT * FROM test_unicode_3 ORDER BY 1 LIMIT 3;" ); |
| 970 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 2})); |
| 971 | REQUIRE(CHECK_COLUMN(result, 1, {"du'ck" , "du'ck" , "du'ck" })); |
| 972 | REQUIRE(CHECK_COLUMN(result, 2, {"''du,ck" , "''du,ck" , "''du,ck" })); |
| 973 | REQUIRE(CHECK_COLUMN(result, 3, {"duck" , "duck" , "duck" })); |
| 974 | } |
| 975 | |
| 976 | TEST_CASE("Test copy statement with file overwrite" , "[copy]" ) { |
| 977 | FileSystem fs; |
| 978 | unique_ptr<QueryResult> result; |
| 979 | DuckDB db(nullptr); |
| 980 | Connection con(db); |
| 981 | |
| 982 | auto csv_path = GetCSVPath(); |
| 983 | |
| 984 | // create a table and insert some values |
| 985 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR(10));" )); |
| 986 | REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (1, 'hello'), (2, 'world '), (3, ' xx');" )); |
| 987 | |
| 988 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
| 989 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 990 | REQUIRE(CHECK_COLUMN(result, 1, {"hello" , "world " , " xx" })); |
| 991 | |
| 992 | // copy to the CSV file |
| 993 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 994 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 995 | |
| 996 | // now copy to the file again |
| 997 | result = con.Query("COPY test TO '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 998 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 999 | |
| 1000 | // reload the data from the file: it should only have three rows |
| 1001 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
| 1002 | |
| 1003 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1004 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 1005 | |
| 1006 | result = con.Query("SELECT * FROM test ORDER BY 1;" ); |
| 1007 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 1008 | REQUIRE(CHECK_COLUMN(result, 1, {"hello" , "world " , " xx" })); |
| 1009 | } |
| 1010 | |
| 1011 | TEST_CASE("Test copy statement with default values" , "[copy]" ) { |
| 1012 | FileSystem fs; |
| 1013 | unique_ptr<QueryResult> result; |
| 1014 | DuckDB db(nullptr); |
| 1015 | Connection con(db); |
| 1016 | |
| 1017 | auto csv_path = GetCSVPath(); |
| 1018 | |
| 1019 | // create a file only consisting of integers |
| 1020 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1021 | int64_t expected_sum_a = 0; |
| 1022 | int64_t expected_sum_c = 0; |
| 1023 | for (int i = 0; i < 5000; i++) { |
| 1024 | from_csv_file << i << endl; |
| 1025 | |
| 1026 | expected_sum_a += i; |
| 1027 | expected_sum_c += i + 7; |
| 1028 | } |
| 1029 | from_csv_file.close(); |
| 1030 | |
| 1031 | // load CSV file into a table |
| 1032 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR DEFAULT('hello'), c INTEGER DEFAULT(3+4));" )); |
| 1033 | result = con.Query("COPY test (a) FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1034 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 1035 | result = con.Query("COPY test (c) FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1036 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 1037 | |
| 1038 | result = |
| 1039 | con.Query("SELECT COUNT(a), COUNT(b), COUNT(c), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(a), SUM(c) FROM test;" ); |
| 1040 | REQUIRE(CHECK_COLUMN(result, 0, {5000})); |
| 1041 | REQUIRE(CHECK_COLUMN(result, 1, {10000})); |
| 1042 | REQUIRE(CHECK_COLUMN(result, 2, {10000})); |
| 1043 | REQUIRE(CHECK_COLUMN(result, 3, {5})); |
| 1044 | REQUIRE(CHECK_COLUMN(result, 4, {5})); |
| 1045 | REQUIRE(CHECK_COLUMN(result, 5, {Value::BIGINT(expected_sum_a)})); |
| 1046 | REQUIRE(CHECK_COLUMN(result, 6, {Value::BIGINT(expected_sum_c)})); |
| 1047 | } |
| 1048 | |
| 1049 | TEST_CASE("Test copy statement with long lines" , "[copy]" ) { |
| 1050 | FileSystem fs; |
| 1051 | unique_ptr<QueryResult> result; |
| 1052 | DuckDB db(nullptr); |
| 1053 | Connection con(db); |
| 1054 | |
| 1055 | auto csv_path = GetCSVPath(); |
| 1056 | |
| 1057 | // generate a CSV file with a very long string |
| 1058 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1059 | string big_string_a(100000, 'a'); |
| 1060 | string big_string_b(200000, 'b'); |
| 1061 | from_csv_file << 10 << "," << big_string_a << "," << 20 << endl; |
| 1062 | from_csv_file << 20 << "," << big_string_b << "," << 30 << endl; |
| 1063 | from_csv_file.close(); |
| 1064 | |
| 1065 | // load CSV file into a table |
| 1066 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
| 1067 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1068 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
| 1069 | |
| 1070 | result = con.Query("SELECT LENGTH(b) FROM test ORDER BY a;" ); |
| 1071 | REQUIRE(CHECK_COLUMN(result, 0, {100000, 200000})); |
| 1072 | |
| 1073 | result = con.Query("SELECT SUM(a), SUM(c) FROM test;" ); |
| 1074 | REQUIRE(CHECK_COLUMN(result, 0, {30})); |
| 1075 | REQUIRE(CHECK_COLUMN(result, 1, {50})); |
| 1076 | } |
| 1077 | |
| 1078 | TEST_CASE("Test copy statement with quotes and newlines" , "[copy]" ) { |
| 1079 | FileSystem fs; |
| 1080 | unique_ptr<QueryResult> result; |
| 1081 | DuckDB db(nullptr); |
| 1082 | Connection con(db); |
| 1083 | |
| 1084 | auto csv_path = GetCSVPath(); |
| 1085 | |
| 1086 | // generate a CSV file with newlines enclosed by quotes |
| 1087 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1088 | from_csv_file << "\"hello\\nworld\",\"5\"" << endl; |
| 1089 | from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl; |
| 1090 | from_csv_file.close(); |
| 1091 | |
| 1092 | // load CSV file into a table |
| 1093 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
| 1094 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1095 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
| 1096 | |
| 1097 | result = con.Query("SELECT SUM(b) FROM test;" ); |
| 1098 | REQUIRE(CHECK_COLUMN(result, 0, {11})); |
| 1099 | |
| 1100 | result = con.Query("SELECT a FROM test ORDER BY a;" ); |
| 1101 | REQUIRE(CHECK_COLUMN(result, 0, {"hello\\nworld" , "what,\\n brings, you here\\n, today" })); |
| 1102 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 1103 | |
| 1104 | // quotes in the middle of a quoted string cause an exception if they are not escaped |
| 1105 | from_csv_file.open(fs.JoinPath(csv_path, "test.csv" )); |
| 1106 | from_csv_file << "\"hello\\n\"w\"o\"rld\",\"5\"" << endl; |
| 1107 | from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl; |
| 1108 | from_csv_file.close(); |
| 1109 | |
| 1110 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
| 1111 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" )); |
| 1112 | |
| 1113 | // now the same quotes are escaped |
| 1114 | from_csv_file.open(fs.JoinPath(csv_path, "test.csv" )); |
| 1115 | from_csv_file << "\"hello\\n\"\"w\"\"o\"\"rld\",\"5\"" << endl; |
| 1116 | from_csv_file << "\"what,\\n brings, you here\\n, today\",\"6\"" << endl; |
| 1117 | from_csv_file.close(); |
| 1118 | |
| 1119 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1120 | REQUIRE(CHECK_COLUMN(result, 0, {2})); |
| 1121 | |
| 1122 | result = con.Query("SELECT SUM(b) FROM test;" ); |
| 1123 | REQUIRE(CHECK_COLUMN(result, 0, {11})); |
| 1124 | result = con.Query("SELECT a,b FROM test ORDER BY b;" ); |
| 1125 | REQUIRE(CHECK_COLUMN(result, 0, {"hello\\n\"w\"o\"rld" , "what,\\n brings, you here\\n, today" })); |
| 1126 | REQUIRE(CHECK_COLUMN(result, 1, {5, 6})); |
| 1127 | |
| 1128 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 1129 | |
| 1130 | // not escaped escape string in quotes throws an exception |
| 1131 | from_csv_file.open(fs.JoinPath(csv_path, "test.csv" )); |
| 1132 | from_csv_file << "\"\\\"escaped\\\",\"5\"" << endl; |
| 1133 | from_csv_file << "yea,6" << endl; |
| 1134 | from_csv_file.close(); |
| 1135 | |
| 1136 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
| 1137 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "' (ESCAPE '\\');" )); |
| 1138 | } |
| 1139 | |
| 1140 | TEST_CASE("Test copy statement with many empty lines" , "[copy]" ) { |
| 1141 | FileSystem fs; |
| 1142 | unique_ptr<QueryResult> result; |
| 1143 | DuckDB db(nullptr); |
| 1144 | Connection con(db); |
| 1145 | |
| 1146 | auto csv_path = GetCSVPath(); |
| 1147 | |
| 1148 | // generate CSV file with a very long string |
| 1149 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1150 | from_csv_file << "1\n" ; |
| 1151 | for (idx_t i = 0; i < 19999; i++) { |
| 1152 | from_csv_file << "\n" ; |
| 1153 | } |
| 1154 | from_csv_file.close(); |
| 1155 | |
| 1156 | // load CSV file into a table |
| 1157 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER);" )); |
| 1158 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1159 | REQUIRE(CHECK_COLUMN(result, 0, {20000})); |
| 1160 | |
| 1161 | result = con.Query("SELECT SUM(a) FROM test;" ); |
| 1162 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 1163 | } |
| 1164 | |
| 1165 | TEST_CASE("Test different line endings" , "[copy]" ) { |
| 1166 | FileSystem fs; |
| 1167 | unique_ptr<QueryResult> result; |
| 1168 | DuckDB db(nullptr); |
| 1169 | Connection con(db); |
| 1170 | |
| 1171 | auto csv_path = GetCSVPath(); |
| 1172 | |
| 1173 | // generate CSV file with different line endings |
| 1174 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1175 | from_csv_file << 10 << "," |
| 1176 | << "hello" |
| 1177 | << "," << 20 << "\r\n" ; |
| 1178 | from_csv_file << 20 << "," |
| 1179 | << "world" |
| 1180 | << "," << 30 << '\n'; |
| 1181 | from_csv_file << 30 << "," |
| 1182 | << "test" |
| 1183 | << "," << 30 << '\r'; |
| 1184 | from_csv_file.close(); |
| 1185 | |
| 1186 | // load CSV file into a table |
| 1187 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
| 1188 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1189 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 1190 | |
| 1191 | result = con.Query("SELECT LENGTH(b) FROM test ORDER BY a;" ); |
| 1192 | REQUIRE(CHECK_COLUMN(result, 0, {5, 5, 4})); |
| 1193 | |
| 1194 | result = con.Query("SELECT SUM(a), SUM(c) FROM test;" ); |
| 1195 | REQUIRE(CHECK_COLUMN(result, 0, {60})); |
| 1196 | REQUIRE(CHECK_COLUMN(result, 1, {80})); |
| 1197 | } |
| 1198 | |
| 1199 | TEST_CASE("Test Windows Newlines with a long file" , "[copy]" ) { |
| 1200 | FileSystem fs; |
| 1201 | unique_ptr<QueryResult> result; |
| 1202 | DuckDB db(nullptr); |
| 1203 | Connection con(db); |
| 1204 | |
| 1205 | auto csv_path = GetCSVPath(); |
| 1206 | |
| 1207 | idx_t line_count = 20000; |
| 1208 | int64_t sum_a = 0, sum_c = 0; |
| 1209 | |
| 1210 | // generate a CSV file with many strings |
| 1211 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1212 | for (idx_t i = 0; i < line_count; i++) { |
| 1213 | from_csv_file << i << "," |
| 1214 | << "hello" |
| 1215 | << "," << i + 2 << "\r\n" ; |
| 1216 | |
| 1217 | sum_a += i; |
| 1218 | sum_c += i + 2; |
| 1219 | } |
| 1220 | from_csv_file.close(); |
| 1221 | |
| 1222 | // load CSV file into a table |
| 1223 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
| 1224 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" ); |
| 1225 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)})); |
| 1226 | |
| 1227 | result = con.Query("SELECT SUM(a), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(LENGTH(b)), SUM(c) FROM test;" ); |
| 1228 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum_a)})); |
| 1229 | REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(5)})); |
| 1230 | REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(5)})); |
| 1231 | REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(5 * line_count)})); |
| 1232 | REQUIRE(CHECK_COLUMN(result, 4, {Value::BIGINT(sum_c)})); |
| 1233 | |
| 1234 | REQUIRE_NO_FAIL(con.Query("DELETE FROM test;" )); |
| 1235 | // now do the same with a multi-byte quote that is not actually used |
| 1236 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "' QUOTE 'BLABLABLA';" ); |
| 1237 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)})); |
| 1238 | |
| 1239 | result = con.Query("SELECT SUM(a), MIN(LENGTH(b)), MAX(LENGTH(b)), SUM(LENGTH(b)), SUM(c) FROM test;" ); |
| 1240 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(sum_a)})); |
| 1241 | REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(5)})); |
| 1242 | REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(5)})); |
| 1243 | REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(5 * line_count)})); |
| 1244 | REQUIRE(CHECK_COLUMN(result, 4, {Value::BIGINT(sum_c)})); |
| 1245 | |
| 1246 | REQUIRE_NO_FAIL(con.Query("DROP TABLE test;" )); |
| 1247 | |
| 1248 | // generate a csv file with one value and many empty values |
| 1249 | ofstream from_csv_file_empty(fs.JoinPath(csv_path, "test2.csv" )); |
| 1250 | from_csv_file_empty << 1 << "\r\n" ; |
| 1251 | for (idx_t i = 0; i < line_count - 1; i++) { |
| 1252 | from_csv_file_empty << "\r\n" ; |
| 1253 | } |
| 1254 | from_csv_file_empty.close(); |
| 1255 | |
| 1256 | // load CSV file into a table |
| 1257 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER);" )); |
| 1258 | result = con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test2.csv" ) + "';" ); |
| 1259 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(line_count)})); |
| 1260 | |
| 1261 | result = con.Query("SELECT SUM(a) FROM test;" ); |
| 1262 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(1)})); |
| 1263 | } |
| 1264 | |
| 1265 | TEST_CASE("Test lines that exceed the maximum line size" , "[copy]" ) { |
| 1266 | FileSystem fs; |
| 1267 | unique_ptr<QueryResult> result; |
| 1268 | DuckDB db(nullptr); |
| 1269 | Connection con(db); |
| 1270 | |
| 1271 | auto csv_path = GetCSVPath(); |
| 1272 | |
| 1273 | // generate CSV file with 20 MB string |
| 1274 | ofstream from_csv_file(fs.JoinPath(csv_path, "test.csv" )); |
| 1275 | string big_string(2048576, 'a'); |
| 1276 | from_csv_file << 10 << "," << big_string << "," << 20 << endl; |
| 1277 | from_csv_file.close(); |
| 1278 | |
| 1279 | // value is too big for loading |
| 1280 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER);" )); |
| 1281 | REQUIRE_FAIL(con.Query("COPY test FROM '" + fs.JoinPath(csv_path, "test.csv" ) + "';" )); |
| 1282 | } |
| 1283 | |
| 1284 | TEST_CASE("Test copy from/to on-time dataset" , "[copy]" ) { |
| 1285 | FileSystem fs; |
| 1286 | unique_ptr<QueryResult> result; |
| 1287 | DuckDB db(nullptr); |
| 1288 | Connection con(db); |
| 1289 | |
| 1290 | auto csv_path = GetCSVPath(); |
| 1291 | auto ontime_csv = fs.JoinPath(csv_path, "ontime.csv" ); |
| 1292 | WriteBinary(ontime_csv, ontime_sample, sizeof(ontime_sample)); |
| 1293 | |
| 1294 | REQUIRE_NO_FAIL(con.Query( |
| 1295 | "CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, " |
| 1296 | "flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), " |
| 1297 | "flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, " |
| 1298 | "origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), " |
| 1299 | "originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, " |
| 1300 | "destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips " |
| 1301 | "VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), " |
| 1302 | "depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups " |
| 1303 | "DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), " |
| 1304 | "taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes " |
| 1305 | "DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled " |
| 1306 | "SMALLINT, cancellationcode CHAR(1), diverted SMALLINT, crselapsedtime DECIMAL(8,2), actualelapsedtime " |
| 1307 | "DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup SMALLINT, " |
| 1308 | "carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), " |
| 1309 | "lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime " |
| 1310 | "VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), " |
| 1311 | "divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, " |
| 1312 | "div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), " |
| 1313 | "div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, " |
| 1314 | "div2airportseqid INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), " |
| 1315 | "div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, " |
| 1316 | "div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), " |
| 1317 | "div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, " |
| 1318 | "div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), " |
| 1319 | "div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, " |
| 1320 | "div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), " |
| 1321 | "div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));" )); |
| 1322 | |
| 1323 | result = con.Query("COPY ontime FROM '" + ontime_csv + "' DELIMITER ',' HEADER;" ); |
| 1324 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
| 1325 | |
| 1326 | result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;" ); |
| 1327 | REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988})); |
| 1328 | REQUIRE(CHECK_COLUMN(result, 1, {"AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" })); |
| 1329 | REQUIRE(CHECK_COLUMN(result, 2, {"JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" })); |
| 1330 | REQUIRE(CHECK_COLUMN(result, 3, |
| 1331 | {"New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , |
| 1332 | "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" })); |
| 1333 | REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
| 1334 | |
| 1335 | result = con.Query("COPY ontime TO '" + ontime_csv + "' DELIMITER ',' HEADER;" ); |
| 1336 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
| 1337 | REQUIRE_NO_FAIL(con.Query("DELETE FROM ontime;" )); |
| 1338 | result = con.Query("COPY ontime FROM '" + ontime_csv + "' DELIMITER ',' HEADER;" ); |
| 1339 | REQUIRE(CHECK_COLUMN(result, 0, {9})); |
| 1340 | |
| 1341 | result = con.Query("SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;" ); |
| 1342 | REQUIRE(CHECK_COLUMN(result, 0, {1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988})); |
| 1343 | REQUIRE(CHECK_COLUMN(result, 1, {"AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" , "AA" })); |
| 1344 | REQUIRE(CHECK_COLUMN(result, 2, {"JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" , "JFK" })); |
| 1345 | REQUIRE(CHECK_COLUMN(result, 3, |
| 1346 | {"New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" , |
| 1347 | "New York, NY" , "New York, NY" , "New York, NY" , "New York, NY" })); |
| 1348 | REQUIRE(CHECK_COLUMN(result, 4, {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
| 1349 | } |
| 1350 | |
| 1351 | TEST_CASE("Test copy from/to lineitem csv" , "[copy]" ) { |
| 1352 | FileSystem fs; |
| 1353 | unique_ptr<QueryResult> result; |
| 1354 | DuckDB db(nullptr); |
| 1355 | Connection con(db); |
| 1356 | |
| 1357 | auto csv_path = GetCSVPath(); |
| 1358 | auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv" ); |
| 1359 | WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample)); |
| 1360 | |
| 1361 | REQUIRE_NO_FAIL(con.Query( |
| 1362 | "CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber " |
| 1363 | "INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) " |
| 1364 | "NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, " |
| 1365 | "l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) " |
| 1366 | "NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);" )); |
| 1367 | result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' DELIMITER '|'" ); |
| 1368 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 1369 | |
| 1370 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
| 1371 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
| 1372 | REQUIRE( |
| 1373 | CHECK_COLUMN(result, 1, |
| 1374 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
| 1375 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
| 1376 | |
| 1377 | // test COPY ... TO ... with HEADER |
| 1378 | result = con.Query("COPY lineitem TO '" + lineitem_csv + "' (DELIMITER ' ', HEADER);" ); |
| 1379 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 1380 | |
| 1381 | // clear the table |
| 1382 | REQUIRE_NO_FAIL(con.Query("DELETE FROM lineitem;" )); |
| 1383 | result = con.Query("SELECT * FROM lineitem;" ); |
| 1384 | REQUIRE(CHECK_COLUMN(result, 0, {})); |
| 1385 | |
| 1386 | // now copy back into the table |
| 1387 | result = con.Query("COPY lineitem FROM '" + lineitem_csv + "' DELIMITER ' ' HEADER;" ); |
| 1388 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 1389 | |
| 1390 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
| 1391 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
| 1392 | REQUIRE( |
| 1393 | CHECK_COLUMN(result, 1, |
| 1394 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
| 1395 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
| 1396 | } |
| 1397 | |
| 1398 | TEST_CASE("Test copy from web_page csv" , "[copy]" ) { |
| 1399 | FileSystem fs; |
| 1400 | unique_ptr<QueryResult> result; |
| 1401 | DuckDB db(nullptr); |
| 1402 | Connection con(db); |
| 1403 | |
| 1404 | auto csv_path = GetCSVPath(); |
| 1405 | auto webpage_csv = fs.JoinPath(csv_path, "web_page.csv" ); |
| 1406 | WriteBinary(webpage_csv, web_page, sizeof(web_page)); |
| 1407 | |
| 1408 | REQUIRE_NO_FAIL(con.Query( |
| 1409 | "CREATE TABLE web_page(wp_web_page_sk integer not null, wp_web_page_id char(16) not null, wp_rec_start_date " |
| 1410 | "date, wp_rec_end_date date, wp_creation_date_sk integer, wp_access_date_sk integer, wp_autogen_flag char(1), " |
| 1411 | "wp_customer_sk integer, wp_url varchar(100), wp_type char(50), wp_char_count integer, wp_link_count integer, " |
| 1412 | "wp_image_count integer, wp_max_ad_count integer, primary key (wp_web_page_sk));" )); |
| 1413 | |
| 1414 | result = con.Query("COPY web_page FROM '" + webpage_csv + "' DELIMITER '|';" ); |
| 1415 | REQUIRE(CHECK_COLUMN(result, 0, {60})); |
| 1416 | |
| 1417 | result = con.Query("SELECT * FROM web_page ORDER BY wp_web_page_sk LIMIT 3;" ); |
| 1418 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 1419 | REQUIRE(CHECK_COLUMN(result, 1, {"AAAAAAAABAAAAAAA" , "AAAAAAAACAAAAAAA" , "AAAAAAAACAAAAAAA" })); |
| 1420 | REQUIRE(CHECK_COLUMN(result, 2, {Value::DATE(1997, 9, 3), Value::DATE(1997, 9, 3), Value::DATE(2000, 9, 3)})); |
| 1421 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value::DATE(2000, 9, 2), Value()})); |
| 1422 | REQUIRE(CHECK_COLUMN(result, 4, {2450810, 2450814, 2450814})); |
| 1423 | REQUIRE(CHECK_COLUMN(result, 5, {2452620, 2452580, 2452611})); |
| 1424 | REQUIRE(CHECK_COLUMN(result, 6, {"Y" , "N" , "N" })); |
| 1425 | REQUIRE(CHECK_COLUMN(result, 7, {98539, Value(), Value()})); |
| 1426 | REQUIRE(CHECK_COLUMN(result, 8, {"http://www.foo.com" , "http://www.foo.com" , "http://www.foo.com" })); |
| 1427 | REQUIRE(CHECK_COLUMN(result, 9, {"welcome" , "protected" , "feedback" })); |
| 1428 | REQUIRE(CHECK_COLUMN(result, 10, {2531, 1564, 1564})); |
| 1429 | REQUIRE(CHECK_COLUMN(result, 11, {8, 4, 4})); |
| 1430 | REQUIRE(CHECK_COLUMN(result, 12, {3, 3, 3})); |
| 1431 | REQUIRE(CHECK_COLUMN(result, 13, {4, 1, 4})); |
| 1432 | } |
| 1433 | |
| 1434 | TEST_CASE("Test copy from greek-utf8 csv" , "[copy]" ) { |
| 1435 | FileSystem fs; |
| 1436 | unique_ptr<QueryResult> result; |
| 1437 | DuckDB db(nullptr); |
| 1438 | Connection con(db); |
| 1439 | |
| 1440 | auto csv_path = GetCSVPath(); |
| 1441 | auto csv_file = fs.JoinPath(csv_path, "greek_utf8.csv" ); |
| 1442 | WriteBinary(csv_file, greek_utf8, sizeof(greek_utf8)); |
| 1443 | |
| 1444 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE greek_utf8(i INTEGER, j VARCHAR, k INTEGER);" )); |
| 1445 | |
| 1446 | result = con.Query("COPY greek_utf8 FROM '" + csv_file + "' DELIMITER '|';" ); |
| 1447 | REQUIRE(CHECK_COLUMN(result, 0, {8})); |
| 1448 | |
| 1449 | result = con.Query("SELECT * FROM greek_utf8 ORDER BY 1;" ); |
| 1450 | REQUIRE(CHECK_COLUMN(result, 0, {1689, 1690, 41561, 45804, 51981, 171067, 182773, 607808})); |
| 1451 | REQUIRE(CHECK_COLUMN(result, 1, |
| 1452 | {"\x30\x30\x69\\047\x6d" , "\x30\x30\x69\\047\x76" , "\x32\x30\x31\x35\xe2\x80\x8e" , |
| 1453 | "\x32\x31\xcf\x80" , "\x32\x34\x68\x6f\x75\x72\x73\xe2\x80\xac" , |
| 1454 | "\x61\x72\x64\x65\xcc\x80\x63\x68" , "\x61\xef\xac\x81" , |
| 1455 | "\x70\x6f\x76\x65\x72\x74\x79\xe2\x80\xaa" })); |
| 1456 | REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 1, 1, 1, 2, 1, 1})); |
| 1457 | } |
| 1458 | |
| 1459 | TEST_CASE("Test copy from ncvoter csv" , "[copy]" ) { |
| 1460 | FileSystem fs; |
| 1461 | unique_ptr<QueryResult> result; |
| 1462 | DuckDB db(nullptr); |
| 1463 | Connection con(db); |
| 1464 | |
| 1465 | auto csv_path = GetCSVPath(); |
| 1466 | auto ncvoter_csv = fs.JoinPath(csv_path, "ncvoter.csv" ); |
| 1467 | WriteBinary(ncvoter_csv, ncvoter, sizeof(ncvoter)); |
| 1468 | |
| 1469 | REQUIRE_NO_FAIL(con.Query( |
| 1470 | "CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd " |
| 1471 | "STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, " |
| 1472 | "name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, " |
| 1473 | "full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, " |
| 1474 | "street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, " |
| 1475 | "res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, " |
| 1476 | "mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state " |
| 1477 | "STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number " |
| 1478 | "STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, " |
| 1479 | "party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, " |
| 1480 | "registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc " |
| 1481 | "STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv " |
| 1482 | "STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, " |
| 1483 | "nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, " |
| 1484 | "county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, " |
| 1485 | "school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, " |
| 1486 | "water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, " |
| 1487 | "sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, " |
| 1488 | "munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, " |
| 1489 | "confidential_ind STRING, age STRING, ncid STRING, vtd_abbrv STRING, vtd_desc STRING);" )); |
| 1490 | result = con.Query("COPY ncvoters FROM '" + ncvoter_csv + "' DELIMITER '\t';" ); |
| 1491 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 1492 | |
| 1493 | result = con.Query("SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;" ); |
| 1494 | REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); |
| 1495 | REQUIRE(CHECK_COLUMN(result, 1, |
| 1496 | {"ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , "ALAMANCE" , |
| 1497 | "ALAMANCE" , "ALAMANCE" , "ALAMANCE" })); |
| 1498 | REQUIRE(CHECK_COLUMN(result, 2, {"09S" , "09S" , "03W" , "09S" , "1210" , "035" , "124" , "06E" , "035" , "064" })); |
| 1499 | REQUIRE(CHECK_COLUMN(result, 3, |
| 1500 | {Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value(), Value()})); |
| 1501 | } |
| 1502 | |
| 1503 | TEST_CASE("Test date copy" , "[copy]" ) { |
| 1504 | FileSystem fs; |
| 1505 | unique_ptr<QueryResult> result; |
| 1506 | DuckDB db(nullptr); |
| 1507 | Connection con(db); |
| 1508 | |
| 1509 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE date_test(d date);" )); |
| 1510 | |
| 1511 | auto csv_path = GetCSVPath(); |
| 1512 | auto date_csv = fs.JoinPath(csv_path, "date.csv" ); |
| 1513 | WriteCSV(date_csv, "2019-06-05\n" ); |
| 1514 | |
| 1515 | result = con.Query("COPY date_test FROM '" + date_csv + "';" ); |
| 1516 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 1517 | |
| 1518 | result = con.Query("SELECT cast(d as string) FROM date_test;" ); |
| 1519 | REQUIRE(CHECK_COLUMN(result, 0, {"2019-06-05" })); |
| 1520 | } |
| 1521 | |
| 1522 | TEST_CASE("Test cranlogs broken gzip copy and temp table" , "[copy][.]" ) { |
| 1523 | FileSystem fs; |
| 1524 | unique_ptr<QueryResult> result; |
| 1525 | DuckDB db(nullptr); |
| 1526 | Connection con(db); |
| 1527 | |
| 1528 | auto csv_path = GetCSVPath(); |
| 1529 | auto cranlogs_csv = fs.JoinPath(csv_path, "cranlogs.csv.gz" ); |
| 1530 | WriteBinary(cranlogs_csv, tmp2013_06_15, sizeof(tmp2013_06_15)); |
| 1531 | |
| 1532 | REQUIRE_NO_FAIL(con.Query("CREATE TEMPORARY TABLE cranlogs (date date,time string,size int,r_version string,r_arch " |
| 1533 | "string,r_os string,package string,version string,country string,ip_id int)" )); |
| 1534 | |
| 1535 | result = con.Query("COPY cranlogs FROM '" + cranlogs_csv + "' DELIMITER ',' HEADER;" ); |
| 1536 | REQUIRE(CHECK_COLUMN(result, 0, {37459})); |
| 1537 | } |
| 1538 | |
| 1539 | TEST_CASE("Test imdb escapes" , "[copy]" ) { |
| 1540 | FileSystem fs; |
| 1541 | unique_ptr<QueryResult> result; |
| 1542 | DuckDB db(nullptr); |
| 1543 | Connection con(db); |
| 1544 | |
| 1545 | auto csv_path = GetCSVPath(); |
| 1546 | auto imdb_movie_info = fs.JoinPath(csv_path, "imdb_movie_info.csv" ); |
| 1547 | WriteBinary(imdb_movie_info, imdb_movie_info_escaped, sizeof(imdb_movie_info_escaped)); |
| 1548 | |
| 1549 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE movie_info (id integer NOT NULL PRIMARY KEY, movie_id integer NOT NULL, " |
| 1550 | "info_type_id integer NOT NULL, info text NOT NULL, note text);" )); |
| 1551 | |
| 1552 | result = con.Query("COPY movie_info FROM '" + imdb_movie_info + "' DELIMITER ',' ESCAPE '\\';" ); |
| 1553 | REQUIRE(result->success); |
| 1554 | REQUIRE(CHECK_COLUMN(result, 0, {201})); |
| 1555 | |
| 1556 | // TODO: actually check results |
| 1557 | result = con.Query("SELECT * FROM movie_info;" ); |
| 1558 | } |
| 1559 | |
| 1560 | TEST_CASE("Test read CSV function with lineitem" , "[copy]" ) { |
| 1561 | FileSystem fs; |
| 1562 | unique_ptr<QueryResult> result; |
| 1563 | DuckDB db(nullptr); |
| 1564 | Connection con(db); |
| 1565 | |
| 1566 | auto csv_path = GetCSVPath(); |
| 1567 | auto lineitem_csv = fs.JoinPath(csv_path, "lineitem.csv" ); |
| 1568 | WriteBinary(lineitem_csv, lineitem_sample, sizeof(lineitem_sample)); |
| 1569 | |
| 1570 | // create a view using the read_csv function |
| 1571 | REQUIRE_NO_FAIL(con.Query( |
| 1572 | "CREATE VIEW lineitem AS SELECT * FROM read_csv('" + lineitem_csv + |
| 1573 | "', '|', STRUCT_PACK(l_orderkey := 'INT', l_partkey := 'INT', l_suppkey := 'INT', l_linenumber := 'INT', " |
| 1574 | "l_quantity := 'INTEGER', l_extendedprice := 'DOUBLE', l_discount := 'DOUBLE', l_tax := 'DOUBLE', l_returnflag " |
| 1575 | ":= 'VARCHAR', l_linestatus := 'VARCHAR', l_shipdate := 'DATE', l_commitdate := 'DATE', l_receiptdate := " |
| 1576 | "'DATE', l_shipinstruct := 'VARCHAR', l_shipmode := 'VARCHAR', l_comment := 'VARCHAR'));" )); |
| 1577 | |
| 1578 | // each of these will read the CSV again through the view |
| 1579 | result = con.Query("SELECT COUNT(*) FROM lineitem" ); |
| 1580 | REQUIRE(CHECK_COLUMN(result, 0, {10})); |
| 1581 | |
| 1582 | result = con.Query("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;" ); |
| 1583 | REQUIRE(CHECK_COLUMN(result, 0, {15519, 6731, 6370, 214, 2403, 1564})); |
| 1584 | REQUIRE( |
| 1585 | CHECK_COLUMN(result, 1, |
| 1586 | {"egular courts above the" , "ly final dependencies: slyly bold " , "riously. regular, express dep" , |
| 1587 | "lites. fluffily even de" , " pending foxes. slyly re" , "arefully slyly ex" })); |
| 1588 | |
| 1589 | // test incorrect usage of read_csv function |
| 1590 | // wrong argument type |
| 1591 | REQUIRE_FAIL(con.Query("SELECT * FROM read_csv('" + lineitem_csv + "', '|', STRUCT_PACK(l_orderkey := 5))" )); |
| 1592 | } |
| 1593 | |
| 1594 | TEST_CASE("Test CSV with UTF8 NFC Normalization" , "[copy]" ) { |
| 1595 | FileSystem fs; |
| 1596 | unique_ptr<QueryResult> result; |
| 1597 | DuckDB db(nullptr); |
| 1598 | Connection con(db); |
| 1599 | |
| 1600 | auto csv_path = GetCSVPath(); |
| 1601 | auto nfc_csv = fs.JoinPath(csv_path, "nfc.csv" ); |
| 1602 | |
| 1603 | const char *nfc_content = "\xc3\xbc\n\x75\xcc\x88" ; |
| 1604 | |
| 1605 | WriteBinary(nfc_csv, (const uint8_t *)nfc_content, strlen(nfc_content)); |
| 1606 | |
| 1607 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE nfcstrings (s STRING);" )); |
| 1608 | REQUIRE_NO_FAIL(con.Query("COPY nfcstrings FROM '" + nfc_csv + "';" )); |
| 1609 | |
| 1610 | result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE s = '\xc3\xbc'" ); |
| 1611 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(2)})); |
| 1612 | } |
| 1613 | |
| 1614 | // http://www.unicode.org/Public/UCD/latest/ucd/NormalizationTest.txt |
| 1615 | TEST_CASE("Test CSV with Unicode NFC Normalization test suite" , "[copy]" ) { |
| 1616 | FileSystem fs; |
| 1617 | unique_ptr<QueryResult> result; |
| 1618 | DuckDB db(nullptr); |
| 1619 | Connection con(db); |
| 1620 | |
| 1621 | auto csv_path = GetCSVPath(); |
| 1622 | auto nfc_csv = fs.JoinPath(csv_path, "nfc_test_suite.csv" ); |
| 1623 | |
| 1624 | WriteBinary(nfc_csv, nfc_normalization, sizeof(nfc_normalization)); |
| 1625 | |
| 1626 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE nfcstrings (source STRING, nfc STRING, nfd STRING);" )); |
| 1627 | REQUIRE_NO_FAIL(con.Query("COPY nfcstrings FROM '" + nfc_csv + "' DELIMITER '|';" )); |
| 1628 | |
| 1629 | result = con.Query("SELECT COUNT(*) FROM nfcstrings" ); |
| 1630 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)})); |
| 1631 | |
| 1632 | result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE source=nfc" ); |
| 1633 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)})); |
| 1634 | |
| 1635 | result = con.Query("SELECT COUNT(*) FROM nfcstrings WHERE nfc=nfd" ); |
| 1636 | REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(18819)})); |
| 1637 | } |
| 1638 | |
| 1639 | TEST_CASE("Test CSV reading/writing from relations" , "[relation_api]" ) { |
| 1640 | DuckDB db(nullptr); |
| 1641 | Connection con(db); |
| 1642 | unique_ptr<QueryResult> result; |
| 1643 | |
| 1644 | // write a bunch of values to a CSV |
| 1645 | auto csv_file = TestCreatePath("relationtest.csv" ); |
| 1646 | |
| 1647 | con.Values("(1), (2), (3)" , {"i" })->WriteCSV(csv_file); |
| 1648 | |
| 1649 | // now scan the CSV file |
| 1650 | auto csv_scan = con.ReadCSV(csv_file, {"i INTEGER" }); |
| 1651 | result = csv_scan->Execute(); |
| 1652 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 1653 | |
| 1654 | REQUIRE_THROWS(con.ReadCSV(csv_file, {"i INTEGER); SELECT 42;--" })); |
| 1655 | } |
| 1656 | |