| 1 | // #define CATCH_CONFIG_RUNNER | 
|---|
| 2 | #include "catch.hpp" | 
|---|
| 3 |  | 
|---|
| 4 | #include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp" | 
|---|
| 5 | #include "duckdb/common/file_system.hpp" | 
|---|
| 6 | #include "duckdb/common/value_operations/value_operations.hpp" | 
|---|
| 7 | #include "compare_result.hpp" | 
|---|
| 8 | #include "duckdb/main/query_result.hpp" | 
|---|
| 9 | #include "test_helpers.hpp" | 
|---|
| 10 | #include "duckdb/parser/parsed_data/copy_info.hpp" | 
|---|
| 11 |  | 
|---|
| 12 | #include <cmath> | 
|---|
| 13 | #include <fstream> | 
|---|
| 14 |  | 
|---|
| 15 | using namespace std; | 
|---|
| 16 |  | 
|---|
| 17 | #define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir" | 
|---|
| 18 |  | 
|---|
| 19 | namespace duckdb { | 
|---|
| 20 |  | 
|---|
| 21 | bool NO_FAIL(QueryResult &result) { | 
|---|
| 22 | if (!result.success) { | 
|---|
| 23 | fprintf(stderr, "Query failed with message: %s\n", result.error.c_str()); | 
|---|
| 24 | } | 
|---|
| 25 | return result.success; | 
|---|
| 26 | } | 
|---|
| 27 |  | 
|---|
| 28 | bool NO_FAIL(unique_ptr<QueryResult> result) { | 
|---|
| 29 | return NO_FAIL(*result); | 
|---|
| 30 | } | 
|---|
| 31 |  | 
|---|
| 32 | void TestDeleteDirectory(string path) { | 
|---|
| 33 | FileSystem fs; | 
|---|
| 34 | if (fs.DirectoryExists(path)) { | 
|---|
| 35 | fs.RemoveDirectory(path); | 
|---|
| 36 | } | 
|---|
| 37 | } | 
|---|
| 38 |  | 
|---|
| 39 | void TestDeleteFile(string path) { | 
|---|
| 40 | FileSystem fs; | 
|---|
| 41 | if (fs.FileExists(path)) { | 
|---|
| 42 | fs.RemoveFile(path); | 
|---|
| 43 | } | 
|---|
| 44 | } | 
|---|
| 45 |  | 
|---|
| 46 | void DeleteDatabase(string path) { | 
|---|
| 47 | TestDeleteFile(path); | 
|---|
| 48 | TestDeleteFile(path + ".wal"); | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 | void TestCreateDirectory(string path) { | 
|---|
| 52 | FileSystem fs; | 
|---|
| 53 | fs.CreateDirectory(path); | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | string TestCreatePath(string suffix) { | 
|---|
| 57 | FileSystem fs; | 
|---|
| 58 | if (!fs.DirectoryExists(TESTING_DIRECTORY_NAME)) { | 
|---|
| 59 | fs.CreateDirectory(TESTING_DIRECTORY_NAME); | 
|---|
| 60 | } | 
|---|
| 61 | return fs.JoinPath(TESTING_DIRECTORY_NAME, suffix); | 
|---|
| 62 | } | 
|---|
| 63 |  | 
|---|
| 64 | unique_ptr<DBConfig> GetTestConfig() { | 
|---|
| 65 | auto result = make_unique<DBConfig>(); | 
|---|
| 66 | result->checkpoint_wal_size = 0; | 
|---|
| 67 | return result; | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | string GetCSVPath() { | 
|---|
| 71 | FileSystem fs; | 
|---|
| 72 | string csv_path = TestCreatePath( "csv_files"); | 
|---|
| 73 | if (fs.DirectoryExists(csv_path)) { | 
|---|
| 74 | fs.RemoveDirectory(csv_path); | 
|---|
| 75 | } | 
|---|
| 76 | fs.CreateDirectory(csv_path); | 
|---|
| 77 | return csv_path; | 
|---|
| 78 | } | 
|---|
| 79 |  | 
|---|
// Writes the given null-terminated CSV text verbatim to the file at `path`,
// truncating any existing file. The stream is closed by its destructor.
void WriteCSV(std::string path, const char *csv) {
	std::ofstream out(path);
	out << csv;
}
|---|
| 85 |  | 
|---|
// Writes `length` raw bytes from `data` to the file at `path` in binary mode,
// truncating any existing file. The stream is closed by its destructor.
void WriteBinary(std::string path, const uint8_t *data, uint64_t length) {
	std::ofstream out(path, std::ios::binary);
	out.write(reinterpret_cast<const char *>(data), length);
}
|---|
| 91 |  | 
|---|
// Verifies that column `column_number` of `result_` equals `values`
// element-wise, in order, across all chunks of the materialized result.
// Streaming results are materialized first. On any mismatch the result is
// printed to aid debugging and false is returned; no exception is thrown.
bool CHECK_COLUMN(QueryResult &result_, size_t column_number, vector<duckdb::Value> values) {
	unique_ptr<MaterializedQueryResult> materialized;
	if (result_.type == QueryResultType::STREAM_RESULT) {
		// drain the stream into a materialized result we can index into
		materialized = ((StreamQueryResult &)result_).Materialize();
	}
	auto &result = materialized ? *materialized : (MaterializedQueryResult &)result_;
	if (!result.success) {
		fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
		return false;
	}
	if (!(result.names.size() == result.types.size())) {
		// column names do not match
		result.Print();
		return false;
	}
	if (values.size() == 0) {
		// expecting an empty column: succeed only if the result has no rows
		if (result.collection.count != 0) {
			result.Print();
			return false;
		} else {
			return true;
		}
	}
	if (result.collection.count == 0) {
		// expected rows, but the result is empty
		result.Print();
		return false;
	}
	if (column_number >= result.types.size()) {
		// requested column does not exist in the result
		result.Print();
		return false;
	}
	// walk the expected values chunk by chunk; `i` is the index of the first
	// expected value covered by the current chunk
	size_t chunk_index = 0;
	for (size_t i = 0; i < values.size();) {
		if (chunk_index >= result.collection.chunks.size()) {
			// ran out of chunks
			result.Print();
			return false;
		}
		// check this vector
		auto &chunk = *result.collection.chunks[chunk_index];
		auto &vector = chunk.data[column_number];
		if (i + chunk.size() > values.size()) {
			// too many values in this vector
			result.Print();
			return false;
		}
		for (size_t j = 0; j < chunk.size(); j++) {
			// NULL <> NULL, hence special handling
			if (vector.GetValue(j).is_null && values[i + j].is_null) {
				continue;
			}

			if (!Value::ValuesAreEqual(vector.GetValue(j), values[i + j])) {
				// FAIL("Incorrect result! Got " + vector.GetValue(j).ToString()
				// +
				//      " but expected " + values[i + j].ToString());
				result.Print();
				return false;
			}
		}
		chunk_index++;
		i += chunk.size();
	}
	return true;
}
|---|
| 157 |  | 
|---|
| 158 | bool CHECK_COLUMN(unique_ptr<duckdb::QueryResult> &result, size_t column_number, vector<duckdb::Value> values) { | 
|---|
| 159 | return CHECK_COLUMN(*result, column_number, values); | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | bool CHECK_COLUMN(unique_ptr<duckdb::MaterializedQueryResult> &result, size_t column_number, | 
|---|
| 163 | vector<duckdb::Value> values) { | 
|---|
| 164 | return CHECK_COLUMN((QueryResult &)*result, column_number, values); | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | string compare_csv(duckdb::QueryResult &result, string csv, bool ) { | 
|---|
| 168 | assert(result.type == QueryResultType::MATERIALIZED_RESULT); | 
|---|
| 169 | auto &materialized = (MaterializedQueryResult &)result; | 
|---|
| 170 | if (!materialized.success) { | 
|---|
| 171 | fprintf(stderr, "Query failed with message: %s\n", materialized.error.c_str()); | 
|---|
| 172 | return materialized.error; | 
|---|
| 173 | } | 
|---|
| 174 | string error; | 
|---|
| 175 | if (!compare_result(csv, materialized.collection, materialized.sql_types, header, error)) { | 
|---|
| 176 | return error; | 
|---|
| 177 | } | 
|---|
| 178 | return ""; | 
|---|
| 179 | } | 
|---|
| 180 |  | 
|---|
| 181 | string show_diff(DataChunk &left, DataChunk &right) { | 
|---|
| 182 | if (left.column_count() != right.column_count()) { | 
|---|
| 183 | return StringUtil::Format( "Different column counts: %d vs %d", (int)left.column_count(), | 
|---|
| 184 | (int)right.column_count()); | 
|---|
| 185 | } | 
|---|
| 186 | if (left.size() != right.size()) { | 
|---|
| 187 | return StringUtil::Format( "Different sizes: %zu vs %zu", left.size(), right.size()); | 
|---|
| 188 | } | 
|---|
| 189 | string difference; | 
|---|
| 190 | for (size_t i = 0; i < left.column_count(); i++) { | 
|---|
| 191 | bool has_differences = false; | 
|---|
| 192 | auto &left_vector = left.data[i]; | 
|---|
| 193 | auto &right_vector = right.data[i]; | 
|---|
| 194 | string left_column = StringUtil::Format( "Result\n------\n%s [", TypeIdToString(left_vector.type).c_str()); | 
|---|
| 195 | string right_column = StringUtil::Format( "Expect\n------\n%s [", TypeIdToString(right_vector.type).c_str()); | 
|---|
| 196 | if (left_vector.type == right_vector.type) { | 
|---|
| 197 | for (size_t j = 0; j < left.size(); j++) { | 
|---|
| 198 | auto left_value = left_vector.GetValue(j); | 
|---|
| 199 | auto right_value = right_vector.GetValue(j); | 
|---|
| 200 | if (!Value::ValuesAreEqual(left_value, right_value)) { | 
|---|
| 201 | left_column += left_value.ToString() + ","; | 
|---|
| 202 | right_column += right_value.ToString() + ","; | 
|---|
| 203 | has_differences = true; | 
|---|
| 204 | } else { | 
|---|
| 205 | left_column += "_,"; | 
|---|
| 206 | right_column += "_,"; | 
|---|
| 207 | } | 
|---|
| 208 | } | 
|---|
| 209 | } else { | 
|---|
| 210 | left_column += "..."; | 
|---|
| 211 | right_column += "..."; | 
|---|
| 212 | } | 
|---|
| 213 | left_column += "]\n"; | 
|---|
| 214 | right_column += "]\n"; | 
|---|
| 215 | if (has_differences) { | 
|---|
| 216 | difference += StringUtil::Format( "Difference in column %d:\n", i); | 
|---|
| 217 | difference += left_column + "\n"+ right_column + "\n"; | 
|---|
| 218 | } | 
|---|
| 219 | } | 
|---|
| 220 | return difference; | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | bool compare_chunk(DataChunk &left, DataChunk &right) { | 
|---|
| 224 | if (left.column_count() != right.column_count()) { | 
|---|
| 225 | return false; | 
|---|
| 226 | } | 
|---|
| 227 | if (left.size() != right.size()) { | 
|---|
| 228 | return false; | 
|---|
| 229 | } | 
|---|
| 230 | for (size_t i = 0; i < left.column_count(); i++) { | 
|---|
| 231 | auto &left_vector = left.data[i]; | 
|---|
| 232 | auto &right_vector = right.data[i]; | 
|---|
| 233 | if (left_vector.type == right_vector.type) { | 
|---|
| 234 | for (size_t j = 0; j < left.size(); j++) { | 
|---|
| 235 | auto left_value = left_vector.GetValue(j); | 
|---|
| 236 | auto right_value = right_vector.GetValue(j); | 
|---|
| 237 | if (!Value::ValuesAreEqual(left_value, right_value)) { | 
|---|
| 238 | return false; | 
|---|
| 239 | } | 
|---|
| 240 | } | 
|---|
| 241 | } | 
|---|
| 242 | } | 
|---|
| 243 | return true; | 
|---|
| 244 | } | 
|---|
| 245 |  | 
|---|
| 246 | //! Compares the result of a pipe-delimited CSV with the given DataChunk | 
|---|
| 247 | //! Returns true if they are equal, and stores an error_message otherwise | 
|---|
| 248 | bool compare_result(string csv, ChunkCollection &collection, vector<SQLType> sql_types, bool , | 
|---|
| 249 | string &error_message) { | 
|---|
| 250 | assert(collection.count == 0 || collection.types.size() == sql_types.size()); | 
|---|
| 251 |  | 
|---|
| 252 | // set up the CSV reader | 
|---|
| 253 | CopyInfo info; | 
|---|
| 254 | info.delimiter = "|"; | 
|---|
| 255 | info.header = true; | 
|---|
| 256 | info.quote = "\""; | 
|---|
| 257 | info.escape = "\""; | 
|---|
| 258 | // set up the intermediate result chunk | 
|---|
| 259 | vector<TypeId> internal_types; | 
|---|
| 260 | for (auto &type : sql_types) { | 
|---|
| 261 | internal_types.push_back(GetInternalType(type)); | 
|---|
| 262 | } | 
|---|
| 263 | DataChunk parsed_result; | 
|---|
| 264 | parsed_result.Initialize(internal_types); | 
|---|
| 265 |  | 
|---|
| 266 | // convert the CSV string into a stringstream | 
|---|
| 267 | auto source = make_unique<istringstream>(csv); | 
|---|
| 268 |  | 
|---|
| 269 | BufferedCSVReader reader(info, sql_types, move(source)); | 
|---|
| 270 | idx_t collection_index = 0; | 
|---|
| 271 | idx_t tuple_count = 0; | 
|---|
| 272 | while (true) { | 
|---|
| 273 | // parse a chunk from the CSV file | 
|---|
| 274 | try { | 
|---|
| 275 | parsed_result.Reset(); | 
|---|
| 276 | reader.ParseCSV(parsed_result); | 
|---|
| 277 | } catch (Exception &ex) { | 
|---|
| 278 | error_message = "Could not parse CSV: "+ string(ex.what()); | 
|---|
| 279 | return false; | 
|---|
| 280 | } | 
|---|
| 281 | if (parsed_result.size() == 0) { | 
|---|
| 282 | // out of tuples in CSV file | 
|---|
| 283 | if (collection_index < collection.chunks.size()) { | 
|---|
| 284 | error_message = StringUtil::Format( "Too many tuples in result! Found %llu tuples, but expected %llu", | 
|---|
| 285 | collection.count, tuple_count); | 
|---|
| 286 | return false; | 
|---|
| 287 | } | 
|---|
| 288 | return true; | 
|---|
| 289 | } | 
|---|
| 290 | if (collection_index >= collection.chunks.size()) { | 
|---|
| 291 | // ran out of chunks in the collection, but there are still tuples in the result | 
|---|
| 292 | // keep parsing the csv file to get the total expected count | 
|---|
| 293 | while (parsed_result.size() > 0) { | 
|---|
| 294 | tuple_count += parsed_result.size(); | 
|---|
| 295 | parsed_result.Reset(); | 
|---|
| 296 | reader.ParseCSV(parsed_result); | 
|---|
| 297 | } | 
|---|
| 298 | error_message = StringUtil::Format( "Too few tuples in result! Found %llu tuples, but expected %llu", | 
|---|
| 299 | collection.count, tuple_count); | 
|---|
| 300 | return false; | 
|---|
| 301 | } | 
|---|
| 302 | // same counts, compare tuples in chunks | 
|---|
| 303 | if (!compare_chunk(*collection.chunks[collection_index], parsed_result)) { | 
|---|
| 304 | error_message = show_diff(*collection.chunks[collection_index], parsed_result); | 
|---|
| 305 | } | 
|---|
| 306 |  | 
|---|
| 307 | collection_index++; | 
|---|
| 308 | tuple_count += parsed_result.size(); | 
|---|
| 309 | } | 
|---|
| 310 | } | 
|---|
| 311 | } // namespace duckdb | 
|---|
| 312 |  | 
|---|