// #define CATCH_CONFIG_RUNNER
#include "catch.hpp"

#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/value_operations/value_operations.hpp"
#include "compare_result.hpp"
#include "duckdb/main/query_result.hpp"
#include "test_helpers.hpp"
#include "duckdb/parser/parsed_data/copy_info.hpp"

#include <cmath>
#include <fstream>

using namespace std;

#define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir"

namespace duckdb {

bool NO_FAIL(QueryResult &result) {
	if (!result.success) {
		fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
	}
	return result.success;
}

bool NO_FAIL(unique_ptr<QueryResult> result) {
	return NO_FAIL(*result);
}
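
// Illustrative usage sketch (not part of this file; `con` is an assumed
// Connection created by the test):
//
//   auto result = con.Query("SELECT 42");
//   REQUIRE(NO_FAIL(*result)); // on failure, the error is printed to stderr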

void TestDeleteDirectory(string path) {
	FileSystem fs;
	if (fs.DirectoryExists(path)) {
		fs.RemoveDirectory(path);
	}
}

void TestDeleteFile(string path) {
	FileSystem fs;
	if (fs.FileExists(path)) {
		fs.RemoveFile(path);
	}
}

void DeleteDatabase(string path) {
	TestDeleteFile(path);
	TestDeleteFile(path + ".wal");
}

void TestCreateDirectory(string path) {
	FileSystem fs;
	fs.CreateDirectory(path);
}

string TestCreatePath(string suffix) {
	FileSystem fs;
	if (!fs.DirectoryExists(TESTING_DIRECTORY_NAME)) {
		fs.CreateDirectory(TESTING_DIRECTORY_NAME);
	}
	return fs.JoinPath(TESTING_DIRECTORY_NAME, suffix);
}
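
// Illustrative usage sketch (the file name is made up): paths created here
// live under TESTING_DIRECTORY_NAME, and DeleteDatabase removes both the
// database file and its write-ahead log.
//
//   auto db_path = TestCreatePath("storage_test.db");
//   DeleteDatabase(db_path); // removes storage_test.db and storage_test.db.wal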

unique_ptr<DBConfig> GetTestConfig() {
	auto result = make_unique<DBConfig>();
	result->checkpoint_wal_size = 0;
	return result;
}
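
// Illustrative usage sketch (assumes the DuckDB(path, config) constructor).
// Setting checkpoint_wal_size to 0 makes the WAL checkpoint as eagerly as
// possible, so tests exercise the checkpointing code path.
//
//   auto config = GetTestConfig();
//   DuckDB db(db_path, config.get());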

string GetCSVPath() {
	FileSystem fs;
	string csv_path = TestCreatePath("csv_files");
	if (fs.DirectoryExists(csv_path)) {
		fs.RemoveDirectory(csv_path);
	}
	fs.CreateDirectory(csv_path);
	return csv_path;
}

void WriteCSV(string path, const char *csv) {
	ofstream csv_writer(path);
	csv_writer << csv;
	csv_writer.close();
}

void WriteBinary(string path, const uint8_t *data, uint64_t length) {
	ofstream binary_writer(path, ios::binary);
	binary_writer.write((const char *)data, length);
	binary_writer.close();
}
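
// Illustrative usage sketch (file name and contents are made up):
//
//   FileSystem fs;
//   string csv_file = fs.JoinPath(GetCSVPath(), "test.csv");
//   WriteCSV(csv_file, "a|b\n1|hello\n2|world\n");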

bool CHECK_COLUMN(QueryResult &result_, size_t column_number, vector<duckdb::Value> values) {
	unique_ptr<MaterializedQueryResult> materialized;
	if (result_.type == QueryResultType::STREAM_RESULT) {
		materialized = ((StreamQueryResult &)result_).Materialize();
	}
	auto &result = materialized ? *materialized : (MaterializedQueryResult &)result_;
	if (!result.success) {
		fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
		return false;
	}
	if (result.names.size() != result.types.size()) {
		// the number of column names does not match the number of columns
		result.Print();
		return false;
	}
	if (values.size() == 0) {
		if (result.collection.count != 0) {
			result.Print();
			return false;
		} else {
			return true;
		}
	}
	if (result.collection.count == 0) {
		result.Print();
		return false;
	}
	if (column_number >= result.types.size()) {
		result.Print();
		return false;
	}
	size_t chunk_index = 0;
	for (size_t i = 0; i < values.size();) {
		if (chunk_index >= result.collection.chunks.size()) {
			// ran out of chunks
			result.Print();
			return false;
		}
		// check this vector
		auto &chunk = *result.collection.chunks[chunk_index];
		auto &vector = chunk.data[column_number];
		if (i + chunk.size() > values.size()) {
			// too many values in this vector
			result.Print();
			return false;
		}
		for (size_t j = 0; j < chunk.size(); j++) {
			// NULL <> NULL, hence special handling
			if (vector.GetValue(j).is_null && values[i + j].is_null) {
				continue;
			}

			if (!Value::ValuesAreEqual(vector.GetValue(j), values[i + j])) {
				// FAIL("Incorrect result! Got " + vector.GetValue(j).ToString() +
				//      " but expected " + values[i + j].ToString());
				result.Print();
				return false;
			}
		}
		chunk_index++;
		i += chunk.size();
	}
	return true;
}

bool CHECK_COLUMN(unique_ptr<duckdb::QueryResult> &result, size_t column_number, vector<duckdb::Value> values) {
	return CHECK_COLUMN(*result, column_number, values);
}

bool CHECK_COLUMN(unique_ptr<duckdb::MaterializedQueryResult> &result, size_t column_number,
                  vector<duckdb::Value> values) {
	return CHECK_COLUMN((QueryResult &)*result, column_number, values);
}
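
// Illustrative usage sketch (query and values are made up; Value() denotes a
// NULL in the expected column, and NULLs compare equal per the special
// handling above):
//
//   auto result = con.Query("SELECT i FROM integers ORDER BY i");
//   REQUIRE(CHECK_COLUMN(result, 0, {Value(), Value::INTEGER(1), Value::INTEGER(2)}));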

string compare_csv(duckdb::QueryResult &result, string csv, bool header) {
	assert(result.type == QueryResultType::MATERIALIZED_RESULT);
	auto &materialized = (MaterializedQueryResult &)result;
	if (!materialized.success) {
		fprintf(stderr, "Query failed with message: %s\n", materialized.error.c_str());
		return materialized.error;
	}
	string error;
	if (!compare_result(csv, materialized.collection, materialized.sql_types, header, error)) {
		return error;
	}
	return "";
}
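
// Illustrative usage sketch (query and CSV are made up; the expected CSV is
// pipe-delimited, matching the reader configured in compare_result below):
//
//   auto result = con.Query("SELECT a, b FROM tbl ORDER BY a");
//   string error = compare_csv(*result, "a|b\n1|hello\n2|world\n", true);
//   REQUIRE(error.empty());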

string show_diff(DataChunk &left, DataChunk &right) {
	if (left.column_count() != right.column_count()) {
		return StringUtil::Format("Different column counts: %d vs %d", (int)left.column_count(),
		                          (int)right.column_count());
	}
	if (left.size() != right.size()) {
		return StringUtil::Format("Different sizes: %zu vs %zu", left.size(), right.size());
	}
	string difference;
	for (size_t i = 0; i < left.column_count(); i++) {
		bool has_differences = false;
		auto &left_vector = left.data[i];
		auto &right_vector = right.data[i];
		string left_column = StringUtil::Format("Result\n------\n%s [", TypeIdToString(left_vector.type).c_str());
		string right_column = StringUtil::Format("Expect\n------\n%s [", TypeIdToString(right_vector.type).c_str());
		if (left_vector.type == right_vector.type) {
			for (size_t j = 0; j < left.size(); j++) {
				auto left_value = left_vector.GetValue(j);
				auto right_value = right_vector.GetValue(j);
				if (!Value::ValuesAreEqual(left_value, right_value)) {
					left_column += left_value.ToString() + ",";
					right_column += right_value.ToString() + ",";
					has_differences = true;
				} else {
					left_column += "_,";
					right_column += "_,";
				}
			}
		} else {
			// mismatching types: the values are elided, but the column must
			// still be reported as different
			left_column += "...";
			right_column += "...";
			has_differences = true;
		}
		left_column += "]\n";
		right_column += "]\n";
		if (has_differences) {
			difference += StringUtil::Format("Difference in column %zu:\n", i);
			difference += left_column + "\n" + right_column + "\n";
		}
	}
	return difference;
}
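
// Example of the diff format produced above (illustrative values; the type
// name comes from TypeIdToString, and matching positions are shown as "_"):
//
//   Difference in column 0:
//   Result
//   ------
//   INT32 [_,3,_,]
//
//   Expect
//   ------
//   INT32 [_,4,_,]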

bool compare_chunk(DataChunk &left, DataChunk &right) {
	if (left.column_count() != right.column_count()) {
		return false;
	}
	if (left.size() != right.size()) {
		return false;
	}
	for (size_t i = 0; i < left.column_count(); i++) {
		auto &left_vector = left.data[i];
		auto &right_vector = right.data[i];
		// values are only compared when the column types match; a column with
		// mismatching types is skipped here (show_diff reports it as "...")
		if (left_vector.type == right_vector.type) {
			for (size_t j = 0; j < left.size(); j++) {
				auto left_value = left_vector.GetValue(j);
				auto right_value = right_vector.GetValue(j);
				if (!Value::ValuesAreEqual(left_value, right_value)) {
					return false;
				}
			}
		}
	}
	return true;
}

//! Compares the result of a pipe-delimited CSV with the given ChunkCollection
//! Returns true if they are equal, and stores an error_message otherwise
bool compare_result(string csv, ChunkCollection &collection, vector<SQLType> sql_types, bool header,
                    string &error_message) {
	assert(collection.count == 0 || collection.types.size() == sql_types.size());

	// set up the CSV reader
	CopyInfo info;
	info.delimiter = "|";
	info.header = header;
	info.quote = "\"";
	info.escape = "\"";
	// set up the intermediate result chunk
	vector<TypeId> internal_types;
	for (auto &type : sql_types) {
		internal_types.push_back(GetInternalType(type));
	}
	DataChunk parsed_result;
	parsed_result.Initialize(internal_types);

	// convert the CSV string into a stringstream
	auto source = make_unique<istringstream>(csv);

	BufferedCSVReader reader(info, sql_types, move(source));
	idx_t collection_index = 0;
	idx_t tuple_count = 0;
	while (true) {
		// parse a chunk from the CSV file
		try {
			parsed_result.Reset();
			reader.ParseCSV(parsed_result);
		} catch (Exception &ex) {
			error_message = "Could not parse CSV: " + string(ex.what());
			return false;
		}
		if (parsed_result.size() == 0) {
			// out of tuples in CSV file
			if (collection_index < collection.chunks.size()) {
				error_message = StringUtil::Format("Too many tuples in result! Found %llu tuples, but expected %llu",
				                                   collection.count, tuple_count);
				return false;
			}
			return true;
		}
		if (collection_index >= collection.chunks.size()) {
			// ran out of chunks in the collection, but there are still tuples in the CSV
			// keep parsing the CSV file to get the total expected count
			while (parsed_result.size() > 0) {
				tuple_count += parsed_result.size();
				parsed_result.Reset();
				reader.ParseCSV(parsed_result);
			}
			error_message = StringUtil::Format("Too few tuples in result! Found %llu tuples, but expected %llu",
			                                   collection.count, tuple_count);
			return false;
		}
		// same counts: compare the tuples in the chunks
		if (!compare_chunk(*collection.chunks[collection_index], parsed_result)) {
			error_message = show_diff(*collection.chunks[collection_index], parsed_result);
			return false;
		}

		collection_index++;
		tuple_count += parsed_result.size();
	}
}
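
// Illustrative usage sketch of calling compare_result directly (CSV contents
// are made up; most tests go through compare_csv above instead):
//
//   string error;
//   bool ok = compare_result("i|s\n1|hello\n", result.collection, result.sql_types,
//                            true, error);
//   REQUIRE(ok);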
} // namespace duckdb