1 | // #define CATCH_CONFIG_RUNNER |
2 | #include "catch.hpp" |
3 | |
4 | #include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp" |
5 | #include "duckdb/common/file_system.hpp" |
6 | #include "duckdb/common/value_operations/value_operations.hpp" |
7 | #include "compare_result.hpp" |
8 | #include "duckdb/main/query_result.hpp" |
9 | #include "test_helpers.hpp" |
10 | #include "duckdb/parser/parsed_data/copy_info.hpp" |
11 | |
12 | #include <cmath> |
13 | #include <fstream> |
14 | |
15 | using namespace std; |
16 | |
//! Name of the directory that TestCreatePath creates when it is missing and joins its suffix onto
#define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir"
18 | |
19 | namespace duckdb { |
20 | |
21 | bool NO_FAIL(QueryResult &result) { |
22 | if (!result.success) { |
23 | fprintf(stderr, "Query failed with message: %s\n" , result.error.c_str()); |
24 | } |
25 | return result.success; |
26 | } |
27 | |
28 | bool NO_FAIL(unique_ptr<QueryResult> result) { |
29 | return NO_FAIL(*result); |
30 | } |
31 | |
32 | void TestDeleteDirectory(string path) { |
33 | FileSystem fs; |
34 | if (fs.DirectoryExists(path)) { |
35 | fs.RemoveDirectory(path); |
36 | } |
37 | } |
38 | |
39 | void TestDeleteFile(string path) { |
40 | FileSystem fs; |
41 | if (fs.FileExists(path)) { |
42 | fs.RemoveFile(path); |
43 | } |
44 | } |
45 | |
46 | void DeleteDatabase(string path) { |
47 | TestDeleteFile(path); |
48 | TestDeleteFile(path + ".wal" ); |
49 | } |
50 | |
51 | void TestCreateDirectory(string path) { |
52 | FileSystem fs; |
53 | fs.CreateDirectory(path); |
54 | } |
55 | |
56 | string TestCreatePath(string suffix) { |
57 | FileSystem fs; |
58 | if (!fs.DirectoryExists(TESTING_DIRECTORY_NAME)) { |
59 | fs.CreateDirectory(TESTING_DIRECTORY_NAME); |
60 | } |
61 | return fs.JoinPath(TESTING_DIRECTORY_NAME, suffix); |
62 | } |
63 | |
64 | unique_ptr<DBConfig> GetTestConfig() { |
65 | auto result = make_unique<DBConfig>(); |
66 | result->checkpoint_wal_size = 0; |
67 | return result; |
68 | } |
69 | |
70 | string GetCSVPath() { |
71 | FileSystem fs; |
72 | string csv_path = TestCreatePath("csv_files" ); |
73 | if (fs.DirectoryExists(csv_path)) { |
74 | fs.RemoveDirectory(csv_path); |
75 | } |
76 | fs.CreateDirectory(csv_path); |
77 | return csv_path; |
78 | } |
79 | |
//! Writes the given text to the file at `path` using a text-mode output stream.
//! \param path the file to write
//! \param csv null-terminated text that is streamed into the file
void WriteCSV(string path, const char *csv) {
    ofstream output_file;
    output_file.open(path);
    output_file << csv;
    output_file.close();
}
85 | |
//! Writes `length` bytes starting at `data` to the file at `path` using a binary-mode output stream.
//! \param path the file to write
//! \param data pointer to the first byte to write
//! \param length the number of bytes to write
void WriteBinary(string path, const uint8_t *data, uint64_t length) {
    ofstream output_file;
    output_file.open(path, ios::binary);
    const char *raw_bytes = reinterpret_cast<const char *>(data);
    output_file.write(raw_bytes, length);
    output_file.close();
}
91 | |
92 | bool CHECK_COLUMN(QueryResult &result_, size_t column_number, vector<duckdb::Value> values) { |
93 | unique_ptr<MaterializedQueryResult> materialized; |
94 | if (result_.type == QueryResultType::STREAM_RESULT) { |
95 | materialized = ((StreamQueryResult &)result_).Materialize(); |
96 | } |
97 | auto &result = materialized ? *materialized : (MaterializedQueryResult &)result_; |
98 | if (!result.success) { |
99 | fprintf(stderr, "Query failed with message: %s\n" , result.error.c_str()); |
100 | return false; |
101 | } |
102 | if (!(result.names.size() == result.types.size())) { |
103 | // column names do not match |
104 | result.Print(); |
105 | return false; |
106 | } |
107 | if (values.size() == 0) { |
108 | if (result.collection.count != 0) { |
109 | result.Print(); |
110 | return false; |
111 | } else { |
112 | return true; |
113 | } |
114 | } |
115 | if (result.collection.count == 0) { |
116 | result.Print(); |
117 | return false; |
118 | } |
119 | if (column_number >= result.types.size()) { |
120 | result.Print(); |
121 | return false; |
122 | } |
123 | size_t chunk_index = 0; |
124 | for (size_t i = 0; i < values.size();) { |
125 | if (chunk_index >= result.collection.chunks.size()) { |
126 | // ran out of chunks |
127 | result.Print(); |
128 | return false; |
129 | } |
130 | // check this vector |
131 | auto &chunk = *result.collection.chunks[chunk_index]; |
132 | auto &vector = chunk.data[column_number]; |
133 | if (i + chunk.size() > values.size()) { |
134 | // too many values in this vector |
135 | result.Print(); |
136 | return false; |
137 | } |
138 | for (size_t j = 0; j < chunk.size(); j++) { |
139 | // NULL <> NULL, hence special handling |
140 | if (vector.GetValue(j).is_null && values[i + j].is_null) { |
141 | continue; |
142 | } |
143 | |
144 | if (!Value::ValuesAreEqual(vector.GetValue(j), values[i + j])) { |
145 | // FAIL("Incorrect result! Got " + vector.GetValue(j).ToString() |
146 | // + |
147 | // " but expected " + values[i + j].ToString()); |
148 | result.Print(); |
149 | return false; |
150 | } |
151 | } |
152 | chunk_index++; |
153 | i += chunk.size(); |
154 | } |
155 | return true; |
156 | } |
157 | |
158 | bool CHECK_COLUMN(unique_ptr<duckdb::QueryResult> &result, size_t column_number, vector<duckdb::Value> values) { |
159 | return CHECK_COLUMN(*result, column_number, values); |
160 | } |
161 | |
162 | bool CHECK_COLUMN(unique_ptr<duckdb::MaterializedQueryResult> &result, size_t column_number, |
163 | vector<duckdb::Value> values) { |
164 | return CHECK_COLUMN((QueryResult &)*result, column_number, values); |
165 | } |
166 | |
167 | string compare_csv(duckdb::QueryResult &result, string csv, bool ) { |
168 | assert(result.type == QueryResultType::MATERIALIZED_RESULT); |
169 | auto &materialized = (MaterializedQueryResult &)result; |
170 | if (!materialized.success) { |
171 | fprintf(stderr, "Query failed with message: %s\n" , materialized.error.c_str()); |
172 | return materialized.error; |
173 | } |
174 | string error; |
175 | if (!compare_result(csv, materialized.collection, materialized.sql_types, header, error)) { |
176 | return error; |
177 | } |
178 | return "" ; |
179 | } |
180 | |
181 | string show_diff(DataChunk &left, DataChunk &right) { |
182 | if (left.column_count() != right.column_count()) { |
183 | return StringUtil::Format("Different column counts: %d vs %d" , (int)left.column_count(), |
184 | (int)right.column_count()); |
185 | } |
186 | if (left.size() != right.size()) { |
187 | return StringUtil::Format("Different sizes: %zu vs %zu" , left.size(), right.size()); |
188 | } |
189 | string difference; |
190 | for (size_t i = 0; i < left.column_count(); i++) { |
191 | bool has_differences = false; |
192 | auto &left_vector = left.data[i]; |
193 | auto &right_vector = right.data[i]; |
194 | string left_column = StringUtil::Format("Result\n------\n%s [" , TypeIdToString(left_vector.type).c_str()); |
195 | string right_column = StringUtil::Format("Expect\n------\n%s [" , TypeIdToString(right_vector.type).c_str()); |
196 | if (left_vector.type == right_vector.type) { |
197 | for (size_t j = 0; j < left.size(); j++) { |
198 | auto left_value = left_vector.GetValue(j); |
199 | auto right_value = right_vector.GetValue(j); |
200 | if (!Value::ValuesAreEqual(left_value, right_value)) { |
201 | left_column += left_value.ToString() + "," ; |
202 | right_column += right_value.ToString() + "," ; |
203 | has_differences = true; |
204 | } else { |
205 | left_column += "_," ; |
206 | right_column += "_," ; |
207 | } |
208 | } |
209 | } else { |
210 | left_column += "..." ; |
211 | right_column += "..." ; |
212 | } |
213 | left_column += "]\n" ; |
214 | right_column += "]\n" ; |
215 | if (has_differences) { |
216 | difference += StringUtil::Format("Difference in column %d:\n" , i); |
217 | difference += left_column + "\n" + right_column + "\n" ; |
218 | } |
219 | } |
220 | return difference; |
221 | } |
222 | |
223 | bool compare_chunk(DataChunk &left, DataChunk &right) { |
224 | if (left.column_count() != right.column_count()) { |
225 | return false; |
226 | } |
227 | if (left.size() != right.size()) { |
228 | return false; |
229 | } |
230 | for (size_t i = 0; i < left.column_count(); i++) { |
231 | auto &left_vector = left.data[i]; |
232 | auto &right_vector = right.data[i]; |
233 | if (left_vector.type == right_vector.type) { |
234 | for (size_t j = 0; j < left.size(); j++) { |
235 | auto left_value = left_vector.GetValue(j); |
236 | auto right_value = right_vector.GetValue(j); |
237 | if (!Value::ValuesAreEqual(left_value, right_value)) { |
238 | return false; |
239 | } |
240 | } |
241 | } |
242 | } |
243 | return true; |
244 | } |
245 | |
246 | //! Compares the result of a pipe-delimited CSV with the given DataChunk |
247 | //! Returns true if they are equal, and stores an error_message otherwise |
248 | bool compare_result(string csv, ChunkCollection &collection, vector<SQLType> sql_types, bool , |
249 | string &error_message) { |
250 | assert(collection.count == 0 || collection.types.size() == sql_types.size()); |
251 | |
252 | // set up the CSV reader |
253 | CopyInfo info; |
254 | info.delimiter = "|" ; |
255 | info.header = true; |
256 | info.quote = "\"" ; |
257 | info.escape = "\"" ; |
258 | // set up the intermediate result chunk |
259 | vector<TypeId> internal_types; |
260 | for (auto &type : sql_types) { |
261 | internal_types.push_back(GetInternalType(type)); |
262 | } |
263 | DataChunk parsed_result; |
264 | parsed_result.Initialize(internal_types); |
265 | |
266 | // convert the CSV string into a stringstream |
267 | auto source = make_unique<istringstream>(csv); |
268 | |
269 | BufferedCSVReader reader(info, sql_types, move(source)); |
270 | idx_t collection_index = 0; |
271 | idx_t tuple_count = 0; |
272 | while (true) { |
273 | // parse a chunk from the CSV file |
274 | try { |
275 | parsed_result.Reset(); |
276 | reader.ParseCSV(parsed_result); |
277 | } catch (Exception &ex) { |
278 | error_message = "Could not parse CSV: " + string(ex.what()); |
279 | return false; |
280 | } |
281 | if (parsed_result.size() == 0) { |
282 | // out of tuples in CSV file |
283 | if (collection_index < collection.chunks.size()) { |
284 | error_message = StringUtil::Format("Too many tuples in result! Found %llu tuples, but expected %llu" , |
285 | collection.count, tuple_count); |
286 | return false; |
287 | } |
288 | return true; |
289 | } |
290 | if (collection_index >= collection.chunks.size()) { |
291 | // ran out of chunks in the collection, but there are still tuples in the result |
292 | // keep parsing the csv file to get the total expected count |
293 | while (parsed_result.size() > 0) { |
294 | tuple_count += parsed_result.size(); |
295 | parsed_result.Reset(); |
296 | reader.ParseCSV(parsed_result); |
297 | } |
298 | error_message = StringUtil::Format("Too few tuples in result! Found %llu tuples, but expected %llu" , |
299 | collection.count, tuple_count); |
300 | return false; |
301 | } |
302 | // same counts, compare tuples in chunks |
303 | if (!compare_chunk(*collection.chunks[collection_index], parsed_result)) { |
304 | error_message = show_diff(*collection.chunks[collection_index], parsed_result); |
305 | } |
306 | |
307 | collection_index++; |
308 | tuple_count += parsed_result.size(); |
309 | } |
310 | } |
311 | } // namespace duckdb |
312 | |