1// #define CATCH_CONFIG_RUNNER
2#include "catch.hpp"
3
4#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
5#include "duckdb/common/file_system.hpp"
6#include "duckdb/common/value_operations/value_operations.hpp"
7#include "compare_result.hpp"
8#include "duckdb/main/query_result.hpp"
9#include "test_helpers.hpp"
10#include "duckdb/parser/parsed_data/copy_info.hpp"
11
12#include <cmath>
13#include <fstream>
14
15using namespace std;
16
17#define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir"
18
19namespace duckdb {
20
21bool NO_FAIL(QueryResult &result) {
22 if (!result.success) {
23 fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
24 }
25 return result.success;
26}
27
28bool NO_FAIL(unique_ptr<QueryResult> result) {
29 return NO_FAIL(*result);
30}
31
32void TestDeleteDirectory(string path) {
33 FileSystem fs;
34 if (fs.DirectoryExists(path)) {
35 fs.RemoveDirectory(path);
36 }
37}
38
39void TestDeleteFile(string path) {
40 FileSystem fs;
41 if (fs.FileExists(path)) {
42 fs.RemoveFile(path);
43 }
44}
45
46void DeleteDatabase(string path) {
47 TestDeleteFile(path);
48 TestDeleteFile(path + ".wal");
49}
50
51void TestCreateDirectory(string path) {
52 FileSystem fs;
53 fs.CreateDirectory(path);
54}
55
56string TestCreatePath(string suffix) {
57 FileSystem fs;
58 if (!fs.DirectoryExists(TESTING_DIRECTORY_NAME)) {
59 fs.CreateDirectory(TESTING_DIRECTORY_NAME);
60 }
61 return fs.JoinPath(TESTING_DIRECTORY_NAME, suffix);
62}
63
64unique_ptr<DBConfig> GetTestConfig() {
65 auto result = make_unique<DBConfig>();
66 result->checkpoint_wal_size = 0;
67 return result;
68}
69
70string GetCSVPath() {
71 FileSystem fs;
72 string csv_path = TestCreatePath("csv_files");
73 if (fs.DirectoryExists(csv_path)) {
74 fs.RemoveDirectory(csv_path);
75 }
76 fs.CreateDirectory(csv_path);
77 return csv_path;
78}
79
//! Write the NUL-terminated string `csv` verbatim to the file at `path`.
void WriteCSV(std::string path, const char *csv) {
	// close explicitly so the data is flushed before the function returns
	std::ofstream out(path);
	out << csv;
	out.close();
}
85
//! Write `length` raw bytes from `data` to the file at `path` in binary mode.
void WriteBinary(std::string path, const uint8_t *data, uint64_t length) {
	std::ofstream out(path, std::ios::binary);
	out.write(reinterpret_cast<const char *>(data), length);
	out.close();
}
91
//! Verify that column `column_number` of the query result exactly matches `values`.
//! Streaming results are materialized first. On any mismatch the result is printed
//! and false is returned.
bool CHECK_COLUMN(QueryResult &result_, size_t column_number, vector<duckdb::Value> values) {
	unique_ptr<MaterializedQueryResult> materialized;
	if (result_.type == QueryResultType::STREAM_RESULT) {
		// pull the entire stream into memory so the chunks can be indexed below
		materialized = ((StreamQueryResult &)result_).Materialize();
	}
	auto &result = materialized ? *materialized : (MaterializedQueryResult &)result_;
	if (!result.success) {
		fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
		return false;
	}
	if (!(result.names.size() == result.types.size())) {
		// column names do not match
		result.Print();
		return false;
	}
	if (values.size() == 0) {
		// expecting an empty result: any rows at all are a failure
		if (result.collection.count != 0) {
			result.Print();
			return false;
		} else {
			return true;
		}
	}
	if (result.collection.count == 0) {
		// expected rows but the result is empty
		result.Print();
		return false;
	}
	if (column_number >= result.types.size()) {
		// requested column index is out of range for this result
		result.Print();
		return false;
	}
	size_t chunk_index = 0;
	// walk the result chunk by chunk; `i` counts the expected values consumed so far
	for (size_t i = 0; i < values.size();) {
		if (chunk_index >= result.collection.chunks.size()) {
			// ran out of chunks
			result.Print();
			return false;
		}
		// check this vector
		auto &chunk = *result.collection.chunks[chunk_index];
		auto &vector = chunk.data[column_number];
		if (i + chunk.size() > values.size()) {
			// too many values in this vector
			result.Print();
			return false;
		}
		for (size_t j = 0; j < chunk.size(); j++) {
			// NULL <> NULL, hence special handling
			if (vector.GetValue(j).is_null && values[i + j].is_null) {
				continue;
			}

			if (!Value::ValuesAreEqual(vector.GetValue(j), values[i + j])) {
				// FAIL("Incorrect result! Got " + vector.GetValue(j).ToString()
				// +
				// " but expected " + values[i + j].ToString());
				result.Print();
				return false;
			}
		}
		chunk_index++;
		i += chunk.size();
	}
	return true;
}
157
158bool CHECK_COLUMN(unique_ptr<duckdb::QueryResult> &result, size_t column_number, vector<duckdb::Value> values) {
159 return CHECK_COLUMN(*result, column_number, values);
160}
161
162bool CHECK_COLUMN(unique_ptr<duckdb::MaterializedQueryResult> &result, size_t column_number,
163 vector<duckdb::Value> values) {
164 return CHECK_COLUMN((QueryResult &)*result, column_number, values);
165}
166
167string compare_csv(duckdb::QueryResult &result, string csv, bool header) {
168 assert(result.type == QueryResultType::MATERIALIZED_RESULT);
169 auto &materialized = (MaterializedQueryResult &)result;
170 if (!materialized.success) {
171 fprintf(stderr, "Query failed with message: %s\n", materialized.error.c_str());
172 return materialized.error;
173 }
174 string error;
175 if (!compare_result(csv, materialized.collection, materialized.sql_types, header, error)) {
176 return error;
177 }
178 return "";
179}
180
181string show_diff(DataChunk &left, DataChunk &right) {
182 if (left.column_count() != right.column_count()) {
183 return StringUtil::Format("Different column counts: %d vs %d", (int)left.column_count(),
184 (int)right.column_count());
185 }
186 if (left.size() != right.size()) {
187 return StringUtil::Format("Different sizes: %zu vs %zu", left.size(), right.size());
188 }
189 string difference;
190 for (size_t i = 0; i < left.column_count(); i++) {
191 bool has_differences = false;
192 auto &left_vector = left.data[i];
193 auto &right_vector = right.data[i];
194 string left_column = StringUtil::Format("Result\n------\n%s [", TypeIdToString(left_vector.type).c_str());
195 string right_column = StringUtil::Format("Expect\n------\n%s [", TypeIdToString(right_vector.type).c_str());
196 if (left_vector.type == right_vector.type) {
197 for (size_t j = 0; j < left.size(); j++) {
198 auto left_value = left_vector.GetValue(j);
199 auto right_value = right_vector.GetValue(j);
200 if (!Value::ValuesAreEqual(left_value, right_value)) {
201 left_column += left_value.ToString() + ",";
202 right_column += right_value.ToString() + ",";
203 has_differences = true;
204 } else {
205 left_column += "_,";
206 right_column += "_,";
207 }
208 }
209 } else {
210 left_column += "...";
211 right_column += "...";
212 }
213 left_column += "]\n";
214 right_column += "]\n";
215 if (has_differences) {
216 difference += StringUtil::Format("Difference in column %d:\n", i);
217 difference += left_column + "\n" + right_column + "\n";
218 }
219 }
220 return difference;
221}
222
223bool compare_chunk(DataChunk &left, DataChunk &right) {
224 if (left.column_count() != right.column_count()) {
225 return false;
226 }
227 if (left.size() != right.size()) {
228 return false;
229 }
230 for (size_t i = 0; i < left.column_count(); i++) {
231 auto &left_vector = left.data[i];
232 auto &right_vector = right.data[i];
233 if (left_vector.type == right_vector.type) {
234 for (size_t j = 0; j < left.size(); j++) {
235 auto left_value = left_vector.GetValue(j);
236 auto right_value = right_vector.GetValue(j);
237 if (!Value::ValuesAreEqual(left_value, right_value)) {
238 return false;
239 }
240 }
241 }
242 }
243 return true;
244}
245
246//! Compares the result of a pipe-delimited CSV with the given DataChunk
247//! Returns true if they are equal, and stores an error_message otherwise
248bool compare_result(string csv, ChunkCollection &collection, vector<SQLType> sql_types, bool has_header,
249 string &error_message) {
250 assert(collection.count == 0 || collection.types.size() == sql_types.size());
251
252 // set up the CSV reader
253 CopyInfo info;
254 info.delimiter = "|";
255 info.header = true;
256 info.quote = "\"";
257 info.escape = "\"";
258 // set up the intermediate result chunk
259 vector<TypeId> internal_types;
260 for (auto &type : sql_types) {
261 internal_types.push_back(GetInternalType(type));
262 }
263 DataChunk parsed_result;
264 parsed_result.Initialize(internal_types);
265
266 // convert the CSV string into a stringstream
267 auto source = make_unique<istringstream>(csv);
268
269 BufferedCSVReader reader(info, sql_types, move(source));
270 idx_t collection_index = 0;
271 idx_t tuple_count = 0;
272 while (true) {
273 // parse a chunk from the CSV file
274 try {
275 parsed_result.Reset();
276 reader.ParseCSV(parsed_result);
277 } catch (Exception &ex) {
278 error_message = "Could not parse CSV: " + string(ex.what());
279 return false;
280 }
281 if (parsed_result.size() == 0) {
282 // out of tuples in CSV file
283 if (collection_index < collection.chunks.size()) {
284 error_message = StringUtil::Format("Too many tuples in result! Found %llu tuples, but expected %llu",
285 collection.count, tuple_count);
286 return false;
287 }
288 return true;
289 }
290 if (collection_index >= collection.chunks.size()) {
291 // ran out of chunks in the collection, but there are still tuples in the result
292 // keep parsing the csv file to get the total expected count
293 while (parsed_result.size() > 0) {
294 tuple_count += parsed_result.size();
295 parsed_result.Reset();
296 reader.ParseCSV(parsed_result);
297 }
298 error_message = StringUtil::Format("Too few tuples in result! Found %llu tuples, but expected %llu",
299 collection.count, tuple_count);
300 return false;
301 }
302 // same counts, compare tuples in chunks
303 if (!compare_chunk(*collection.chunks[collection_index], parsed_result)) {
304 error_message = show_diff(*collection.chunks[collection_index], parsed_result);
305 }
306
307 collection_index++;
308 tuple_count += parsed_result.size();
309 }
310}
311} // namespace duckdb
312