1#include <Common/Exception.h>
2#include <Common/FieldVisitors.h>
3
4#include <Core/Block.h>
5
6#include <IO/WriteBufferFromString.h>
7#include <IO/Operators.h>
8
9#include <Common/typeid_cast.h>
10#include <Common/assert_cast.h>
11
12#include <Columns/ColumnConst.h>
13
14#include <iterator>
15#include <memory>
16
17
18namespace DB
19{
20
21namespace ErrorCodes
22{
23 extern const int POSITION_OUT_OF_BOUND;
24 extern const int NOT_FOUND_COLUMN_IN_BLOCK;
25 extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
26 extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
27}
28
29
30Block::Block(std::initializer_list<ColumnWithTypeAndName> il) : data{il}
31{
32 initializeIndexByName();
33}
34
35
36Block::Block(const ColumnsWithTypeAndName & data_) : data{data_}
37{
38 initializeIndexByName();
39}
40
41
42void Block::initializeIndexByName()
43{
44 for (size_t i = 0, size = data.size(); i < size; ++i)
45 index_by_name[data[i].name] = i;
46}
47
48
49void Block::insert(size_t position, const ColumnWithTypeAndName & elem)
50{
51 if (position > data.size())
52 throw Exception("Position out of bound in Block::insert(), max position = "
53 + toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
54
55 for (auto & name_pos : index_by_name)
56 if (name_pos.second >= position)
57 ++name_pos.second;
58
59 index_by_name.emplace(elem.name, position);
60 data.emplace(data.begin() + position, elem);
61}
62
63void Block::insert(size_t position, ColumnWithTypeAndName && elem)
64{
65 if (position > data.size())
66 throw Exception("Position out of bound in Block::insert(), max position = "
67 + toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
68
69 for (auto & name_pos : index_by_name)
70 if (name_pos.second >= position)
71 ++name_pos.second;
72
73 index_by_name.emplace(elem.name, position);
74 data.emplace(data.begin() + position, std::move(elem));
75}
76
77
78void Block::insert(const ColumnWithTypeAndName & elem)
79{
80 index_by_name.emplace(elem.name, data.size());
81 data.emplace_back(elem);
82}
83
84void Block::insert(ColumnWithTypeAndName && elem)
85{
86 index_by_name.emplace(elem.name, data.size());
87 data.emplace_back(std::move(elem));
88}
89
90
91void Block::insertUnique(const ColumnWithTypeAndName & elem)
92{
93 if (index_by_name.end() == index_by_name.find(elem.name))
94 insert(elem);
95}
96
97void Block::insertUnique(ColumnWithTypeAndName && elem)
98{
99 if (index_by_name.end() == index_by_name.find(elem.name))
100 insert(std::move(elem));
101}
102
103
104void Block::erase(const std::set<size_t> & positions)
105{
106 for (auto it = positions.rbegin(); it != positions.rend(); ++it)
107 erase(*it);
108}
109
110
111void Block::erase(size_t position)
112{
113 if (data.empty())
114 throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
115
116 if (position >= data.size())
117 throw Exception("Position out of bound in Block::erase(), max position = "
118 + toString(data.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
119
120 eraseImpl(position);
121}
122
123
124void Block::eraseImpl(size_t position)
125{
126 data.erase(data.begin() + position);
127
128 for (auto it = index_by_name.begin(); it != index_by_name.end();)
129 {
130 if (it->second == position)
131 index_by_name.erase(it++);
132 else
133 {
134 if (it->second > position)
135 --it->second;
136 ++it;
137 }
138 }
139}
140
141
142void Block::erase(const String & name)
143{
144 auto index_it = index_by_name.find(name);
145 if (index_it == index_by_name.end())
146 throw Exception("No such name in Block::erase(): '"
147 + name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
148
149 eraseImpl(index_it->second);
150}
151
152
153ColumnWithTypeAndName & Block::safeGetByPosition(size_t position)
154{
155 if (data.empty())
156 throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
157
158 if (position >= data.size())
159 throw Exception("Position " + toString(position)
160 + " is out of bound in Block::safeGetByPosition(), max position = "
161 + toString(data.size() - 1)
162 + ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND);
163
164 return data[position];
165}
166
167
168const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
169{
170 if (data.empty())
171 throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
172
173 if (position >= data.size())
174 throw Exception("Position " + toString(position)
175 + " is out of bound in Block::safeGetByPosition(), max position = "
176 + toString(data.size() - 1)
177 + ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND);
178
179 return data[position];
180}
181
182
183ColumnWithTypeAndName & Block::getByName(const std::string & name)
184{
185 auto it = index_by_name.find(name);
186 if (index_by_name.end() == it)
187 throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
188 , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
189
190 return data[it->second];
191}
192
193
194const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
195{
196 auto it = index_by_name.find(name);
197 if (index_by_name.end() == it)
198 throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
199 , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
200
201 return data[it->second];
202}
203
204
205bool Block::has(const std::string & name) const
206{
207 return index_by_name.end() != index_by_name.find(name);
208}
209
210
211size_t Block::getPositionByName(const std::string & name) const
212{
213 auto it = index_by_name.find(name);
214 if (index_by_name.end() == it)
215 throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
216 , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
217
218 return it->second;
219}
220
221
222void Block::checkNumberOfRows(bool allow_null_columns) const
223{
224 ssize_t rows = -1;
225 for (const auto & elem : data)
226 {
227 if (!elem.column && allow_null_columns)
228 continue;
229
230 if (!elem.column)
231 throw Exception("Column " + elem.name + " in block is nullptr, in method checkNumberOfRows."
232 , ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
233
234 ssize_t size = elem.column->size();
235
236 if (rows == -1)
237 rows = size;
238 else if (rows != size)
239 throw Exception("Sizes of columns doesn't match: "
240 + data.front().name + ": " + toString(rows)
241 + ", " + elem.name + ": " + toString(size)
242 , ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
243 }
244}
245
246
247size_t Block::rows() const
248{
249 for (const auto & elem : data)
250 if (elem.column)
251 return elem.column->size();
252
253 return 0;
254}
255
256
257size_t Block::bytes() const
258{
259 size_t res = 0;
260 for (const auto & elem : data)
261 res += elem.column->byteSize();
262
263 return res;
264}
265
266size_t Block::allocatedBytes() const
267{
268 size_t res = 0;
269 for (const auto & elem : data)
270 res += elem.column->allocatedBytes();
271
272 return res;
273}
274
275std::string Block::dumpNames() const
276{
277 WriteBufferFromOwnString out;
278 for (auto it = data.begin(); it != data.end(); ++it)
279 {
280 if (it != data.begin())
281 out << ", ";
282 out << it->name;
283 }
284 return out.str();
285}
286
287
288std::string Block::dumpStructure() const
289{
290 WriteBufferFromOwnString out;
291 for (auto it = data.begin(); it != data.end(); ++it)
292 {
293 if (it != data.begin())
294 out << ", ";
295 it->dumpStructure(out);
296 }
297 return out.str();
298}
299
300
301Block Block::cloneEmpty() const
302{
303 Block res;
304
305 for (const auto & elem : data)
306 res.insert(elem.cloneEmpty());
307
308 return res;
309}
310
311
312MutableColumns Block::cloneEmptyColumns() const
313{
314 size_t num_columns = data.size();
315 MutableColumns columns(num_columns);
316 for (size_t i = 0; i < num_columns; ++i)
317 columns[i] = data[i].column ? data[i].column->cloneEmpty() : data[i].type->createColumn();
318 return columns;
319}
320
321
322Columns Block::getColumns() const
323{
324 size_t num_columns = data.size();
325 Columns columns(num_columns);
326 for (size_t i = 0; i < num_columns; ++i)
327 columns[i] = data[i].column;
328 return columns;
329}
330
331
332MutableColumns Block::mutateColumns()
333{
334 size_t num_columns = data.size();
335 MutableColumns columns(num_columns);
336 for (size_t i = 0; i < num_columns; ++i)
337 columns[i] = data[i].column ? (*std::move(data[i].column)).mutate() : data[i].type->createColumn();
338 return columns;
339}
340
341
342void Block::setColumns(MutableColumns && columns)
343{
344 /// TODO: assert if |columns| doesn't match |data|!
345 size_t num_columns = data.size();
346 for (size_t i = 0; i < num_columns; ++i)
347 data[i].column = std::move(columns[i]);
348}
349
350
351void Block::setColumns(const Columns & columns)
352{
353 /// TODO: assert if |columns| doesn't match |data|!
354 size_t num_columns = data.size();
355 for (size_t i = 0; i < num_columns; ++i)
356 data[i].column = columns[i];
357}
358
359
360
361Block Block::cloneWithColumns(MutableColumns && columns) const
362{
363 Block res;
364
365 size_t num_columns = data.size();
366 for (size_t i = 0; i < num_columns; ++i)
367 res.insert({ std::move(columns[i]), data[i].type, data[i].name });
368
369 return res;
370}
371
372
373Block Block::cloneWithColumns(const Columns & columns) const
374{
375 Block res;
376
377 size_t num_columns = data.size();
378
379 if (num_columns != columns.size())
380 throw Exception("Cannot clone block with columns because block has " + toString(num_columns) + " columns, "
381 "but " + toString(columns.size()) + " columns given.", ErrorCodes::LOGICAL_ERROR);
382
383 for (size_t i = 0; i < num_columns; ++i)
384 res.insert({ columns[i], data[i].type, data[i].name });
385
386 return res;
387}
388
389
390Block Block::cloneWithoutColumns() const
391{
392 Block res;
393
394 size_t num_columns = data.size();
395 for (size_t i = 0; i < num_columns; ++i)
396 res.insert({ nullptr, data[i].type, data[i].name });
397
398 return res;
399}
400
401
402Block Block::sortColumns() const
403{
404 Block sorted_block;
405
406 for (const auto & name : index_by_name)
407 sorted_block.insert(data[name.second]);
408
409 return sorted_block;
410}
411
412
413const ColumnsWithTypeAndName & Block::getColumnsWithTypeAndName() const
414{
415 return data;
416}
417
418
419NamesAndTypesList Block::getNamesAndTypesList() const
420{
421 NamesAndTypesList res;
422
423 for (const auto & elem : data)
424 res.emplace_back(elem.name, elem.type);
425
426 return res;
427}
428
429
430Names Block::getNames() const
431{
432 Names res;
433 res.reserve(columns());
434
435 for (const auto & elem : data)
436 res.push_back(elem.name);
437
438 return res;
439}
440
441
442DataTypes Block::getDataTypes() const
443{
444 DataTypes res;
445 res.reserve(columns());
446
447 for (const auto & elem : data)
448 res.push_back(elem.type);
449
450 return res;
451}
452
453
454template <typename ReturnType>
455static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
456{
457 auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]])
458 {
459 if constexpr (std::is_same_v<ReturnType, void>)
460 throw Exception(message, code);
461 else
462 return false;
463 };
464
465 size_t columns = rhs.columns();
466 if (lhs.columns() != columns)
467 return on_error("Block structure mismatch in " + context_description + " stream: different number of columns:\n"
468 + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
469
470 for (size_t i = 0; i < columns; ++i)
471 {
472 const auto & expected = rhs.getByPosition(i);
473 const auto & actual = lhs.getByPosition(i);
474
475 if (actual.name != expected.name)
476 return on_error("Block structure mismatch in " + context_description + " stream: different names of columns:\n"
477 + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
478
479 if (!actual.type->equals(*expected.type))
480 return on_error("Block structure mismatch in " + context_description + " stream: different types:\n"
481 + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
482
483 if (!actual.column || !expected.column)
484 continue;
485
486 if (actual.column->getName() != expected.column->getName())
487 return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n"
488 + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
489
490 if (isColumnConst(*actual.column) && isColumnConst(*expected.column))
491 {
492 Field actual_value = assert_cast<const ColumnConst &>(*actual.column).getField();
493 Field expected_value = assert_cast<const ColumnConst &>(*expected.column).getField();
494
495 if (actual_value != expected_value)
496 return on_error("Block structure mismatch in " + context_description + " stream: different values of constants, actual: "
497 + applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value),
498 ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
499 }
500 }
501
502 return ReturnType(true);
503}
504
505
506bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs)
507{
508 return checkBlockStructure<bool>(lhs, rhs, {});
509}
510
511
512void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
513{
514 checkBlockStructure<void>(lhs, rhs, context_description);
515}
516
517
518void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff)
519{
520 /// The traditional task: the largest common subsequence (LCS).
521 /// Assume that order is important. If this becomes wrong once, let's simplify it: for example, make 2 sets.
522
523 std::vector<std::vector<int>> lcs(lhs.columns() + 1);
524 for (auto & v : lcs)
525 v.resize(rhs.columns() + 1);
526
527 for (size_t i = 1; i <= lhs.columns(); ++i)
528 {
529 for (size_t j = 1; j <= rhs.columns(); ++j)
530 {
531 if (lhs.safeGetByPosition(i - 1) == rhs.safeGetByPosition(j - 1))
532 lcs[i][j] = lcs[i - 1][j - 1] + 1;
533 else
534 lcs[i][j] = std::max(lcs[i - 1][j], lcs[i][j - 1]);
535 }
536 }
537
538 /// Now go back and collect the answer.
539 ColumnsWithTypeAndName left_columns;
540 ColumnsWithTypeAndName right_columns;
541 size_t l = lhs.columns();
542 size_t r = rhs.columns();
543 while (l > 0 && r > 0)
544 {
545 if (lhs.safeGetByPosition(l - 1) == rhs.safeGetByPosition(r - 1))
546 {
547 /// This element is in both sequences, so it does not get into `diff`.
548 --l;
549 --r;
550 }
551 else
552 {
553 /// Small heuristics: most often used when getting a difference for (expected_block, actual_block).
554 /// Therefore, the preference will be given to the field, which is in the left block (expected_block), therefore
555 /// in `diff` the column from `actual_block` will get.
556 if (lcs[l][r - 1] >= lcs[l - 1][r])
557 right_columns.push_back(rhs.safeGetByPosition(--r));
558 else
559 left_columns.push_back(lhs.safeGetByPosition(--l));
560 }
561 }
562
563 while (l > 0)
564 left_columns.push_back(lhs.safeGetByPosition(--l));
565 while (r > 0)
566 right_columns.push_back(rhs.safeGetByPosition(--r));
567
568 WriteBufferFromString lhs_diff_writer(out_lhs_diff);
569 WriteBufferFromString rhs_diff_writer(out_rhs_diff);
570
571 for (auto it = left_columns.rbegin(); it != left_columns.rend(); ++it)
572 {
573 lhs_diff_writer << it->dumpStructure();
574 lhs_diff_writer << ", position: " << lhs.getPositionByName(it->name) << '\n';
575 }
576 for (auto it = right_columns.rbegin(); it != right_columns.rend(); ++it)
577 {
578 rhs_diff_writer << it->dumpStructure();
579 rhs_diff_writer << ", position: " << rhs.getPositionByName(it->name) << '\n';
580 }
581}
582
583
584void Block::clear()
585{
586 info = BlockInfo();
587 data.clear();
588 index_by_name.clear();
589}
590
591void Block::swap(Block & other) noexcept
592{
593 std::swap(info, other.info);
594 data.swap(other.data);
595 index_by_name.swap(other.index_by_name);
596}
597
598
599void Block::updateHash(SipHash & hash) const
600{
601 for (size_t row_no = 0, num_rows = rows(); row_no < num_rows; ++row_no)
602 for (const auto & col : data)
603 col.column->updateHashWithValue(row_no, hash);
604}
605
606}
607