1 | #include <DataStreams/ReplacingSortedBlockInputStream.h> |
2 | #include <Columns/ColumnsNumber.h> |
3 | #include <common/logger_useful.h> |
4 | |
5 | |
6 | namespace DB |
7 | { |
8 | |
9 | namespace ErrorCodes |
10 | { |
11 | extern const int LOGICAL_ERROR; |
12 | } |
13 | |
14 | |
15 | void ReplacingSortedBlockInputStream::insertRow(MutableColumns & merged_columns) |
16 | { |
17 | if (out_row_sources_buf) |
18 | { |
19 | /// true flag value means "skip row" |
20 | current_row_sources[max_pos].setSkipFlag(false); |
21 | |
22 | out_row_sources_buf->write(reinterpret_cast<const char *>(current_row_sources.data()), |
23 | current_row_sources.size() * sizeof(RowSourcePart)); |
24 | current_row_sources.resize(0); |
25 | } |
26 | |
27 | for (size_t i = 0; i < num_columns; ++i) |
28 | merged_columns[i]->insertFrom(*(*selected_row.columns)[i], selected_row.row_num); |
29 | } |
30 | |
31 | |
32 | Block ReplacingSortedBlockInputStream::readImpl() |
33 | { |
34 | if (finished) |
35 | return Block(); |
36 | |
37 | MutableColumns merged_columns; |
38 | init(merged_columns); |
39 | |
40 | if (has_collation) |
41 | throw Exception("Logical error: " + getName() + " does not support collations" , ErrorCodes::LOGICAL_ERROR); |
42 | |
43 | if (merged_columns.empty()) |
44 | return Block(); |
45 | |
46 | merge(merged_columns, queue_without_collation); |
47 | return header.cloneWithColumns(std::move(merged_columns)); |
48 | } |
49 | |
50 | |
51 | void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue) |
52 | { |
53 | MergeStopCondition stop_condition(average_block_sizes, max_block_size); |
54 | /// Take the rows in needed order and put them into `merged_columns` until rows no more than `max_block_size` |
55 | while (!queue.empty()) |
56 | { |
57 | SortCursor current = queue.top(); |
58 | size_t current_block_granularity = current->rows; |
59 | |
60 | if (current_key.empty()) |
61 | setPrimaryKeyRef(current_key, current); |
62 | |
63 | setPrimaryKeyRef(next_key, current); |
64 | |
65 | bool key_differs = next_key != current_key; |
66 | |
67 | /// if there are enough rows and the last one is calculated completely |
68 | if (key_differs && stop_condition.checkStop()) |
69 | return; |
70 | |
71 | queue.pop(); |
72 | |
73 | if (key_differs) |
74 | { |
75 | /// Write the data for the previous primary key. |
76 | insertRow(merged_columns); |
77 | stop_condition.addRowWithGranularity(current_block_granularity); |
78 | selected_row.reset(); |
79 | current_key.swap(next_key); |
80 | } |
81 | |
82 | /// Initially, skip all rows. Unskip last on insert. |
83 | size_t current_pos = current_row_sources.size(); |
84 | if (out_row_sources_buf) |
85 | current_row_sources.emplace_back(current.impl->order, true); |
86 | |
87 | /// A non-strict comparison, since we select the last row for the same version values. |
88 | if (version_column_number == -1 |
89 | || selected_row.empty() |
90 | || current->all_columns[version_column_number]->compareAt( |
91 | current->pos, selected_row.row_num, |
92 | *(*selected_row.columns)[version_column_number], |
93 | /* nan_direction_hint = */ 1) >= 0) |
94 | { |
95 | max_pos = current_pos; |
96 | setRowRef(selected_row, current); |
97 | } |
98 | |
99 | if (!current->isLast()) |
100 | { |
101 | current->next(); |
102 | queue.push(current); |
103 | } |
104 | else |
105 | { |
106 | /// We get the next block from the corresponding source, if there is one. |
107 | fetchNextBlock(current, queue); |
108 | } |
109 | } |
110 | |
111 | /// We will write the data for the last primary key. |
112 | if (!selected_row.empty()) |
113 | insertRow(merged_columns); |
114 | |
115 | finished = true; |
116 | } |
117 | |
118 | } |
119 | |