1 | #include <Interpreters/sortBlock.h> |
2 | |
3 | #include <Columns/ColumnString.h> |
4 | #include <Columns/ColumnConst.h> |
5 | #include <Common/typeid_cast.h> |
6 | #include <Functions/FunctionHelpers.h> |
7 | |
8 | #include <pdqsort.h> |
9 | |
10 | namespace DB |
11 | { |
12 | |
13 | namespace ErrorCodes |
14 | { |
15 | extern const int BAD_COLLATION; |
16 | } |
17 | |
18 | static bool isCollationRequired(const SortColumnDescription & description) |
19 | { |
20 | return description.collator != nullptr; |
21 | } |
22 | |
23 | |
24 | ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description) |
25 | { |
26 | size_t size = description.size(); |
27 | ColumnsWithSortDescriptions res; |
28 | res.reserve(size); |
29 | |
30 | for (size_t i = 0; i < size; ++i) |
31 | { |
32 | const IColumn * column = !description[i].column_name.empty() |
33 | ? block.getByName(description[i].column_name).column.get() |
34 | : block.safeGetByPosition(description[i].column_number).column.get(); |
35 | |
36 | res.emplace_back(ColumnWithSortDescription{column, description[i], isColumnConst(*column)}); |
37 | } |
38 | |
39 | return res; |
40 | } |
41 | |
42 | |
43 | struct PartialSortingLess |
44 | { |
45 | const ColumnsWithSortDescriptions & columns; |
46 | |
47 | explicit PartialSortingLess(const ColumnsWithSortDescriptions & columns_) : columns(columns_) {} |
48 | |
49 | bool operator() (size_t a, size_t b) const |
50 | { |
51 | for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it) |
52 | { |
53 | int res; |
54 | if (it->column_const) |
55 | res = 0; |
56 | else |
57 | res = it->description.direction * it->column->compareAt(a, b, *it->column, it->description.nulls_direction); |
58 | if (res < 0) |
59 | return true; |
60 | else if (res > 0) |
61 | return false; |
62 | } |
63 | return false; |
64 | } |
65 | }; |
66 | |
67 | |
68 | struct PartialSortingLessWithCollation |
69 | { |
70 | const ColumnsWithSortDescriptions & columns; |
71 | |
72 | explicit PartialSortingLessWithCollation(const ColumnsWithSortDescriptions & columns_) |
73 | : columns(columns_) |
74 | { |
75 | } |
76 | |
77 | bool operator() (size_t a, size_t b) const |
78 | { |
79 | for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it) |
80 | { |
81 | int res; |
82 | |
83 | if (it->column_const) |
84 | { |
85 | res = 0; |
86 | } |
87 | else if (isCollationRequired(it->description)) |
88 | { |
89 | const ColumnString & column_string = assert_cast<const ColumnString &>(*it->column); |
90 | res = column_string.compareAtWithCollation(a, b, *it->column, *it->description.collator); |
91 | } |
92 | else |
93 | res = it->column->compareAt(a, b, *it->column, it->description.nulls_direction); |
94 | res *= it->description.direction; |
95 | if (res < 0) |
96 | return true; |
97 | else if (res > 0) |
98 | return false; |
99 | } |
100 | return false; |
101 | } |
102 | }; |
103 | |
104 | void sortBlock(Block & block, const SortDescription & description, UInt64 limit) |
105 | { |
106 | if (!block) |
107 | return; |
108 | |
109 | |
110 | /// If only one column to sort by |
111 | if (description.size() == 1) |
112 | { |
113 | |
114 | IColumn::Permutation perm; |
115 | bool reverse = description[0].direction == -1; |
116 | |
117 | const IColumn * column = !description[0].column_name.empty() |
118 | ? block.getByName(description[0].column_name).column.get() |
119 | : block.safeGetByPosition(description[0].column_number).column.get(); |
120 | |
121 | bool is_column_const = false; |
122 | if (isCollationRequired(description[0])) |
123 | { |
124 | /// it it's real string column, than we need sort |
125 | if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column)) |
126 | column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm); |
127 | else if (checkAndGetColumnConstData<ColumnString>(column)) |
128 | is_column_const = true; |
129 | else |
130 | throw Exception("Collations could be specified only for String columns." , ErrorCodes::BAD_COLLATION); |
131 | |
132 | } |
133 | else if (!isColumnConst(*column)) |
134 | column->getPermutation(reverse, limit, description[0].nulls_direction, perm); |
135 | else |
136 | /// we don't need to do anything with const column |
137 | is_column_const = true; |
138 | |
139 | size_t columns = block.columns(); |
140 | for (size_t i = 0; i < columns; ++i) |
141 | { |
142 | if (!is_column_const) |
143 | block.getByPosition(i).column = block.getByPosition(i).column->permute(perm, limit); |
144 | else if (limit != 0) // LIMIT exists |
145 | block.getByPosition(i).column = block.getByPosition(i).column->cut(0, limit); |
146 | } |
147 | } |
148 | else |
149 | { |
150 | size_t size = block.rows(); |
151 | IColumn::Permutation perm(size); |
152 | for (size_t i = 0; i < size; ++i) |
153 | perm[i] = i; |
154 | |
155 | if (limit >= size) |
156 | limit = 0; |
157 | |
158 | bool need_collation = false; |
159 | ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description); |
160 | |
161 | for (size_t i = 0, num_sort_columns = description.size(); i < num_sort_columns; ++i) |
162 | { |
163 | const IColumn * column = columns_with_sort_desc[i].column; |
164 | if (isCollationRequired(description[i])) |
165 | { |
166 | if (!checkAndGetColumn<ColumnString>(column) && !checkAndGetColumnConstData<ColumnString>(column)) |
167 | throw Exception("Collations could be specified only for String columns." , ErrorCodes::BAD_COLLATION); |
168 | |
169 | need_collation = true; |
170 | } |
171 | } |
172 | |
173 | if (need_collation) |
174 | { |
175 | PartialSortingLessWithCollation less_with_collation(columns_with_sort_desc); |
176 | |
177 | if (limit) |
178 | std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less_with_collation); |
179 | else |
180 | pdqsort(perm.begin(), perm.end(), less_with_collation); |
181 | } |
182 | else |
183 | { |
184 | PartialSortingLess less(columns_with_sort_desc); |
185 | |
186 | if (limit) |
187 | std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less); |
188 | else |
189 | pdqsort(perm.begin(), perm.end(), less); |
190 | } |
191 | |
192 | size_t columns = block.columns(); |
193 | for (size_t i = 0; i < columns; ++i) |
194 | { |
195 | block.getByPosition(i).column = block.getByPosition(i).column->permute(perm, limit); |
196 | } |
197 | } |
198 | } |
199 | |
200 | |
201 | void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation) |
202 | { |
203 | if (!block) |
204 | return; |
205 | |
206 | size_t size = block.rows(); |
207 | out_permutation.resize(size); |
208 | for (size_t i = 0; i < size; ++i) |
209 | out_permutation[i] = i; |
210 | |
211 | ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description); |
212 | |
213 | std::stable_sort(out_permutation.begin(), out_permutation.end(), PartialSortingLess(columns_with_sort_desc)); |
214 | } |
215 | |
216 | |
217 | bool isAlreadySorted(const Block & block, const SortDescription & description) |
218 | { |
219 | if (!block) |
220 | return true; |
221 | |
222 | size_t rows = block.rows(); |
223 | |
224 | ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description); |
225 | |
226 | PartialSortingLess less(columns_with_sort_desc); |
227 | |
228 | /** If the rows are not too few, then let's make a quick attempt to verify that the block is not sorted. |
229 | * Constants - at random. |
230 | */ |
231 | static constexpr size_t num_rows_to_try = 10; |
232 | if (rows > num_rows_to_try * 5) |
233 | { |
234 | for (size_t i = 1; i < num_rows_to_try; ++i) |
235 | { |
236 | size_t prev_position = rows * (i - 1) / num_rows_to_try; |
237 | size_t curr_position = rows * i / num_rows_to_try; |
238 | |
239 | if (less(curr_position, prev_position)) |
240 | return false; |
241 | } |
242 | } |
243 | |
244 | for (size_t i = 1; i < rows; ++i) |
245 | if (less(i, i - 1)) |
246 | return false; |
247 | |
248 | return true; |
249 | } |
250 | |
251 | |
252 | void stableSortBlock(Block & block, const SortDescription & description) |
253 | { |
254 | if (!block) |
255 | return; |
256 | |
257 | IColumn::Permutation perm; |
258 | stableGetPermutation(block, description, perm); |
259 | |
260 | size_t columns = block.columns(); |
261 | for (size_t i = 0; i < columns; ++i) |
262 | block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, 0); |
263 | } |
264 | |
265 | } |
266 | |