1#ifdef __SSE2__
2 #include <emmintrin.h>
3#endif
4
5#include <Columns/IColumn.h>
6#include <Columns/ColumnVector.h>
7#include <Common/typeid_cast.h>
8#include <Common/HashTable/HashSet.h>
9#include "ColumnsCommon.h"
10
11
12namespace DB
13{
14
15size_t countBytesInFilter(const IColumn::Filter & filt)
16{
17 size_t count = 0;
18
19 /** NOTE: In theory, `filt` should only contain zeros and ones.
20 * But, just in case, here the condition > 0 (to signed bytes) is used.
21 * It would be better to use != 0, then this does not allow SSE2.
22 */
23
24 const Int8 * pos = reinterpret_cast<const Int8 *>(filt.data());
25 const Int8 * end = pos + filt.size();
26
27#if defined(__SSE2__) && defined(__POPCNT__)
28 const __m128i zero16 = _mm_setzero_si128();
29 const Int8 * end64 = pos + filt.size() / 64 * 64;
30
31 for (; pos < end64; pos += 64)
32 count += __builtin_popcountll(
33 static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
34 _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
35 zero16)))
36 | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
37 _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
38 zero16))) << 16)
39 | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
40 _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
41 zero16))) << 32)
42 | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
43 _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
44 zero16))) << 48));
45
46 /// TODO Add duff device for tail?
47#endif
48
49 for (; pos < end; ++pos)
50 count += *pos > 0;
51
52 return count;
53}
54
55std::vector<size_t> countColumnsSizeInSelector(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector)
56{
57 std::vector<size_t> counts(num_columns);
58 for (auto idx : selector)
59 ++counts[idx];
60
61 return counts;
62}
63
/// Returns true if all `size` bytes starting at `data` are equal to `byte`.
/// An empty range is considered uniform.
bool memoryIsByte(const void * data, size_t size, uint8_t byte)
{
    if (size == 0)
        return true;

    const auto * bytes = reinterpret_cast<const uint8_t *>(data);
    if (bytes[0] != byte)
        return false;

    /// Overlapping compare: byte i equals byte i + 1 for every i,
    /// so together with bytes[0] == byte the whole buffer is uniform.
    return memcmp(bytes, bytes + 1, size - 1) == 0;
}
71
72bool memoryIsZero(const void * data, size_t size)
73{
74 return memoryIsByte(data, size, 0x0);
75}
76
namespace ErrorCodes
{
    /// Thrown when a filter's size differs from the size of the column being filtered.
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
81
82
83namespace
84{
85 /// Implementation details of filterArraysImpl function, used as template parameter.
86 /// Allow to build or not to build offsets array.
87
88 struct ResultOffsetsBuilder
89 {
90 IColumn::Offsets & res_offsets;
91 IColumn::Offset current_src_offset = 0;
92
93 explicit ResultOffsetsBuilder(IColumn::Offsets * res_offsets_) : res_offsets(*res_offsets_) {}
94
95 void reserve(ssize_t result_size_hint, size_t src_size)
96 {
97 res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size);
98 }
99
100 void insertOne(size_t array_size)
101 {
102 current_src_offset += array_size;
103 res_offsets.push_back(current_src_offset);
104 }
105
106 template <size_t SIMD_BYTES>
107 void insertChunk(
108 const IColumn::Offset * src_offsets_pos,
109 bool first,
110 IColumn::Offset chunk_offset,
111 size_t chunk_size)
112 {
113 const auto offsets_size_old = res_offsets.size();
114 res_offsets.resize(offsets_size_old + SIMD_BYTES);
115 memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(IColumn::Offset));
116
117 if (!first)
118 {
119 /// difference between current and actual offset
120 const auto diff_offset = chunk_offset - current_src_offset;
121
122 if (diff_offset > 0)
123 {
124 const auto res_offsets_pos = &res_offsets[offsets_size_old];
125
126 /// adjust offsets
127 for (size_t i = 0; i < SIMD_BYTES; ++i)
128 res_offsets_pos[i] -= diff_offset;
129 }
130 }
131 current_src_offset += chunk_size;
132 }
133 };
134
135 struct NoResultOffsetsBuilder
136 {
137 explicit NoResultOffsetsBuilder(IColumn::Offsets *) {}
138 void reserve(ssize_t, size_t) {}
139 void insertOne(size_t) {}
140
141 template <size_t SIMD_BYTES>
142 void insertChunk(
143 const IColumn::Offset *,
144 bool,
145 IColumn::Offset,
146 size_t)
147 {
148 }
149 };
150
151
    /// Common implementation of filterArraysImpl and filterArraysImplOnlyData.
    /// Copies every array (an element range delimited by src_offsets) whose filter byte is
    /// non-zero into res_elems; ResultOffsetsBuilder decides whether result offsets are built.
    ///
    /// result_size_hint: 0 - no reservation; < 0 - reserve for all source elements
    /// (presumably "most rows pass" - confirm with callers); > 0 - expected result row
    /// count, used to reserve a proportional share of the source elements.
    template <typename T, typename ResultOffsetsBuilder>
    void filterArraysImplGeneric(
        const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
        PaddedPODArray<T> & res_elems, IColumn::Offsets * res_offsets,
        const IColumn::Filter & filt, ssize_t result_size_hint)
    {
        const size_t size = src_offsets.size();
        if (size != filt.size())
            throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

        ResultOffsetsBuilder result_offsets_builder(res_offsets);

        if (result_size_hint)
        {
            result_offsets_builder.reserve(result_size_hint, size);

            if (result_size_hint < 0)
                res_elems.reserve(src_elems.size());
            else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow.
                res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size);
        }

        const UInt8 * filt_pos = filt.data();
        const auto filt_end = filt_pos + size;

        auto offsets_pos = src_offsets.data();
        const auto offsets_begin = offsets_pos;

        /// copy array ending at *end_offset_ptr
        const auto copy_array = [&] (const IColumn::Offset * offset_ptr)
        {
            /// The array starts at the previous offset (or 0 for the very first array).
            const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1];
            const auto arr_size = *offset_ptr - arr_offset;

            result_offsets_builder.insertOne(arr_size);

            const auto elems_size_old = res_elems.size();
            res_elems.resize(elems_size_old + arr_size);
            memcpy(&res_elems[elems_size_old], &src_elems[arr_offset], arr_size * sizeof(T));
        };

    #ifdef __SSE2__
        /// Fast path: inspect the filter 16 bytes at a time and handle the all-zeros /
        /// all-ones cases without per-row branching.
        const __m128i zero_vec = _mm_setzero_si128();
        static constexpr size_t SIMD_BYTES = 16;
        const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;

        while (filt_pos < filt_end_aligned)
        {
            const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
                _mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
                zero_vec));

            if (mask == 0)
            {
                /// SIMD_BYTES consecutive rows do not pass the filter
            }
            else if (mask == 0xffff)
            {
                /// SIMD_BYTES consecutive rows pass the filter
                const auto first = offsets_pos == offsets_begin;

                const auto chunk_offset = first ? 0 : offsets_pos[-1];
                const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset;

                result_offsets_builder.template insertChunk<SIMD_BYTES>(offsets_pos, first, chunk_offset, chunk_size);

                /// copy elements for SIMD_BYTES arrays at once
                const auto elems_size_old = res_elems.size();
                res_elems.resize(elems_size_old + chunk_size);
                memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T));
            }
            else
            {
                /// Mixed chunk: copy the passing arrays one by one.
                for (size_t i = 0; i < SIMD_BYTES; ++i)
                    if (filt_pos[i])
                        copy_array(offsets_pos + i);
            }

            filt_pos += SIMD_BYTES;
            offsets_pos += SIMD_BYTES;
        }
    #endif

        /// Scalar tail (and the whole loop when SSE2 is unavailable).
        while (filt_pos < filt_end)
        {
            if (*filt_pos)
                copy_array(offsets_pos);

            ++filt_pos;
            ++offsets_pos;
        }
    }
244}
245
246
247template <typename T>
248void filterArraysImpl(
249 const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
250 PaddedPODArray<T> & res_elems, IColumn::Offsets & res_offsets,
251 const IColumn::Filter & filt, ssize_t result_size_hint)
252{
253 return filterArraysImplGeneric<T, ResultOffsetsBuilder>(src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint);
254}
255
256template <typename T>
257void filterArraysImplOnlyData(
258 const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
259 PaddedPODArray<T> & res_elems,
260 const IColumn::Filter & filt, ssize_t result_size_hint)
261{
262 return filterArraysImplGeneric<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint);
263}
264
265
/// Explicit instantiations - not to place the implementation of the functions above
/// in the header file. Instantiates both filterArraysImpl and filterArraysImplOnlyData
/// for one element type. (No comments inside the macro: they would break the
/// line continuations.)
#define INSTANTIATE(TYPE) \
template void filterArraysImpl<TYPE>( \
    const PaddedPODArray<TYPE> &, const IColumn::Offsets &, \
    PaddedPODArray<TYPE> &, IColumn::Offsets &, \
    const IColumn::Filter &, ssize_t); \
template void filterArraysImplOnlyData<TYPE>( \
    const PaddedPODArray<TYPE> &, const IColumn::Offsets &, \
    PaddedPODArray<TYPE> &, \
    const IColumn::Filter &, ssize_t);

/// All numeric element types supported by the filtering functions.
INSTANTIATE(UInt8)
INSTANTIATE(UInt16)
INSTANTIATE(UInt32)
INSTANTIATE(UInt64)
INSTANTIATE(Int8)
INSTANTIATE(Int16)
INSTANTIATE(Int32)
INSTANTIATE(Int64)
INSTANTIATE(Float32)
INSTANTIATE(Float64)

#undef INSTANTIATE
289
290namespace detail
291{
292 template <typename T>
293 const PaddedPODArray<T> * getIndexesData(const IColumn & indexes)
294 {
295 auto * column = typeid_cast<const ColumnVector<T> *>(&indexes);
296 if (column)
297 return &column->getData();
298
299 return nullptr;
300 }
301
302 template const PaddedPODArray<UInt8> * getIndexesData<UInt8>(const IColumn & indexes);
303 template const PaddedPODArray<UInt16> * getIndexesData<UInt16>(const IColumn & indexes);
304 template const PaddedPODArray<UInt32> * getIndexesData<UInt32>(const IColumn & indexes);
305 template const PaddedPODArray<UInt64> * getIndexesData<UInt64>(const IColumn & indexes);
306}
307
308}
309