1#include <Columns/ColumnFixedString.h>
2#include <Columns/ColumnsCommon.h>
3
4#include <Common/Arena.h>
5#include <Common/SipHash.h>
6#include <Common/memcpySmall.h>
7#include <Common/memcmpSmall.h>
8#include <Common/assert_cast.h>
9
10#include <DataStreams/ColumnGathererStream.h>
11
12#include <IO/WriteHelpers.h>
13
14#ifdef __SSE2__
15 #include <emmintrin.h>
16#endif
17
18
19namespace DB
20{
21
22namespace ErrorCodes
23{
24 extern const int TOO_LARGE_STRING_SIZE;
25 extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH;
26 extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
27 extern const int PARAMETER_OUT_OF_BOUND;
28}
29
30
31MutableColumnPtr ColumnFixedString::cloneResized(size_t size) const
32{
33 MutableColumnPtr new_col_holder = ColumnFixedString::create(n);
34
35 if (size > 0)
36 {
37 auto & new_col = assert_cast<ColumnFixedString &>(*new_col_holder);
38 new_col.chars.resize(size * n);
39
40 size_t count = std::min(this->size(), size);
41 memcpy(new_col.chars.data(), chars.data(), count * n * sizeof(chars[0]));
42
43 if (size > count)
44 memset(&(new_col.chars[count * n]), '\0', (size - count) * n);
45 }
46
47 return new_col_holder;
48}
49
50void ColumnFixedString::insert(const Field & x)
51{
52 const String & s = DB::get<const String &>(x);
53
54 if (s.size() > n)
55 throw Exception("Too large string '" + s + "' for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE);
56
57 size_t old_size = chars.size();
58 chars.resize_fill(old_size + n);
59 memcpy(chars.data() + old_size, s.data(), s.size());
60}
61
62void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
63{
64 const ColumnFixedString & src = assert_cast<const ColumnFixedString &>(src_);
65
66 if (n != src.getN())
67 throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH);
68
69 size_t old_size = chars.size();
70 chars.resize(old_size + n);
71 memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
72}
73
74void ColumnFixedString::insertData(const char * pos, size_t length)
75{
76 if (length > n)
77 throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE);
78
79 size_t old_size = chars.size();
80 chars.resize_fill(old_size + n);
81 memcpy(chars.data() + old_size, pos, length);
82}
83
84StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
85{
86 auto pos = arena.allocContinue(n, begin);
87 memcpy(pos, &chars[n * index], n);
88 return StringRef(pos, n);
89}
90
91const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos)
92{
93 size_t old_size = chars.size();
94 chars.resize(old_size + n);
95 memcpy(chars.data() + old_size, pos, n);
96 return pos + n;
97}
98
99void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
100{
101 hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
102}
103
104template <bool positive>
105struct ColumnFixedString::less
106{
107 const ColumnFixedString & parent;
108 explicit less(const ColumnFixedString & parent_) : parent(parent_) {}
109 bool operator()(size_t lhs, size_t rhs) const
110 {
111 int res = memcmpSmallAllowOverflow15(parent.chars.data() + lhs * parent.n, parent.chars.data() + rhs * parent.n, parent.n);
112 return positive ? (res < 0) : (res > 0);
113 }
114};
115
116void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
117{
118 size_t s = size();
119 res.resize(s);
120 for (size_t i = 0; i < s; ++i)
121 res[i] = i;
122
123 if (limit >= s)
124 limit = 0;
125
126 if (limit)
127 {
128 if (reverse)
129 std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
130 else
131 std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
132 }
133 else
134 {
135 if (reverse)
136 std::sort(res.begin(), res.end(), less<false>(*this));
137 else
138 std::sort(res.begin(), res.end(), less<true>(*this));
139 }
140}
141
142void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
143{
144 const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
145
146 if (start + length > src_concrete.size())
147 throw Exception("Parameters start = "
148 + toString(start) + ", length = "
149 + toString(length) + " are out of bound in ColumnFixedString::insertRangeFrom method"
150 " (size() = " + toString(src_concrete.size()) + ").",
151 ErrorCodes::PARAMETER_OUT_OF_BOUND);
152
153 size_t old_size = chars.size();
154 chars.resize(old_size + length * n);
155 memcpy(chars.data() + old_size, &src_concrete.chars[start * n], length * n);
156}
157
158ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
159{
160 size_t col_size = size();
161 if (col_size != filt.size())
162 throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
163
164 auto res = ColumnFixedString::create(n);
165
166 if (result_size_hint)
167 res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size());
168
169 const UInt8 * filt_pos = filt.data();
170 const UInt8 * filt_end = filt_pos + col_size;
171 const UInt8 * data_pos = chars.data();
172
173#ifdef __SSE2__
174 /** A slightly more optimized version.
175 * Based on the assumption that often pieces of consecutive values
176 * completely pass or do not pass the filter.
177 * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
178 */
179
180 static constexpr size_t SIMD_BYTES = 16;
181 const __m128i zero16 = _mm_setzero_si128();
182 const UInt8 * filt_end_sse = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
183 const size_t chars_per_simd_elements = SIMD_BYTES * n;
184
185 while (filt_pos < filt_end_sse)
186 {
187 int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
188
189 if (0 == mask)
190 {
191 /// Nothing is inserted.
192 data_pos += chars_per_simd_elements;
193 }
194 else if (0xFFFF == mask)
195 {
196 res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
197 data_pos += chars_per_simd_elements;
198 }
199 else
200 {
201 size_t res_chars_size = res->chars.size();
202 for (size_t i = 0; i < SIMD_BYTES; ++i)
203 {
204 if (filt_pos[i])
205 {
206 res->chars.resize(res_chars_size + n);
207 memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
208 res_chars_size += n;
209 }
210 data_pos += n;
211 }
212 }
213
214 filt_pos += SIMD_BYTES;
215 }
216#endif
217
218 size_t res_chars_size = res->chars.size();
219 while (filt_pos < filt_end)
220 {
221 if (*filt_pos)
222 {
223 res->chars.resize(res_chars_size + n);
224 memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
225 res_chars_size += n;
226 }
227
228 ++filt_pos;
229 data_pos += n;
230 }
231
232 return res;
233}
234
235ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const
236{
237 size_t col_size = size();
238
239 if (limit == 0)
240 limit = col_size;
241 else
242 limit = std::min(col_size, limit);
243
244 if (perm.size() < limit)
245 throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
246
247 if (limit == 0)
248 return ColumnFixedString::create(n);
249
250 auto res = ColumnFixedString::create(n);
251
252 Chars & res_chars = res->chars;
253
254 res_chars.resize(n * limit);
255
256 size_t offset = 0;
257 for (size_t i = 0; i < limit; ++i, offset += n)
258 memcpySmallAllowReadWriteOverflow15(&res_chars[offset], &chars[perm[i] * n], n);
259
260 return res;
261}
262
263
264ColumnPtr ColumnFixedString::index(const IColumn & indexes, size_t limit) const
265{
266 return selectIndexImpl(*this, indexes, limit);
267}
268
269
270template <typename Type>
271ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
272{
273 if (limit == 0)
274 return ColumnFixedString::create(n);
275
276 auto res = ColumnFixedString::create(n);
277
278 Chars & res_chars = res->chars;
279
280 res_chars.resize(n * limit);
281
282 size_t offset = 0;
283 for (size_t i = 0; i < limit; ++i, offset += n)
284 memcpySmallAllowReadWriteOverflow15(&res_chars[offset], &chars[indexes[i] * n], n);
285
286 return res;
287}
288
289ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
290{
291 size_t col_size = size();
292 if (col_size != offsets.size())
293 throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
294
295 auto res = ColumnFixedString::create(n);
296
297 if (0 == col_size)
298 return res;
299
300 Chars & res_chars = res->chars;
301 res_chars.resize(n * offsets.back());
302
303 Offset curr_offset = 0;
304 for (size_t i = 0; i < col_size; ++i)
305 for (size_t next_offset = offsets[i]; curr_offset < next_offset; ++curr_offset)
306 memcpySmallAllowReadWriteOverflow15(&res->chars[curr_offset * n], &chars[i * n], n);
307
308 return res;
309}
310
311void ColumnFixedString::gather(ColumnGathererStream & gatherer)
312{
313 gatherer.gather(*this);
314}
315
316void ColumnFixedString::getExtremes(Field & min, Field & max) const
317{
318 min = String();
319 max = String();
320
321 size_t col_size = size();
322
323 if (col_size == 0)
324 return;
325
326 size_t min_idx = 0;
327 size_t max_idx = 0;
328
329 less<true> less_op(*this);
330
331 for (size_t i = 1; i < col_size; ++i)
332 {
333 if (less_op(i, min_idx))
334 min_idx = i;
335 else if (less_op(max_idx, i))
336 max_idx = i;
337 }
338
339 get(min_idx, min);
340 get(max_idx, max);
341}
342
343}
344