1#pragma once
2
3#include <string.h>
4#include <cstddef>
5#include <cassert>
6#include <algorithm>
7#include <memory>
8
9#include <boost/noncopyable.hpp>
10#include <boost/iterator_adaptors.hpp>
11
12#include <common/likely.h>
13#include <common/strong_typedef.h>
14
15#include <Common/Allocator.h>
16#include <Common/Exception.h>
17#include <Common/BitHelpers.h>
18#include <Common/memcpySmall.h>
19
20#ifndef NDEBUG
21 #include <sys/mman.h>
22#endif
23
24#include <Common/PODArray_fwd.h>
25
26
27namespace DB
28{
29
30namespace ErrorCodes
31{
32 extern const int CANNOT_MPROTECT;
33}
34
35/** A dynamic array for POD types.
36 * Designed for a small number of large arrays (rather than a lot of small ones).
37 * To be more precise - for use in ColumnVector.
38 * It differs from std::vector in that it does not initialize the elements.
39 *
40 * Made noncopyable so that there are no accidential copies. You can copy the data using `assign` method.
41 *
42 * Only part of the std::vector interface is supported.
43 *
44 * The default constructor creates an empty object that does not allocate memory.
45 * Then the memory is allocated at least initial_bytes bytes.
46 *
47 * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
48 *
49 * The template parameter `pad_right` - always allocate at the end of the array as many unused bytes.
50 * Can be used to make optimistic reading, writing, copying with unaligned SIMD instructions.
51 *
52 * The template parameter `pad_left` - always allocate memory before 0th element of the array (rounded up to the whole number of elements)
53 * and zero initialize -1th element. It allows to use -1th element that will have value 0.
54 * This gives performance benefits when converting an array of offsets to array of sizes.
55 *
56 * Some methods using allocator have TAllocatorParams variadic arguments.
57 * These arguments will be passed to corresponding methods of TAllocator.
58 * Example: pointer to Arena, that is used for allocations.
59 *
60 * Why Allocator is not passed through constructor, as it is done in C++ standard library?
61 * Because sometimes we have many small objects, that share same allocator with same parameters,
62 * and we must avoid larger object size due to storing the same parameters in each object.
63 * This is required for states of aggregate functions.
64 *
65 * TODO Pass alignment to Allocator.
66 * TODO Allow greater alignment than alignof(T). Example: array of char aligned to page size.
67 */
68static constexpr size_t EmptyPODArraySize = 1024;
69extern const char EmptyPODArray[EmptyPODArraySize];
70
71/** Base class that depend only on size of element, not on element itself.
72 * You can static_cast to this class if you want to insert some data regardless to the actual type T.
73 */
74#pragma GCC diagnostic push
75#pragma GCC diagnostic ignored "-Wnull-dereference"
76
77template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
78class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization
79{
80protected:
81 /// Round padding up to an whole number of elements to simplify arithmetic.
82 static constexpr size_t pad_right = integerRoundUp(pad_right_, ELEMENT_SIZE);
83 /// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory.
84 static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16);
85 /// Empty array will point to this static memory as padding.
86 static constexpr char * null = pad_left ? const_cast<char *>(EmptyPODArray) + EmptyPODArraySize : nullptr;
87
88 static_assert(pad_left <= EmptyPODArraySize && "Left Padding exceeds EmptyPODArraySize. Is the element size too large?");
89
90 char * c_start = null; /// Does not include pad_left.
91 char * c_end = null;
92 char * c_end_of_storage = null; /// Does not include pad_right.
93
94 /// The amount of memory occupied by the num_elements of the elements.
95 static size_t byte_size(size_t num_elements) { return num_elements * ELEMENT_SIZE; }
96
97 /// Minimum amount of memory to allocate for num_elements, including padding.
98 static size_t minimum_memory_for_elements(size_t num_elements) { return byte_size(num_elements) + pad_right + pad_left; }
99
100 void alloc_for_num_elements(size_t num_elements)
101 {
102 alloc(roundUpToPowerOfTwoOrZero(minimum_memory_for_elements(num_elements)));
103 }
104
105 template <typename ... TAllocatorParams>
106 void alloc(size_t bytes, TAllocatorParams &&... allocator_params)
107 {
108 c_start = c_end = reinterpret_cast<char *>(TAllocator::alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...)) + pad_left;
109 c_end_of_storage = c_start + bytes - pad_right - pad_left;
110
111 if (pad_left)
112 memset(c_start - ELEMENT_SIZE, 0, ELEMENT_SIZE);
113 }
114
115 void dealloc()
116 {
117 if (c_start == null)
118 return;
119
120 unprotect();
121
122 TAllocator::free(c_start - pad_left, allocated_bytes());
123 }
124
125 template <typename ... TAllocatorParams>
126 void realloc(size_t bytes, TAllocatorParams &&... allocator_params)
127 {
128 if (c_start == null)
129 {
130 alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...);
131 return;
132 }
133
134 unprotect();
135
136 ptrdiff_t end_diff = c_end - c_start;
137
138 c_start = reinterpret_cast<char *>(
139 TAllocator::realloc(c_start - pad_left, allocated_bytes(), bytes, std::forward<TAllocatorParams>(allocator_params)...))
140 + pad_left;
141
142 c_end = c_start + end_diff;
143 c_end_of_storage = c_start + bytes - pad_right - pad_left;
144 }
145
146 bool isInitialized() const
147 {
148 return (c_start != null) && (c_end != null) && (c_end_of_storage != null);
149 }
150
151 bool isAllocatedFromStack() const
152 {
153 constexpr size_t stack_threshold = TAllocator::getStackThreshold();
154 return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold);
155 }
156
157 template <typename ... TAllocatorParams>
158 void reserveForNextSize(TAllocatorParams &&... allocator_params)
159 {
160 if (size() == 0)
161 {
162 // The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise,
163 // memory issue such as corruption could appear in edge case.
164 realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE),
165 minimum_memory_for_elements(1)),
166 std::forward<TAllocatorParams>(allocator_params)...);
167 }
168 else
169 realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...);
170 }
171
172#ifndef NDEBUG
173 /// Make memory region readonly with mprotect if it is large enough.
174 /// The operation is slow and performed only for debug builds.
175 void protectImpl(int prot)
176 {
177 static constexpr size_t PROTECT_PAGE_SIZE = 4096;
178
179 char * left_rounded_up = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
180 char * right_rounded_down = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_end_of_storage) + pad_right) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
181
182 if (right_rounded_down > left_rounded_up)
183 {
184 size_t length = right_rounded_down - left_rounded_up;
185 if (0 != mprotect(left_rounded_up, length, prot))
186 throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT);
187 }
188 }
189
190 /// Restore memory protection in destructor or realloc for further reuse by allocator.
191 bool mprotected = false;
192#endif
193
194public:
195 bool empty() const { return c_end == c_start; }
196 size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; }
197 size_t capacity() const { return (c_end_of_storage - c_start) / ELEMENT_SIZE; }
198
199 /// This method is safe to use only for information about memory usage.
200 size_t allocated_bytes() const { return c_end_of_storage - c_start + pad_right + pad_left; }
201
202 void clear() { c_end = c_start; }
203
204 template <typename ... TAllocatorParams>
205 void reserve(size_t n, TAllocatorParams &&... allocator_params)
206 {
207 if (n > capacity())
208 realloc(roundUpToPowerOfTwoOrZero(minimum_memory_for_elements(n)), std::forward<TAllocatorParams>(allocator_params)...);
209 }
210
211 template <typename ... TAllocatorParams>
212 void resize(size_t n, TAllocatorParams &&... allocator_params)
213 {
214 reserve(n, std::forward<TAllocatorParams>(allocator_params)...);
215 resize_assume_reserved(n);
216 }
217
218 void resize_assume_reserved(const size_t n)
219 {
220 c_end = c_start + byte_size(n);
221 }
222
223 const char * raw_data() const
224 {
225 return c_start;
226 }
227
228 template <typename ... TAllocatorParams>
229 void push_back_raw(const char * ptr, TAllocatorParams &&... allocator_params)
230 {
231 if (unlikely(c_end == c_end_of_storage))
232 reserveForNextSize(std::forward<TAllocatorParams>(allocator_params)...);
233
234 memcpy(c_end, ptr, ELEMENT_SIZE);
235 c_end += byte_size(1);
236 }
237
238 void protect()
239 {
240#ifndef NDEBUG
241 protectImpl(PROT_READ);
242 mprotected = true;
243#endif
244 }
245
246 void unprotect()
247 {
248#ifndef NDEBUG
249 if (mprotected)
250 protectImpl(PROT_WRITE);
251 mprotected = false;
252#endif
253 }
254
255 ~PODArrayBase()
256 {
257 dealloc();
258 }
259};
260
261template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
262class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>
263{
264protected:
265 using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>;
266
267 T * t_start() { return reinterpret_cast<T *>(this->c_start); }
268 T * t_end() { return reinterpret_cast<T *>(this->c_end); }
269 T * t_end_of_storage() { return reinterpret_cast<T *>(this->c_end_of_storage); }
270
271 const T * t_start() const { return reinterpret_cast<const T *>(this->c_start); }
272 const T * t_end() const { return reinterpret_cast<const T *>(this->c_end); }
273 const T * t_end_of_storage() const { return reinterpret_cast<const T *>(this->c_end_of_storage); }
274
275public:
276 using value_type = T;
277
278 /// You can not just use `typedef`, because there is ambiguity for the constructors and `assign` functions.
279 struct iterator : public boost::iterator_adaptor<iterator, T*>
280 {
281 iterator() {}
282 iterator(T * ptr_) : iterator::iterator_adaptor_(ptr_) {}
283 };
284
285 struct const_iterator : public boost::iterator_adaptor<const_iterator, const T*>
286 {
287 const_iterator() {}
288 const_iterator(const T * ptr_) : const_iterator::iterator_adaptor_(ptr_) {}
289 };
290
291
292 PODArray() {}
293
294 PODArray(size_t n)
295 {
296 this->alloc_for_num_elements(n);
297 this->c_end += this->byte_size(n);
298 }
299
300 PODArray(size_t n, const T & x)
301 {
302 this->alloc_for_num_elements(n);
303 assign(n, x);
304 }
305
306 PODArray(const_iterator from_begin, const_iterator from_end)
307 {
308 this->alloc_for_num_elements(from_end - from_begin);
309 insert(from_begin, from_end);
310 }
311
312 PODArray(std::initializer_list<T> il) : PODArray(std::begin(il), std::end(il)) {}
313
314 PODArray(PODArray && other)
315 {
316 this->swap(other);
317 }
318
319 PODArray & operator=(PODArray && other)
320 {
321 this->swap(other);
322 return *this;
323 }
324
325 T * data() { return t_start(); }
326 const T * data() const { return t_start(); }
327
328 /// The index is signed to access -1th element without pointer overflow.
329 T & operator[] (ssize_t n)
330 {
331 /// <= size, because taking address of one element past memory range is Ok in C++ (expression like &arr[arr.size()] is perfectly valid).
332 assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) && (n <= static_cast<ssize_t>(this->size())));
333 return t_start()[n];
334 }
335
336 const T & operator[] (ssize_t n) const
337 {
338 assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) && (n <= static_cast<ssize_t>(this->size())));
339 return t_start()[n];
340 }
341
342 T & front() { return t_start()[0]; }
343 T & back() { return t_end()[-1]; }
344 const T & front() const { return t_start()[0]; }
345 const T & back() const { return t_end()[-1]; }
346
347 iterator begin() { return t_start(); }
348 iterator end() { return t_end(); }
349 const_iterator begin() const { return t_start(); }
350 const_iterator end() const { return t_end(); }
351 const_iterator cbegin() const { return t_start(); }
352 const_iterator cend() const { return t_end(); }
353
354 /// Same as resize, but zeroes new elements.
355 void resize_fill(size_t n)
356 {
357 size_t old_size = this->size();
358 if (n > old_size)
359 {
360 this->reserve(n);
361 memset(this->c_end, 0, this->byte_size(n - old_size));
362 }
363 this->c_end = this->c_start + this->byte_size(n);
364 }
365
366 void resize_fill(size_t n, const T & value)
367 {
368 size_t old_size = this->size();
369 if (n > old_size)
370 {
371 this->reserve(n);
372 std::fill(t_end(), t_end() + n - old_size, value);
373 }
374 this->c_end = this->c_start + this->byte_size(n);
375 }
376
377 template <typename U, typename ... TAllocatorParams>
378 void push_back(U && x, TAllocatorParams &&... allocator_params)
379 {
380 if (unlikely(this->c_end == this->c_end_of_storage))
381 this->reserveForNextSize(std::forward<TAllocatorParams>(allocator_params)...);
382
383 new (t_end()) T(std::forward<U>(x));
384 this->c_end += this->byte_size(1);
385 }
386
387 /** This method doesn't allow to pass parameters for Allocator,
388 * and it couldn't be used if Allocator requires custom parameters.
389 */
390 template <typename... Args>
391 void emplace_back(Args &&... args)
392 {
393 if (unlikely(this->c_end == this->c_end_of_storage))
394 this->reserveForNextSize();
395
396 new (t_end()) T(std::forward<Args>(args)...);
397 this->c_end += this->byte_size(1);
398 }
399
400 void pop_back()
401 {
402 this->c_end -= this->byte_size(1);
403 }
404
405 /// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
406 template <typename It1, typename It2, typename ... TAllocatorParams>
407 void insertPrepare(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
408 {
409 size_t required_capacity = this->size() + (from_end - from_begin);
410 if (required_capacity > this->capacity())
411 this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...);
412 }
413
414 /// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
415 template <typename It1, typename It2, typename ... TAllocatorParams>
416 void insert(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
417 {
418 insertPrepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...);
419 insert_assume_reserved(from_begin, from_end);
420 }
421
422 /// Works under assumption, that it's possible to read up to 15 excessive bytes after `from_end` and this PODArray is padded.
423 template <typename It1, typename It2, typename ... TAllocatorParams>
424 void insertSmallAllowReadWriteOverflow15(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
425 {
426 static_assert(pad_right_ >= 15);
427 insertPrepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...);
428 size_t bytes_to_copy = this->byte_size(from_end - from_begin);
429 memcpySmallAllowReadWriteOverflow15(this->c_end, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
430 this->c_end += bytes_to_copy;
431 }
432
433 template <typename It1, typename It2>
434 void insert(iterator it, It1 from_begin, It2 from_end)
435 {
436 size_t bytes_to_copy = this->byte_size(from_end - from_begin);
437 size_t bytes_to_move = (end() - it) * sizeof(T);
438
439 insertPrepare(from_begin, from_end);
440
441 if (unlikely(bytes_to_move))
442 memcpy(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move, bytes_to_move);
443
444 memcpy(this->c_end - bytes_to_move, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
445 this->c_end += bytes_to_copy;
446 }
447
448 template <typename It1, typename It2>
449 void insert_assume_reserved(It1 from_begin, It2 from_end)
450 {
451 size_t bytes_to_copy = this->byte_size(from_end - from_begin);
452 memcpy(this->c_end, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
453 this->c_end += bytes_to_copy;
454 }
455
456 void swap(PODArray & rhs)
457 {
458#ifndef NDEBUG
459 this->unprotect();
460 rhs.unprotect();
461#endif
462
463 /// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions:
464 /// - The elements of arr1 are stored on stack.
465 /// - The elements of arr2 are stored on heap.
466 auto swap_stack_heap = [this](PODArray & arr1, PODArray & arr2)
467 {
468 size_t stack_size = arr1.size();
469 size_t stack_allocated = arr1.allocated_bytes();
470
471 size_t heap_size = arr2.size();
472 size_t heap_allocated = arr2.allocated_bytes();
473
474 /// Keep track of the stack content we have to copy.
475 char * stack_c_start = arr1.c_start;
476
477 /// arr1 takes ownership of the heap memory of arr2.
478 arr1.c_start = arr2.c_start;
479 arr1.c_end_of_storage = arr1.c_start + heap_allocated - arr1.pad_right;
480 arr1.c_end = arr1.c_start + this->byte_size(heap_size);
481
482 /// Allocate stack space for arr2.
483 arr2.alloc(stack_allocated);
484 /// Copy the stack content.
485 memcpy(arr2.c_start, stack_c_start, this->byte_size(stack_size));
486 arr2.c_end = arr2.c_start + this->byte_size(stack_size);
487 };
488
489 auto do_move = [this](PODArray & src, PODArray & dest)
490 {
491 if (src.isAllocatedFromStack())
492 {
493 dest.dealloc();
494 dest.alloc(src.allocated_bytes());
495 memcpy(dest.c_start, src.c_start, this->byte_size(src.size()));
496 dest.c_end = dest.c_start + (src.c_end - src.c_start);
497
498 src.c_start = Base::null;
499 src.c_end = Base::null;
500 src.c_end_of_storage = Base::null;
501 }
502 else
503 {
504 std::swap(dest.c_start, src.c_start);
505 std::swap(dest.c_end, src.c_end);
506 std::swap(dest.c_end_of_storage, src.c_end_of_storage);
507 }
508 };
509
510 if (!this->isInitialized() && !rhs.isInitialized())
511 {
512 return;
513 }
514 else if (!this->isInitialized() && rhs.isInitialized())
515 {
516 do_move(rhs, *this);
517 return;
518 }
519 else if (this->isInitialized() && !rhs.isInitialized())
520 {
521 do_move(*this, rhs);
522 return;
523 }
524
525 if (this->isAllocatedFromStack() && rhs.isAllocatedFromStack())
526 {
527 size_t min_size = std::min(this->size(), rhs.size());
528 size_t max_size = std::max(this->size(), rhs.size());
529
530 for (size_t i = 0; i < min_size; ++i)
531 std::swap(this->operator[](i), rhs[i]);
532
533 if (this->size() == max_size)
534 {
535 for (size_t i = min_size; i < max_size; ++i)
536 rhs[i] = this->operator[](i);
537 }
538 else
539 {
540 for (size_t i = min_size; i < max_size; ++i)
541 this->operator[](i) = rhs[i];
542 }
543
544 size_t lhs_size = this->size();
545 size_t lhs_allocated = this->allocated_bytes();
546
547 size_t rhs_size = rhs.size();
548 size_t rhs_allocated = rhs.allocated_bytes();
549
550 this->c_end_of_storage = this->c_start + rhs_allocated - Base::pad_right;
551 rhs.c_end_of_storage = rhs.c_start + lhs_allocated - Base::pad_right;
552
553 this->c_end = this->c_start + this->byte_size(rhs_size);
554 rhs.c_end = rhs.c_start + this->byte_size(lhs_size);
555 }
556 else if (this->isAllocatedFromStack() && !rhs.isAllocatedFromStack())
557 {
558 swap_stack_heap(*this, rhs);
559 }
560 else if (!this->isAllocatedFromStack() && rhs.isAllocatedFromStack())
561 {
562 swap_stack_heap(rhs, *this);
563 }
564 else
565 {
566 std::swap(this->c_start, rhs.c_start);
567 std::swap(this->c_end, rhs.c_end);
568 std::swap(this->c_end_of_storage, rhs.c_end_of_storage);
569 }
570 }
571
572 void assign(size_t n, const T & x)
573 {
574 this->resize(n);
575 std::fill(begin(), end(), x);
576 }
577
578 template <typename It1, typename It2>
579 void assign(It1 from_begin, It2 from_end)
580 {
581 size_t required_capacity = from_end - from_begin;
582 if (required_capacity > this->capacity())
583 this->reserve(roundUpToPowerOfTwoOrZero(required_capacity));
584
585 size_t bytes_to_copy = this->byte_size(required_capacity);
586 memcpy(this->c_start, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
587 this->c_end = this->c_start + bytes_to_copy;
588 }
589
590 void assign(const PODArray & from)
591 {
592 assign(from.begin(), from.end());
593 }
594
595
596 bool operator== (const PODArray & other) const
597 {
598 if (this->size() != other.size())
599 return false;
600
601 const_iterator this_it = begin();
602 const_iterator that_it = other.begin();
603
604 while (this_it != end())
605 {
606 if (*this_it != *that_it)
607 return false;
608
609 ++this_it;
610 ++that_it;
611 }
612
613 return true;
614 }
615
616 bool operator!= (const PODArray & other) const
617 {
618 return !operator==(other);
619 }
620};
621
622template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_>
623void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_> & rhs)
624{
625 lhs.swap(rhs);
626}
627#pragma GCC diagnostic pop
628
629}
630