#pragma once

#include <string.h>

#ifdef NDEBUG
    #define ALLOCATOR_ASLR 0
#else
    #define ALLOCATOR_ASLR 1
#endif

#include <pcg_random.hpp>
#include <Common/thread_local_rng.h>

#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif

#include <cstdlib>
#include <algorithm>
#include <random>
#include <sys/mman.h>

#include <Core/Defines.h>
#ifdef THREAD_SANITIZER
    /// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
    #define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>

#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>

#include <Common/Allocator_fwd.h>

/// Required for older Darwin builds that lack a definition of MAP_ANONYMOUS.
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

/**
  * Many modern allocators (for example, tcmalloc) do not use mremap for
  * realloc, even for large enough chunks of memory, although doing so would
  * improve performance and reduce memory consumption during realloc.
  * To work around this, we do mremap manually if the chunk of memory is large enough.
  * The threshold (64 MB) is chosen quite large, since changing the address
  * space is very slow, especially in the case of a large number of threads. We
  * expect that the sequence of operations (mmap, do something, mremap) can only
  * be performed about 1000 times per second.
  *
  * P.S. This is also required because tcmalloc cannot allocate a chunk of
  * memory greater than 16 GB.
  */
#ifdef NDEBUG
    static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
#else
    /**
      * In debug builds, use a small mmap threshold to reproduce more memory
      * stomping bugs. Along with ASLR it will hopefully detect more issues than
      * ASan. The program may fail due to the limit on the number of memory mappings.
      */
    static constexpr size_t MMAP_THRESHOLD = 4096;
#endif

static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;

namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
}
}

/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
  * Also used in hash tables.
  * The interface differs from std::allocator in:
  * - the presence of the `realloc` method, which uses mremap for large chunks of memory;
  * - the size being passed into the `free` method;
  * - the presence of the `alignment` argument;
  * - the possibility of zeroing memory (used in hash tables);
  * - a random hint address for mmap;
  * - an mmap_threshold that controls how often mmap is used.
  */
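/// A minimal usage sketch (illustrative only; the template arguments and the
/// sizes below are arbitrary examples, not recommendations):
///
///     Allocator<false, false> allocator;
///     void * buf = allocator.alloc(1024);        /// below MMAP_THRESHOLD: serviced by malloc
///     buf = allocator.realloc(buf, 1024, 2048);  /// the old size must be passed; the address may change
///     allocator.free(buf, 2048);                 /// the size must match the current allocation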
template <bool clear_memory_, bool mmap_populate>
class Allocator
{
public:
    /// Allocate memory range.
    void * alloc(size_t size, size_t alignment = 0)
    {
        CurrentMemoryTracker::alloc(size);
        return allocNoTrack(size, alignment);
    }

    /// Free memory range.
    void free(void * buf, size_t size)
    {
        freeNoTrack(buf, size);
        CurrentMemoryTracker::free(size);
    }

    /** Enlarge memory range.
      * Data from the old range is moved to the beginning of the new range.
      * The address of the memory range could change.
      */
    void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
    {
        if (old_size == new_size)
        {
            /// Nothing to do.
            /// Note that it is not possible to change the alignment while doing realloc.
        }
        else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
            && alignment <= MALLOC_MIN_ALIGNMENT)
        {
            /// Resize malloc'd memory region with no special alignment requirement.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = ::realloc(buf, new_size);
            if (nullptr == new_buf)
                DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            buf = new_buf;
            if constexpr (clear_memory)
                if (new_size > old_size)
                    memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
        }
        else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
        {
            /// Resize mmap'd memory region.
            CurrentMemoryTracker::realloc(old_size, new_size);

            /// On Apple and FreeBSD, a self-implemented mremap is used (see common/mremap.h).
            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
                PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else if (new_size < MMAP_THRESHOLD)
        {
            /// Small allocations that require a copy. Assume there is enough memory in the system. Call CurrentMemoryTracker once.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = allocNoTrack(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            freeNoTrack(buf, old_size);
            buf = new_buf;
        }
        else
        {
            /// Big allocations that require a copy. MemoryTracker is called inside the 'alloc' and 'free' methods.

            void * new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
            buf = new_buf;
        }

        return buf;
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return 0;
    }

    static constexpr bool clear_memory = clear_memory_;

    // Freshly mmapped pages are copy-on-write references to a global zero page.
    // On the first write, a page fault occurs, and an actual writable page is
    // allocated. If we are going to use this memory soon, such as when resizing
    // hash tables, it makes sense to pre-fault the pages by passing
    // MAP_POPULATE to mmap(). This takes some time, but should be faster
    // overall than having a hot loop interrupted by page faults.
    // It is only supported on Linux.
    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
        | (mmap_populate ? MAP_POPULATE : 0)
#endif
        ;

private:
    void * allocNoTrack(size_t size, size_t alignment)
    {
        void * buf;

        if (size >= MMAP_THRESHOLD)
        {
            if (alignment > MMAP_MIN_ALIGNMENT)
                throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
                    + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);

            buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
                mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else
        {
            if (alignment <= MALLOC_MIN_ALIGNMENT)
            {
                if constexpr (clear_memory)
                    buf = ::calloc(size, 1);
                else
                    buf = ::malloc(size);

                if (nullptr == buf)
                    DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            }
            else
            {
                buf = nullptr;
                int res = posix_memalign(&buf, alignment, size);

                if (0 != res)
                    DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);

                if constexpr (clear_memory)
                    memset(buf, 0, size);
            }
        }
        return buf;
    }

    void freeNoTrack(void * buf, size_t size)
    {
        if (size >= MMAP_THRESHOLD)
        {
            if (0 != munmap(buf, size))
                DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
        }
        else
        {
            ::free(buf);
        }
    }

#ifndef NDEBUG
    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
    /// default. This may lead to worse TLB performance.
    void * getMmapHint()
    {
        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
    }
#else
    void * getMmapHint()
    {
        return nullptr;
    }
#endif
};

/** When using AllocatorWithStackMemory, located on the stack,
  * GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
#endif

/** Allocator with optimization to place small memory ranges in automatic memory.
  */
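/// A minimal usage sketch (illustrative only; the base allocator, N = 64 and
/// Alignment = 8 are arbitrary examples):
///
///     AllocatorWithStackMemory<Allocator<false, false>, 64, 8> allocator;
///     void * buf = allocator.alloc(32);       /// fits in N: returns the internal stack_memory
///     buf = allocator.realloc(buf, 32, 128);  /// exceeds N: moves to the heap via Base::alloc
///     allocator.free(buf, 128);               /// sizes above N are freed via Base::free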
template <typename Base, size_t N, size_t Alignment>
class AllocatorWithStackMemory : private Base
{
private:
    alignas(Alignment) char stack_memory[N];

public:
    /// Do not use boost::noncopyable to avoid the warning about direct base
    /// being inaccessible due to ambiguity, when derived classes are also
    /// noncopyable (-Winaccessible-base).
    AllocatorWithStackMemory(const AllocatorWithStackMemory &) = delete;
    AllocatorWithStackMemory & operator = (const AllocatorWithStackMemory &) = delete;
    AllocatorWithStackMemory() = default;
    ~AllocatorWithStackMemory() = default;

    void * alloc(size_t size)
    {
        if (size <= N)
        {
            if constexpr (Base::clear_memory)
                memset(stack_memory, 0, N);
            return stack_memory;
        }

        return Base::alloc(size, Alignment);
    }

    void free(void * buf, size_t size)
    {
        if (size > N)
            Base::free(buf, size);
    }

    void * realloc(void * buf, size_t old_size, size_t new_size)
    {
        /// Was in stack_memory, will remain there.
        if (new_size <= N)
            return buf;

        /// Was already big enough to not fit in stack_memory.
        if (old_size > N)
            return Base::realloc(buf, old_size, new_size, Alignment);

        /// Was in stack memory, but now will not fit there.
        void * new_buf = Base::alloc(new_size, Alignment);
        memcpy(new_buf, buf, old_size);
        return new_buf;
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return N;
    }
};


#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif