#pragma once

#include <string.h>

#ifdef NDEBUG
#define ALLOCATOR_ASLR 0
#else
#define ALLOCATOR_ASLR 1
#endif

#include <pcg_random.hpp>
#include <Common/thread_local_rng.h>

#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif

#include <cstdlib>
#include <algorithm>
#include <sys/mman.h>

#include <Core/Defines.h>
#ifdef THREAD_SANITIZER
/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
#define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>

#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>

#include <Common/Allocator_fwd.h>


/// Required for older Darwin builds that lack a definition of MAP_ANONYMOUS.
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

/**
  * Many modern allocators (for example, tcmalloc) do not use mremap for
  * realloc, even for large enough chunks of memory, although doing so would
  * improve performance and reduce memory consumption during realloc.
  * To work around this, we do mremap manually if the chunk of memory is large enough.
  * The threshold (64 MB) is chosen to be quite large, since changing the address
  * space is very slow, especially with a large number of threads. We
  * expect that the sequence of operations mmap / do something / mremap can only be
  * performed about 1000 times per second.
  *
  * P.S. This is also required because tcmalloc cannot allocate a chunk of
  * memory greater than 16 GB.
  */
#ifdef NDEBUG
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
#else
/**
  * In debug builds, use a small mmap threshold to reproduce more memory
  * stomping bugs. Along with ASLR it will hopefully detect more issues than
  * ASan. The program may fail due to the limit on the number of memory mappings.
  */
static constexpr size_t MMAP_THRESHOLD = 4096;
#endif

static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
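
/// Worked illustration (not from the original header) of how these constants
/// steer allocNoTrack below, assuming a release build (MMAP_THRESHOLD = 64 MB):
///
///     alloc(100);                     /// -> malloc/calloc: size < MMAP_THRESHOLD, default alignment
///     alloc(100, 64);                 /// -> posix_memalign: alignment > MALLOC_MIN_ALIGNMENT
///     alloc(128 * (1ULL << 20));      /// -> mmap: size >= MMAP_THRESHOLD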

namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
}
}

/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
  * Also used in hash tables.
  * The interface differs from std::allocator in:
  * - the presence of a `realloc` method, which uses mremap for large chunks of memory;
  * - passing the size into the `free` method;
  * - the presence of an `alignment` argument;
  * - the possibility of zeroing memory (used in hash tables);
  * - a random hint address for mmap;
  * - an mmap_threshold that controls how often mmap is used.
  */
template <bool clear_memory_, bool mmap_populate>
class Allocator
{
public:
    /// Allocate memory range.
    void * alloc(size_t size, size_t alignment = 0)
    {
        CurrentMemoryTracker::alloc(size);
        return allocNoTrack(size, alignment);
    }

    /// Free memory range.
    void free(void * buf, size_t size)
    {
        freeNoTrack(buf, size);
        CurrentMemoryTracker::free(size);
    }

    /** Resize memory range.
      * Data from the old range is moved to the beginning of the new range.
      * The address of the memory range may change.
      */
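    /// A summary (added for readability) of the branches in the implementation:
    ///   old_size == new_size                                  -> no-op
    ///   both sizes < MMAP_THRESHOLD, alignment <= 8           -> ::realloc
    ///   both sizes >= MMAP_THRESHOLD                          -> clickhouse_mremap
    ///   new_size < MMAP_THRESHOLD (shrinking across the
    ///   threshold, or an alignment plain realloc can't keep)  -> allocNoTrack + memcpy + freeNoTrack
    ///   otherwise (growing across the threshold)              -> alloc + memcpy + free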
    void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
    {
        if (old_size == new_size)
        {
            /// nothing to do.
            /// BTW, it's not possible to change alignment while doing realloc.
        }
        else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
            && alignment <= MALLOC_MIN_ALIGNMENT)
        {
            /// Resize malloc'd memory region with no special alignment requirement.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = ::realloc(buf, new_size);
            if (nullptr == new_buf)
                DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            buf = new_buf;
            if constexpr (clear_memory)
                if (new_size > old_size)
                    memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
        }
        else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
        {
            /// Resize mmap'd memory region.
            CurrentMemoryTracker::realloc(old_size, new_size);

            /// On Apple and FreeBSD, a self-implemented mremap is used (see common/mremap.h).
            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
                PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else if (new_size < MMAP_THRESHOLD)
        {
            /// Small allocations that require a copy. Assume there is enough memory in the system. Call CurrentMemoryTracker once.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = allocNoTrack(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            freeNoTrack(buf, old_size);
            buf = new_buf;
        }
        else
        {
            /// Big allocations that require a copy. MemoryTracker is called inside the 'alloc' and 'free' methods.

            void * new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
            buf = new_buf;
        }

        return buf;
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return 0;
    }

    static constexpr bool clear_memory = clear_memory_;

    // Freshly mmapped pages are copy-on-write references to a global zero page.
    // On the first write, a page fault occurs, and an actual writable page is
    // allocated. If we are going to use this memory soon, such as when resizing
    // hash tables, it makes sense to pre-fault the pages by passing
    // MAP_POPULATE to mmap(). This takes some time, but should be faster
    // overall than having a hot loop interrupted by page faults.
    // It is only supported on Linux.
    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
        | (mmap_populate ? MAP_POPULATE : 0)
#endif
        ;

private:
    void * allocNoTrack(size_t size, size_t alignment)
    {
        void * buf;

        if (size >= MMAP_THRESHOLD)
        {
            if (alignment > MMAP_MIN_ALIGNMENT)
                throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
                    + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);

            buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
                mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else
        {
            if (alignment <= MALLOC_MIN_ALIGNMENT)
            {
                if constexpr (clear_memory)
                    buf = ::calloc(size, 1);
                else
                    buf = ::malloc(size);

                if (nullptr == buf)
                    DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            }
            else
            {
                buf = nullptr;
                int res = posix_memalign(&buf, alignment, size);

                if (0 != res)
                    DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);

                if constexpr (clear_memory)
                    memset(buf, 0, size);
            }
        }
        return buf;
    }

    void freeNoTrack(void * buf, size_t size)
    {
        if (size >= MMAP_THRESHOLD)
        {
            if (0 != munmap(buf, size))
                DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
        }
        else
        {
            ::free(buf);
        }
    }

#ifndef NDEBUG
    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
    /// default. This may lead to worse TLB performance.
    void * getMmapHint()
    {
        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
    }
#else
    void * getMmapHint()
    {
        return nullptr;
    }
#endif
};
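
/** Usage sketch (illustrative only, not part of the original header; the
  * template arguments shown here are an assumption, other instantiations exist):
  *
  *     Allocator<false, false> allocator;
  *     void * buf = allocator.alloc(1024);        /// tracked via CurrentMemoryTracker
  *     buf = allocator.realloc(buf, 1024, 2048);  /// may move; old contents are preserved
  *     allocator.free(buf, 2048);                 /// the size must match what was allocated
  */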

/** When using AllocatorWithStackMemory, located on the stack,
  * GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
#endif

/** Allocator with an optimization to place small memory ranges in automatic (stack) memory.
  */
template <typename Base, size_t N, size_t Alignment>
class AllocatorWithStackMemory : private Base
{
private:
    alignas(Alignment) char stack_memory[N];

public:
    /// Do not use boost::noncopyable to avoid the warning about a direct base
    /// being inaccessible due to ambiguity when derived classes are also
    /// noncopyable (-Winaccessible-base).
    AllocatorWithStackMemory(const AllocatorWithStackMemory &) = delete;
    AllocatorWithStackMemory & operator=(const AllocatorWithStackMemory &) = delete;
    AllocatorWithStackMemory() = default;
    ~AllocatorWithStackMemory() = default;

    void * alloc(size_t size)
    {
        if (size <= N)
        {
            if constexpr (Base::clear_memory)
                memset(stack_memory, 0, N);
            return stack_memory;
        }

        return Base::alloc(size, Alignment);
    }

    void free(void * buf, size_t size)
    {
        if (size > N)
            Base::free(buf, size);
    }

    void * realloc(void * buf, size_t old_size, size_t new_size)
    {
        /// Was in stack_memory, will remain there.
        if (new_size <= N)
            return buf;

        /// Already was big enough to not fit in stack_memory.
        if (old_size > N)
            return Base::realloc(buf, old_size, new_size, Alignment);

        /// Was in stack memory, but now will not fit there.
        void * new_buf = Base::alloc(new_size, Alignment);
        memcpy(new_buf, buf, old_size);
        return new_buf;
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return N;
    }
};
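
/** Usage sketch (illustrative only; the alias below is a hypothetical example,
  * not a definition from this header):
  *
  *     using SmallAllocator = AllocatorWithStackMemory<Allocator<false, false>, 64, 8>;
  *     SmallAllocator a;
  *     void * p = a.alloc(32);      /// fits in stack_memory, no heap allocation
  *     p = a.realloc(p, 32, 128);   /// spills to the heap via Base::alloc + memcpy
  *     a.free(p, 128);              /// sizes <= N make free() a no-op
  */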


#if !__clang__
#pragma GCC diagnostic pop
#endif