1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | #ident "$Id$" |
3 | /*====== |
4 | This file is part of PerconaFT. |
5 | |
6 | |
7 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
8 | |
9 | PerconaFT is free software: you can redistribute it and/or modify |
10 | it under the terms of the GNU General Public License, version 2, |
11 | as published by the Free Software Foundation. |
12 | |
13 | PerconaFT is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
20 | |
21 | ---------------------------------------- |
22 | |
23 | PerconaFT is free software: you can redistribute it and/or modify |
24 | it under the terms of the GNU Affero General Public License, version 3, |
25 | as published by the Free Software Foundation. |
26 | |
27 | PerconaFT is distributed in the hope that it will be useful, |
28 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
29 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30 | GNU Affero General Public License for more details. |
31 | |
32 | You should have received a copy of the GNU Affero General Public License |
33 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
34 | ======= */ |
35 | |
36 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
37 | |
38 | #pragma once |
39 | |
40 | /** |
41 | * These functions are extracted from Facebook's folly library, which |
42 | * integrates well with jemalloc. See |
43 | * https://github.com/facebook/folly/blob/master/folly/Malloc.h |
44 | */ |
45 | |
46 | #include <algorithm> |
47 | #include <cassert> |
48 | #include <cstdlib> |
49 | |
50 | #ifdef HAVE_BITS_FUNCTEXCEPT_H |
51 | |
52 | # include <bits/functexcept.h> |
53 | |
54 | #else |
55 | |
56 | # include <stdexcept> |
57 | |
// Fallback forward declaration, used only when HAVE_BITS_FUNCTEXCEPT_H is
// not defined and <bits/functexcept.h> therefore cannot supply it. The
// checked allocation wrappers in this header call this function when an
// allocation returns NULL.
// NOTE(review): the definition is presumably provided by the C++ runtime
// library - confirm for every supported toolchain.
namespace std {

void __throw_bad_alloc();

}
63 | |
64 | #endif |
65 | |
/**
 * Weak declarations of jemalloc's extended API: the *allocx() family and
 * mallctl(). When jemalloc is the allocator in use it provides these
 * symbols; under any other malloc implementation they are NULL.
 */
extern "C" {

void* mallocx(size_t, int) __attribute__((__weak__));
void* rallocx(void*, size_t, int) __attribute__((__weak__));
size_t xallocx(void*, size_t, size_t, int) __attribute__((__weak__));
size_t sallocx(const void*, int) __attribute__((__weak__));
void dallocx(void*, int) __attribute__((__weak__));
size_t nallocx(size_t, int) __attribute__((__weak__));
int mallctl(const char*, void*, size_t*, void*, size_t)
    __attribute__((__weak__));

}  // extern "C"
85 | |
86 | namespace malloc_utils { |
87 | |
// Performs the actual jemalloc detection; defined out of line.
bool usingJEMallocSlow();

/**
 * Tell whether jemalloc is the allocator in use.
 *
 * The answer is computed by usingJEMallocSlow() the first time this
 * function runs and is served from a function-local static afterwards.
 */
inline bool usingJEMalloc() {
    // A non-NULL rallocx is not enough evidence: this code may live in a
    // dlopen()ed module that links against libjemalloc (so rallocx
    // resolves) while the main program runs on a different allocator.
    // usingJEMallocSlow() contains the (hacky) details of the real check.
    static const bool jemallocDetected = usingJEMallocSlow();
    return jemallocDetected;
}
102 | |
103 | /** |
104 | * For jemalloc's size classes, see |
105 | * http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html |
106 | */ |
107 | inline size_t goodMallocSize(size_t minSize) noexcept { |
108 | if (!usingJEMalloc()) { |
109 | // Not using jemalloc - no smarts |
110 | return minSize; |
111 | } |
112 | size_t goodSize; |
113 | if (minSize <= 64) { |
114 | // Choose smallest allocation to be 64 bytes - no tripping |
115 | // over cache line boundaries, and small string optimization |
116 | // takes care of short strings anyway. |
117 | goodSize = 64; |
118 | } else if (minSize <= 512) { |
119 | // Round up to the next multiple of 64; we don't want to trip |
120 | // over cache line boundaries. |
121 | goodSize = (minSize + 63) & ~size_t(63); |
122 | } else if (minSize <= 3584) { |
123 | // Round up to the next multiple of 256. For some size |
124 | // classes jemalloc will additionally round up to the nearest |
125 | // multiple of 512, hence the nallocx() call. |
126 | goodSize = nallocx((minSize + 255) & ~size_t(255), 0); |
127 | } else if (minSize <= 4072 * 1024) { |
128 | // Round up to the next multiple of 4KB |
129 | goodSize = (minSize + 4095) & ~size_t(4095); |
130 | } else { |
131 | // Holy Moly |
132 | // Round up to the next multiple of 4MB |
133 | goodSize = (minSize + 4194303) & ~size_t(4194303); |
134 | } |
135 | assert(nallocx(goodSize, 0) == goodSize); |
136 | return goodSize; |
137 | } |
138 | |
// Smallest current capacity, in bytes, for which smartRealloc() attempts an
// in-place expansion through jemalloc.
static constexpr size_t jemallocMinInPlaceExpandable = 4096;
140 | |
141 | /** |
142 | * Trivial wrappers around malloc, calloc, realloc that check for |
143 | * allocation failure and throw std::bad_alloc in that case. |
144 | */ |
// malloc() that reports a NULL result through std::__throw_bad_alloc().
inline void* checkedMalloc(size_t size) {
    void* const block = malloc(size);
    if (block == nullptr) {
        std::__throw_bad_alloc();
    }
    return block;
}
150 | |
// calloc() that reports a NULL result through std::__throw_bad_alloc().
inline void* checkedCalloc(size_t n, size_t size) {
    void* const block = calloc(n, size);
    if (block == nullptr) {
        std::__throw_bad_alloc();
    }
    return block;
}
156 | |
// realloc() that reports a NULL result through std::__throw_bad_alloc().
inline void* checkedRealloc(void* ptr, size_t size) {
    void* const block = realloc(ptr, size);
    if (block == nullptr) {
        std::__throw_bad_alloc();
    }
    return block;
}
162 | |
163 | /** |
164 | * This function tries to reallocate a buffer of which only the first |
165 | * currentSize bytes are used. The problem with using realloc is that |
166 | * if currentSize is relatively small _and_ if realloc decides it |
167 | * needs to move the memory chunk to a new buffer, then realloc ends |
168 | * up copying data that is not used. It's impossible to hook into |
169 | * GNU's malloc to figure whether expansion will occur in-place or as |
170 | * a malloc-copy-free troika. (If an expand_in_place primitive would |
171 | * be available, smartRealloc would use it.) As things stand, this |
172 | * routine just tries to call realloc() (thus benefitting of potential |
173 | * copy-free coalescing) unless there's too much slack memory. |
174 | */ |
175 | inline void* smartRealloc(void* p, |
176 | const size_t currentSize, |
177 | const size_t currentCapacity, |
178 | const size_t newCapacity, |
179 | size_t &realNewCapacity) { |
180 | assert(p); |
181 | assert(currentSize <= currentCapacity && |
182 | currentCapacity < newCapacity); |
183 | |
184 | if (usingJEMalloc()) { |
185 | // using jemalloc's API. Don't forget that jemalloc can never |
186 | // grow in place blocks smaller than 4096 bytes. |
187 | // |
188 | // NB: newCapacity may not be precisely equal to a jemalloc |
189 | // size class, i.e. newCapacity is not guaranteed to be the |
190 | // result of a goodMallocSize() call, therefore xallocx() may |
191 | // return more than newCapacity bytes of space. Use >= rather |
192 | // than == to check whether xallocx() successfully expanded in |
193 | // place. |
194 | size_t realNewCapacity_; |
195 | if (currentCapacity >= jemallocMinInPlaceExpandable && |
196 | (realNewCapacity_ = xallocx(p, newCapacity, 0, 0)) >= newCapacity) { |
197 | // Managed to expand in place |
198 | realNewCapacity = realNewCapacity_; |
199 | return p; |
200 | } |
201 | // Cannot expand; must move |
202 | char * const result = static_cast<char *>(checkedMalloc(newCapacity)); |
203 | char *cp = static_cast<char *>(p); |
204 | std::copy(cp, cp + currentSize, result); |
205 | free(p); |
206 | realNewCapacity = newCapacity; |
207 | return result; |
208 | } |
209 | |
210 | // No jemalloc no honey |
211 | auto const slack = currentCapacity - currentSize; |
212 | if (slack * 2 > currentSize) { |
213 | // Too much slack, malloc-copy-free cycle: |
214 | char * const result = static_cast<char *>(checkedMalloc(newCapacity)); |
215 | char *cp = static_cast<char *>(p); |
216 | std::copy(cp, cp + currentSize, result); |
217 | free(p); |
218 | realNewCapacity = newCapacity; |
219 | return result; |
220 | } |
221 | // If there's not too much slack, we realloc in hope of coalescing |
222 | realNewCapacity = newCapacity; |
223 | return checkedRealloc(p, newCapacity); |
224 | } |
225 | |
226 | } // namespace malloc_utils |
227 | |