/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache, etc.), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads and
need to be accessed using atomic operations.
We need this memory layer between the raw OS calls because:
1. On `sbrk`-like systems (such as WebAssembly) we need our own memory maps in order
   to reuse memory effectively.
2. It turns out that for large objects, roughly between 1MiB and 32MiB, the cost of
   an OS allocation/free is still (much) too expensive relative to the accesses in
   that object (the `malloc-large` benchmark tests this). This means we need a
   cheaper way to reuse memory.
3. This layer can help with NUMA-aware allocation in the future.

Possible issues:
- (2) could potentially also be addressed with a small per-thread cache, which is much
  simpler. Generally though, that requires shrinking of huge pages, may over-use
  memory per thread, and is not compatible with `sbrk`.
- Since the current regions are per-process, we need atomic operations to
  claim blocks, which may be contended.
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
  linearly. At what point will direct OS calls be faster? Is there a way to
  do this better without adding too much complexity?
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include <string.h>  // memset

// Internal raw OS interface
size_t _mi_os_large_page_size();
bool   _mi_os_protect(void* addr, size_t size);
bool   _mi_os_unprotect(void* addr, size_t size);
bool   _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool   _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool   _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool   _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
void*  _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
void   _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
void*  _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
bool   _mi_os_is_huge_reserved(void* p);

// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE   (256 * (1ULL << 30))  // 256GiB => 16KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE   (3 * (1UL << 30))     // 3GiB => 196 bytes for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif

#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE

#define MI_REGION_MAP_BITS        (MI_INTPTR_SIZE * 8)
#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
#define MI_REGION_MAX_ALLOC_SIZE  ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE)  // 64MiB
#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
#define MI_REGION_MAP_FULL        UINTPTR_MAX
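
// For illustration only: on a typical 64-bit platform (MI_INTPTR_SIZE == 8) and assuming
// a 4MiB MI_SEGMENT_SIZE (as the comments below state), these constants work out to:
//   MI_REGION_MAP_BITS        = 64                       (one bit per block)
//   MI_REGION_SIZE            = 64 * 4MiB    = 256MiB    (memory owned by one region)
//   MI_REGION_MAX_ALLOC_SIZE  = 16 * 4MiB    = 64MiB     (largest allocation served from a region)
//   MI_REGION_MAX             = 256GiB/256MiB = 1024     (number of region slots)
// Larger requests (or larger alignments) bypass the regions and go to the OS directly.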


typedef uintptr_t mi_region_info_t;

static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
  return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0));
}

static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) {
  if (is_large) *is_large = ((info&0x02) != 0);
  if (is_committed) *is_committed = ((info&0x01) != 0);
  return (void*)(info & ~0x03);
}
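
// A short illustration (hypothetical address, not part of the allocator): because a region
// `start` pointer is MI_SEGMENT_ALIGN aligned, its low two bits are zero and can carry the
// flags. A minimal sketch:
//
//   void* start = (void*)0x7f0000000000;   // assumed segment-aligned address
//   mi_region_info_t info = mi_region_info_create(start, true /*is_large*/, false /*is_committed*/);
//   // info == 0x7f0000000002: bit 1 holds `is_large`, bit 0 holds `is_committed`
//   bool is_large, is_committed;
//   void* p = mi_region_info_read(info, &is_large, &is_committed);
//   // p == start, is_large == true, is_committed == false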


// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory, with
// a bitmap that has one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
  volatile _Atomic(uintptr_t) map;            // in-use bit per MI_SEGMENT_SIZE block
  volatile _Atomic(mi_region_info_t) info;    // start of virtual memory area, and flags
  volatile _Atomic(uintptr_t) dirty_mask;     // bit per block if the contents are not zero'd
} mem_region_t;


// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// TODO: in the future, maintain a map per NUMA node for NUMA-aware allocation
static mem_region_t regions[MI_REGION_MAX];

static volatile _Atomic(uintptr_t) regions_count; // = 0;  // allocated regions


/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/

// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
  mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
  return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
}

// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
  mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
  return ((((uintptr_t)1 << blocks) - 1) << bitidx);
}
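
// For example (illustrative values only): a 9MiB request needs
// mi_region_block_count == 3 blocks of 4MiB, and claiming those 3 blocks starting at
// bit index 5 uses mi_region_block_mask(3, 5) == (0x7 << 5) == 0xE0, i.e. three
// contiguous bits set starting at bit 5 of a region's `map` word.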

// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
  return _mi_align_up(size, _mi_os_large_page_size());
}
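
// For instance, assuming a 2MiB large OS page size, mi_good_commit_size(5MiB) would round
// up to 6MiB so that commit/reset calls always cover whole large pages; the SIZE_MAX guard
// only protects the alignment computation against overflow.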

// Return `true` if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
  if (p==NULL) return false;
  size_t count = mi_atomic_read_relaxed(&regions_count);
  for (size_t i = 0; i < count; i++) {
    uint8_t* start = (uint8_t*)mi_region_info_read(mi_atomic_read_relaxed(&regions[i].info), NULL, NULL);
    if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
  }
  return false;
}


/* ----------------------------------------------------------------------------
Commit from a region
-----------------------------------------------------------------------------*/

// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed, so ensure they are initialized to NULL/SIZE_MAX before the call.
// (Not being able to claim is not considered an error, so check for `p != NULL` afterwards.)
static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks,
                                    size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
  size_t mask = mi_region_block_mask(blocks, bitidx);
  mi_assert_internal(mask != 0);
  mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
  mi_assert_internal(&regions[idx] == region);

  // ensure the region is reserved
  mi_region_info_t info = mi_atomic_read(&region->info);
  if (info == 0)
  {
    bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
    bool region_large = *allow_large;
    void* start = NULL;
    if (region_large) {
      start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN);
      if (start != NULL) { region_commit = true; }
    }
    if (start == NULL) {
      start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
    }
    mi_assert_internal(!(region_large && !*allow_large));

    if (start == NULL) {
      // failure to allocate from the OS! unclaim the blocks and fail
      uintptr_t map;
      do {
        map = mi_atomic_read_relaxed(&region->map);
      } while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
      return false;
    }

    // set the newly allocated region
    info = mi_region_info_create(start, region_large, region_commit);
    if (mi_atomic_cas_strong(&region->info, info, 0)) {
      // update the region count
      mi_atomic_increment(&regions_count);
    }
    else {
      // failed, another thread allocated just before us!
      // we assign it to a later slot instead (up to 4 tries).
      for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
        if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
          mi_atomic_increment(&regions_count);
          start = NULL;
          break;
        }
      }
      if (start != NULL) {
        // free it if we did not manage to save it in some other region slot
        _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
      }
      // and continue with the memory at our index
      info = mi_atomic_read(&region->info);
    }
  }
  mi_assert_internal(info == mi_atomic_read(&region->info));
  mi_assert_internal(info != 0);

  // Commit the blocks to memory
  bool region_is_committed = false;
  bool region_is_large = false;
  void* start = mi_region_info_read(info, &region_is_large, &region_is_committed);
  mi_assert_internal(!(region_is_large && !*allow_large));
  mi_assert_internal(start != NULL);

  // set dirty bits
  uintptr_t m;
  do {
    m = mi_atomic_read(&region->dirty_mask);
  } while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m));
  *is_zero = ((m & mask) == 0);  // no dirty bit set in our claimed range?

  void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
  if (*commit && !region_is_committed) {
    // ensure commit
    bool commit_zero = false;
    _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats);  // only commit needed size (unless using large OS pages)
    if (commit_zero) *is_zero = true;
  }
  else if (!*commit && region_is_committed) {
    // but even when no commit is requested, we might have committed anyway (in a huge OS page for example)
    *commit = true;
  }

  // and return the allocation
  mi_assert_internal(blocks_start != NULL);
  *allow_large = region_is_large;
  *p  = blocks_start;
  *id = (idx*MI_REGION_MAP_BITS) + bitidx;
  return true;
}

// Use bit scan forward to quickly find the first zero bit if it is available
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
  if (x==0) return 8*MI_INTPTR_SIZE;
  DWORD idx;
#if (MI_INTPTR_SIZE==8)
  _BitScanForward64(&idx, x);
#else
  _BitScanForward(&idx, x);
#endif
  return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
  if (x==0) return 8*MI_INTPTR_SIZE;
  DWORD idx;
#if (MI_INTPTR_SIZE==8)
  _BitScanReverse64(&idx, x);
#else
  _BitScanReverse(&idx, x);
#endif
  return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#define MI_HAVE_BITSCAN
static inline size_t mi_bsf(uintptr_t x) {
  return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x));
}
static inline size_t mi_bsr(uintptr_t x) {
  return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x));
}
#endif
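
// For example: mi_bsf(0x14 /*0b10100*/) == 2 (index of the lowest set bit) and
// mi_bsr(0x14) == 4 (index of the highest set bit); both return 8*MI_INTPTR_SIZE
// when given 0 so callers can detect the "no bit set" case.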

// Allocate `blocks` in a `region` at `idx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed, so ensure they are initialized to NULL/SIZE_MAX before the call.
// (Not being able to claim is not considered an error, so check for `p != NULL` afterwards.)
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size,
                                   bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
  mi_assert_internal(p != NULL && id != NULL);
  mi_assert_internal(blocks < MI_REGION_MAP_BITS);

  const uintptr_t mask = mi_region_block_mask(blocks, 0);
  const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
  uintptr_t map = mi_atomic_read(&region->map);
  if (map==MI_REGION_MAP_FULL) return true;

#ifdef MI_HAVE_BITSCAN
  size_t bitidx = mi_bsf(~map);    // quickly find the first zero bit if possible
#else
  size_t bitidx = 0;               // otherwise start at 0
#endif
  uintptr_t m = (mask << bitidx);  // invariant: m == mask shifted by bitidx

  // scan linearly for a free range of zero bits
  while (bitidx <= bitidx_max) {
    if ((map & m) == 0) {  // are the mask bits free at bitidx?
      mi_assert_internal((m >> bitidx) == mask);  // no overflow?
      uintptr_t newmap = map | m;
      mi_assert_internal((newmap^map) >> bitidx == mask);
      if (!mi_atomic_cas_weak(&region->map, newmap, map)) {  // TODO: use strong cas here?
        // no success, another thread claimed concurrently.. keep going
        map = mi_atomic_read(&region->map);
        continue;
      }
      else {
        // success, we claimed the bits
        // now commit the block memory -- this can still fail
        return mi_region_commit_blocks(region, idx, bitidx, blocks,
                                       size, commit, allow_large, is_zero, p, id, tld);
      }
    }
    else {
      // on to the next bit range
#ifdef MI_HAVE_BITSCAN
      size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
      mi_assert_internal(shift > 0 && shift <= blocks);
#else
      size_t shift = 1;
#endif
      bitidx += shift;
      m <<= shift;
    }
  }
  // no error, but also no bits found
  return true;
}
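
// A small walk-through of the scan above (illustrative values only): suppose blocks == 2,
// so mask == 0b11, and the current map has its low bits set as ...0001011. Then
// mi_bsf(~map) == 2, but map & (mask << 2) == 0b1000 != 0, so we skip past the blocking
// bit: shift == mi_bsr(0b1000) - 2 + 1 == 2 and bitidx becomes 4, where bits 4 and 5 are
// free and can be claimed with a single compare-and-swap on `map`.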

// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed, so ensure they are initialized to NULL/0 before the call.
// (Not being able to claim is not considered an error, so check for `p != NULL` afterwards.)
static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size,
                                       bool* commit, bool* allow_large, bool* is_zero,
                                       void** p, size_t* id, mi_os_tld_t* tld)
{
  // check if there are available blocks in the region..
  mi_assert_internal(idx < MI_REGION_MAX);
  mem_region_t* region = &regions[idx];
  uintptr_t m = mi_atomic_read_relaxed(&region->map);
  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
    bool ok = (*commit || *allow_large);  // committing or allow-large is always ok
    if (!ok) {
      // otherwise skip incompatible regions if possible.
      // this is not guaranteed due to multiple threads allocating at the same time but
      // that's ok. In secure mode, large is never allowed for any thread, so that works out;
      // otherwise we might just not be able to reset/decommit individual pages sometimes.
      mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
      bool is_large;
      bool is_committed;
      void* start = mi_region_info_read(info, &is_large, &is_committed);
      ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large));  // TODO: test with one bitmap operation?
    }
    if (ok) {
      return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld);
    }
  }
  return true;  // no error, but no success either
}

/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/

// Allocate `size` memory aligned at `alignment`. Return non-NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero,
                            size_t* id, mi_os_tld_t* tld)
{
  mi_assert_internal(id != NULL && tld != NULL);
  mi_assert_internal(size > 0);
  *id = SIZE_MAX;
  *is_zero = false;
  bool default_large = false;
  if (large==NULL) large = &default_large;  // ensure `large != NULL`

  // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
  if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
    *is_zero = true;
    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld);  // round up size
  }

  // always round size to OS page size multiple (so commit/decommit go over the entire range)
  // TODO: use large OS page size here?
  size = _mi_align_up(size, _mi_os_page_size());

  // calculate the number of needed blocks
  size_t blocks = mi_region_block_count(size);
  mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);

  // find a range of free blocks
  void* p = NULL;
  size_t count = mi_atomic_read(&regions_count);
  size_t idx = tld->region_idx;  // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
  for (size_t visited = 0; visited < count; visited++, idx++) {
    if (idx >= count) idx = 0;  // wrap around
    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL;  // error
    if (p != NULL) break;
  }

  if (p == NULL) {
    // no free range in existing regions -- try to extend beyond the count, but at most 8 regions
    for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) {
      if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL;  // error
      if (p != NULL) break;
    }
  }

  if (p == NULL) {
    // we could not find a place to allocate, fall back to the OS directly
    _mi_warning_message("unable to allocate from region: size %zu\n", size);
    *is_zero = true;
    p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
  }
  else {
    tld->region_idx = idx;  // next start of search? currently not used as we use first-fit
  }

  mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
  return p;
}
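
// A hedged usage sketch (not part of mimalloc itself; the tld setup is only illustrative):
// a caller such as the segment allocator would request segment-sized, segment-aligned memory
// and later return it with the same `id` and size:
//
//   mi_os_tld_t* tld = ...;            // thread-local OS state of the current thread
//   size_t id    = SIZE_MAX;
//   bool commit  = true;
//   bool large   = false;
//   bool is_zero = false;
//   void* seg = _mi_mem_alloc_aligned(MI_SEGMENT_SIZE, MI_SEGMENT_ALIGN,
//                                     &commit, &large, &is_zero, &id, tld);
//   if (seg != NULL) {
//     // ... use the memory; `is_zero` tells whether it is guaranteed zero-initialized ...
//     _mi_mem_free(seg, MI_SEGMENT_SIZE, id, tld->stats);
//   }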


/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/

// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
  mi_assert_internal(size > 0 && stats != NULL);
  if (p==NULL) return;
  if (size==0) return;
  if (id == SIZE_MAX) {
    // was a direct OS allocation, pass through
    _mi_os_free(p, size, stats);
  }
  else {
    // allocated in a region
    mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
    // we can align the size up to page size (as we allocate that way too)
    // this ensures we fully commit/decommit/reset
    size = _mi_align_up(size, _mi_os_page_size());
    size_t idx    = (id / MI_REGION_MAP_BITS);
    size_t bitidx = (id % MI_REGION_MAP_BITS);
    size_t blocks = mi_region_block_count(size);
    size_t mask   = mi_region_block_mask(blocks, bitidx);
    mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return;  // or `abort`?
    mem_region_t* region = &regions[idx];
    mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask);  // claimed?
    mi_region_info_t info = mi_atomic_read(&region->info);
    bool is_large;
    bool is_eager_committed;
    void* start = mi_region_info_read(info, &is_large, &is_eager_committed);
    mi_assert_internal(start != NULL);
    void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
    mi_assert_internal(blocks_start == p);  // not a pointer in our area?
    mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
    if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return;  // or `abort`?

    // decommit (or reset) the blocks to reduce the working set.
    // TODO: implement delayed decommit/reset as these calls are too expensive
    // if the memory is reused soon.
    // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
    if (!is_large) {
      if (mi_option_is_enabled(mi_option_segment_reset)) {
        if (!is_eager_committed &&   // cannot reset large pages
            (mi_option_is_enabled(mi_option_eager_commit) ||    // cannot reset halfway committed segments, use `option_page_reset` instead
             mi_option_is_enabled(mi_option_reset_decommits)))  // but we can decommit halfway committed segments
        {
          _mi_os_reset(p, size, stats);
          //_mi_os_decommit(p, size, stats);  // todo: and clear dirty bits?
        }
      }
    }
    if (!is_eager_committed) {
      // adjust commit statistics as we commit again when re-using the same slot
      _mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
    }

    // TODO: should we free empty regions? Currently this is only done in `_mi_mem_collect`.
    // This frees up virtual address space, which might be useful on 32-bit systems?

    // and unclaim
    uintptr_t map;
    uintptr_t newmap;
    do {
      map = mi_atomic_read_relaxed(&region->map);
      newmap = map & ~mask;
    } while (!mi_atomic_cas_weak(&region->map, newmap, map));
  }
}
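
// For example (illustrative values only): on a 64-bit platform with MI_REGION_MAP_BITS == 64,
// an id of 70 decodes as idx == 70/64 == 1 and bitidx == 70%64 == 6, so the memory being
// freed started at bit 6 of region 1, i.e. 6 * MI_SEGMENT_SIZE bytes into that region's
// virtual memory area.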


/* ----------------------------------------------------------------------------
Collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_stats_t* stats) {
  // free every region that has no segments in use.
  for (size_t i = 0; i < regions_count; i++) {
    mem_region_t* region = &regions[i];
    if (mi_atomic_read_relaxed(&region->map) == 0) {
      // if no segments used, try to claim the whole region
      uintptr_t m;
      do {
        m = mi_atomic_read_relaxed(&region->map);
      } while (m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0));
      if (m == 0) {
        // on success, free the whole region (unless it was huge reserved)
        bool is_eager_committed;
        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
        if (start != NULL && !_mi_os_is_huge_reserved(start)) {
          _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats);
        }
        // and release
        mi_atomic_write(&region->info, 0);
        mi_atomic_write(&region->map, 0);
      }
    }
  }
}

/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/

bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
  return _mi_os_commit(p, size, is_zero, stats);
}

bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) {
  return _mi_os_decommit(p, size, stats);
}

bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) {
  return _mi_os_reset(p, size, stats);
}

bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
  return _mi_os_unreset(p, size, is_zero, stats);
}

bool _mi_mem_protect(void* p, size_t size) {
  return _mi_os_protect(p, size);
}

bool _mi_mem_unprotect(void* p, size_t size) {
  return _mi_os_unprotect(p, size);
}