#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;
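/*
 * For reference only (not used by this file): background threads are normally
 * enabled either at startup via MALLOC_CONF, e.g.
 *
 *     MALLOC_CONF="background_thread:true,max_background_threads:4"
 *
 * or at run time through the ctl interface, e.g.
 *
 *     bool enable = true;
 *     mallctl("background_thread", NULL, NULL, &enable, sizeof(bool));
 *
 * The exact option spellings are documented in the jemalloc man page; this is
 * an illustrative sketch, not part of the implementation below.
 */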

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state. Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/******************************************************************************/

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
    if (!isthreaded) {
        isthreaded = true;
    }
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
    pthread_create_wrapper_init();

    return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
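/*
 * When background threads are compiled out, the stubs above keep the rest of
 * jemalloc linking against the same symbols; reaching any of them indicates a
 * logic error, hence not_reached().
 */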
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
    background_thread_wakeup_time_set(tsdn, info, 0);
    info->npages_to_purge_new = 0;
    if (config_stats) {
        info->tot_n_runs = 0;
        nstime_init(&info->tot_sleep_time, 0);
    }
}

static inline bool
set_current_thread_affinity(int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(cpu, &cpuset);
    int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

    return (ret != 0);
#else
    return false;
#endif
}
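/*
 * Note: a return value of true above means the affinity call failed; callers
 * currently ignore the result. Pinning is only attempted when percpu_arena is
 * in use (see background_thread_entry()).
 */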

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimal sleep interval 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
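/*
 * For intuition (assuming 4 KiB pages, which is platform dependent): the
 * 1024-page threshold corresponds to roughly 4 MiB of dirty memory, and the
 * minimal interval works out to BILLION / 10 = 100,000,000 ns = 100 ms.
 */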

static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
    size_t i;
    uint64_t sum = 0;
    for (i = 0; i < interval; i++) {
        sum += decay->backlog[i] * h_steps[i];
    }
    for (; i < SMOOTHSTEP_NSTEPS; i++) {
        sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
    }

    return (size_t)(sum >> SMOOTHSTEP_BFP);
}
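/*
 * Sketch of the estimate above: decay->backlog[i] is a batch of dirty pages,
 * with higher indices holding newer batches, and h_steps[i] is the fraction
 * (in SMOOTHSTEP_BFP fixed point) of batch i that is still allowed to remain
 * dirty. After `interval` more epochs, batch i slides down to index
 * i - interval, so its retained fraction drops from h_steps[i] to
 * h_steps[i - interval] (or to zero once it falls off the end). Summing the
 * drops and shifting out the fixed point therefore approximates the number of
 * pages that would be purged if the thread slept for `interval` epochs.
 */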

static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
    if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
        /* Use minimal interval if decay is contended. */
        return BACKGROUND_THREAD_MIN_INTERVAL_NS;
    }

    uint64_t interval;
    ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
    if (decay_time <= 0) {
        /* Purging is currently either eager or disabled. */
        interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
        goto label_done;
    }

    uint64_t decay_interval_ns = nstime_ns(&decay->interval);
    assert(decay_interval_ns > 0);
    size_t npages = extents_npages_get(extents);
    if (npages == 0) {
        unsigned i;
        for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
            if (decay->backlog[i] > 0) {
                break;
            }
        }
        if (i == SMOOTHSTEP_NSTEPS) {
            /* No dirty pages recorded. Sleep indefinitely. */
            interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
            goto label_done;
        }
    }
    if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        /* Use max interval. */
        interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
        goto label_done;
    }

    size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
    size_t ub = SMOOTHSTEP_NSTEPS;
    /* Minimal 2 intervals to ensure reaching next epoch deadline. */
    lb = (lb < 2) ? 2 : lb;
    if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
        (lb + 2 > ub)) {
        interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
        goto label_done;
    }

    assert(lb + 2 <= ub);
    size_t npurge_lb, npurge_ub;
    npurge_lb = decay_npurge_after_interval(decay, lb);
    if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        interval = decay_interval_ns * lb;
        goto label_done;
    }
    npurge_ub = decay_npurge_after_interval(decay, ub);
    if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        interval = decay_interval_ns * ub;
        goto label_done;
    }

    unsigned n_search = 0;
    size_t target, npurge;
    while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
        && (lb + 2 < ub)) {
        target = (lb + ub) / 2;
        npurge = decay_npurge_after_interval(decay, target);
        if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
            ub = target;
            npurge_ub = npurge;
        } else {
            lb = target;
            npurge_lb = npurge;
        }
        assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
    }
    interval = decay_interval_ns * (ub + lb) / 2;
label_done:
    interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
        BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
    malloc_mutex_unlock(tsdn, &decay->mtx);

    return interval;
}
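/*
 * In short, the search above picks the smallest sleep interval (in whole decay
 * epochs, between lb and ub) after which roughly
 * BACKGROUND_THREAD_NPAGES_THRESHOLD pages are expected to need purging, so
 * the thread wakes neither too often nor long after enough work has built up.
 * The assert bounds the bisection at about lg(SMOOTHSTEP_NSTEPS) iterations.
 */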

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
    uint64_t i1, i2;
    i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
        &arena->extents_dirty);
    if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
        return i1;
    }
    i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
        &arena->extents_muzzy);

    return i1 < i2 ? i1 : i2;
}

static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
    if (config_stats) {
        info->tot_n_runs++;
    }
    info->npages_to_purge_new = 0;

    struct timeval tv;
    /*
     * pthread_cond_timedwait() expects an absolute CLOCK_REALTIME time,
     * hence gettimeofday().
     */
    gettimeofday(&tv, NULL);
    nstime_t before_sleep;
    nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

    int ret;
    if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
        assert(background_thread_indefinite_sleep(info));
        ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
        assert(ret == 0);
    } else {
        assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
            interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
        /*
         * The wakeup time is tracked with the malloc clock, which may
         * differ from tv.
         */
        nstime_t next_wakeup;
        nstime_init(&next_wakeup, 0);
        nstime_update(&next_wakeup);
        nstime_iadd(&next_wakeup, interval);
        assert(nstime_ns(&next_wakeup) <
            BACKGROUND_THREAD_INDEFINITE_SLEEP);
        background_thread_wakeup_time_set(tsdn, info,
            nstime_ns(&next_wakeup));

        nstime_t ts_wakeup;
        nstime_copy(&ts_wakeup, &before_sleep);
        nstime_iadd(&ts_wakeup, interval);
        struct timespec ts;
        ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
        ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

        assert(!background_thread_indefinite_sleep(info));
        ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
        assert(ret == ETIMEDOUT || ret == 0);
        background_thread_wakeup_time_set(tsdn, info,
            BACKGROUND_THREAD_INDEFINITE_SLEEP);
    }
    if (config_stats) {
        gettimeofday(&tv, NULL);
        nstime_t after_sleep;
        nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
        if (nstime_compare(&after_sleep, &before_sleep) > 0) {
            nstime_subtract(&after_sleep, &before_sleep);
            nstime_add(&info->tot_sleep_time, &after_sleep);
        }
    }
}
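/*
 * Two clocks are in play above: the wall clock from gettimeofday() feeds the
 * absolute timeout handed to pthread_cond_timedwait() and the sleep-time
 * stats, while nstime_update() (jemalloc's internal clock) feeds the recorded
 * wakeup time that background_thread_interval_check() compares against.
 */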

static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
    if (unlikely(info->state == background_thread_paused)) {
        malloc_mutex_unlock(tsdn, &info->mtx);
        /* Wait on global lock to update status. */
        malloc_mutex_lock(tsdn, &background_thread_lock);
        malloc_mutex_unlock(tsdn, &background_thread_lock);
        malloc_mutex_lock(tsdn, &info->mtx);
        return true;
    }

    return false;
}
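/*
 * The pause handshake works because whoever pauses the threads (e.g. arena
 * reset) does so while holding background_thread_lock: a paused worker drops
 * its own mutex, then blocks on the global lock until the pausing operation
 * finishes, and finally re-reads its state under info->mtx.
 */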

static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
    uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
    unsigned narenas = narenas_total_get();

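    /*
     * Arenas are sharded across background threads: thread `ind` handles
     * arenas ind, ind + max_background_threads, ind +
     * 2 * max_background_threads, and so on, and sleeps for the shortest
     * interval any of them requests.
     */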
    for (unsigned i = ind; i < narenas; i += max_background_threads) {
        arena_t *arena = arena_get(tsdn, i, false);
        if (!arena) {
            continue;
        }
        arena_decay(tsdn, arena, true, false);
        if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
            /* Min interval will be used. */
            continue;
        }
        uint64_t interval = arena_decay_compute_purge_interval(tsdn,
            arena);
        assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
        if (min_interval > interval) {
            min_interval = interval;
        }
    }
    background_thread_sleep(tsdn, info, min_interval);
}

static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
    if (info == &background_thread_info[0]) {
        malloc_mutex_assert_owner(tsd_tsdn(tsd),
            &background_thread_lock);
    } else {
        malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
            &background_thread_lock);
    }

    pre_reentrancy(tsd, NULL);
    malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
    bool has_thread;
    assert(info->state != background_thread_paused);
    if (info->state == background_thread_started) {
        has_thread = true;
        info->state = background_thread_stopped;
        pthread_cond_signal(&info->cond);
    } else {
        has_thread = false;
    }
    malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

    if (!has_thread) {
        post_reentrancy(tsd);
        return false;
    }
    void *ret;
    if (pthread_join(info->thread, &ret)) {
        post_reentrancy(tsd);
        return true;
    }
    assert(ret == NULL);
    n_background_threads--;
    post_reentrancy(tsd);

    return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
    /*
     * Mask all signals during thread creation so that the new thread
     * starts with every signal blocked (i.e. it will not run any signal
     * handlers).
     */
    sigset_t set;
    sigfillset(&set);
    sigset_t oldset;
    int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
    if (mask_err != 0) {
        return mask_err;
    }
    int create_err = pthread_create_wrapper(thread, attr, start_routine,
        arg);
    /*
     * Restore the signal mask. Failure to restore the signal mask here
     * changes program behavior.
     */
    int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    if (restore_err != 0) {
        malloc_printf("<jemalloc>: background thread creation "
            "failed (%d), and signal mask restoration failed "
            "(%d)\n", create_err, restore_err);
        if (opt_abort) {
            abort();
        }
    }
    return create_err;
}

static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
    bool ret = false;
    if (likely(*n_created == n_background_threads)) {
        return ret;
    }

    tsdn_t *tsdn = tsd_tsdn(tsd);
    malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
    for (unsigned i = 1; i < max_background_threads; i++) {
        if (created_threads[i]) {
            continue;
        }
        background_thread_info_t *info = &background_thread_info[i];
        malloc_mutex_lock(tsdn, &info->mtx);
        /*
         * If the thread is in the paused state (because of arena
         * reset), delay its creation.
         */
        bool create = (info->state == background_thread_started);
        malloc_mutex_unlock(tsdn, &info->mtx);
        if (!create) {
            continue;
        }

        pre_reentrancy(tsd, NULL);
        int err = background_thread_create_signals_masked(&info->thread,
            NULL, background_thread_entry, (void *)(uintptr_t)i);
        post_reentrancy(tsd);

        if (err == 0) {
            (*n_created)++;
            created_threads[i] = true;
        } else {
            malloc_printf("<jemalloc>: background thread "
                "creation failed (%d)\n", err);
            if (opt_abort) {
                abort();
            }
        }
        /* Return to restart the loop since we unlocked. */
        ret = true;
        break;
    }
    malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

    return ret;
}

static void
background_thread0_work(tsd_t *tsd) {
    /* Thread0 is also responsible for launching / terminating threads. */
    VARIABLE_ARRAY(bool, created_threads, max_background_threads);
    unsigned i;
    for (i = 1; i < max_background_threads; i++) {
        created_threads[i] = false;
    }
    /* Start working, and create more threads when asked. */
    unsigned n_created = 1;
    while (background_thread_info[0].state != background_thread_stopped) {
        if (background_thread_pause_check(tsd_tsdn(tsd),
            &background_thread_info[0])) {
            continue;
        }
        if (check_background_thread_creation(tsd, &n_created,
            (bool *)&created_threads)) {
            continue;
        }
        background_work_sleep_once(tsd_tsdn(tsd),
            &background_thread_info[0], 0);
    }

    /*
     * Shut down the other threads at exit. Note that the ctl thread holds
     * the global background_thread mutex and is waiting for us.
     */
    assert(!background_thread_enabled());
    for (i = 1; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        assert(info->state != background_thread_paused);
        if (created_threads[i]) {
            background_threads_disable_single(tsd, info);
        } else {
            malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
            if (info->state != background_thread_stopped) {
                /* The thread was not created. */
                assert(info->state ==
                    background_thread_started);
                n_background_threads--;
                info->state = background_thread_stopped;
            }
            malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
        }
    }
    background_thread_info[0].state = background_thread_stopped;
    assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
    background_thread_info_t *info = &background_thread_info[ind];

    malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
    background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
        BACKGROUND_THREAD_INDEFINITE_SLEEP);
    if (ind == 0) {
        background_thread0_work(tsd);
    } else {
        while (info->state != background_thread_stopped) {
            if (background_thread_pause_check(tsd_tsdn(tsd),
                info)) {
                continue;
            }
            background_work_sleep_once(tsd_tsdn(tsd), info, ind);
        }
    }
    assert(info->state == background_thread_stopped);
    background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
    malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

static void *
background_thread_entry(void *ind_arg) {
    unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
    assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
    pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#elif defined(__FreeBSD__)
    pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
#endif
    if (opt_percpu_arena != percpu_arena_disabled) {
        set_current_thread_affinity((int)thread_ind);
    }
    /*
     * Start periodic background work. We use internal tsd which avoids
     * side effects, for example triggering new arena creation (which in
     * turn triggers another background thread creation).
     */
    background_work(tsd_internal_fetch(), thread_ind);
    assert(pthread_equal(pthread_self(),
        background_thread_info[thread_ind].thread));

    return NULL;
}

static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
    info->state = background_thread_started;
    background_thread_info_init(tsd_tsdn(tsd), info);
    n_background_threads++;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
    assert(have_background_thread);
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

    /* We create at most max_background_threads (ncpus by default) threads. */
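    /*
     * For example (assuming max_background_threads == 4): arenas 0, 4,
     * 8, ... map to background thread 0, arenas 1, 5, 9, ... to thread 1,
     * and so on.
     */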
    size_t thread_ind = arena_ind % max_background_threads;
    background_thread_info_t *info = &background_thread_info[thread_ind];

    bool need_new_thread;
    malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
    need_new_thread = background_thread_enabled() &&
        (info->state == background_thread_stopped);
    if (need_new_thread) {
        background_thread_init(tsd, info);
    }
    malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
    if (!need_new_thread) {
        return false;
    }
    if (arena_ind != 0) {
        /* Threads are created asynchronously by Thread 0. */
        background_thread_info_t *t0 = &background_thread_info[0];
        malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
        assert(t0->state == background_thread_started);
        pthread_cond_signal(&t0->cond);
        malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

        return false;
    }

    pre_reentrancy(tsd, NULL);
    /*
     * To avoid complications (besides reentrancy), create internal
     * background threads with the underlying pthread_create.
     */
    int err = background_thread_create_signals_masked(&info->thread, NULL,
        background_thread_entry, (void *)thread_ind);
    post_reentrancy(tsd);

    if (err != 0) {
        malloc_printf("<jemalloc>: arena 0 background thread creation "
            "failed (%d)\n", err);
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        info->state = background_thread_stopped;
        n_background_threads--;
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

        return true;
    }

    return false;
}

bool
background_threads_enable(tsd_t *tsd) {
    assert(n_background_threads == 0);
    assert(background_thread_enabled());
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

    VARIABLE_ARRAY(bool, marked, max_background_threads);
    unsigned i, nmarked;
    for (i = 0; i < max_background_threads; i++) {
        marked[i] = false;
    }
    nmarked = 0;
    /* Thread 0 is required and created at the end. */
    marked[0] = true;
    /* Mark the threads we need to create for thread 0. */
    unsigned n = narenas_total_get();
    for (i = 1; i < n; i++) {
        if (marked[i % max_background_threads] ||
            arena_get(tsd_tsdn(tsd), i, false) == NULL) {
            continue;
        }
        background_thread_info_t *info = &background_thread_info[
            i % max_background_threads];
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        assert(info->state == background_thread_stopped);
        background_thread_init(tsd, info);
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
        marked[i % max_background_threads] = true;
        if (++nmarked == max_background_threads) {
            break;
        }
    }

    return background_thread_create(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
    assert(!background_thread_enabled());
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

    /* Thread 0 will be responsible for terminating other threads. */
    if (background_threads_disable_single(tsd,
        &background_thread_info[0])) {
        return true;
    }
    assert(n_background_threads == 0);

    return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
    background_thread_info_t *info = arena_background_thread_info_get(
        arena);
    if (malloc_mutex_trylock(tsdn, &info->mtx)) {
        /*
         * The background thread may hold this mutex for a long time.
         * To avoid adding latency variance to application threads,
         * keep this path non-blocking and leave the work to a future
         * epoch.
         */
        return;
    }

    if (info->state != background_thread_started) {
        goto label_done;
    }
    if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
        goto label_done;
    }

    ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
    if (decay_time <= 0) {
        /* Purging is currently either eager or disabled. */
        goto label_done_unlock2;
    }
    uint64_t decay_interval_ns = nstime_ns(&decay->interval);
    assert(decay_interval_ns > 0);

    nstime_t diff;
    nstime_init(&diff, background_thread_wakeup_time_get(info));
    if (nstime_compare(&diff, &decay->epoch) <= 0) {
        goto label_done_unlock2;
    }
    nstime_subtract(&diff, &decay->epoch);
    if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
        goto label_done_unlock2;
    }

    if (npages_new > 0) {
        size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
        /*
         * Compute how many new pages we would need to purge by the next
         * wakeup, which is used to determine if we should signal the
         * background thread.
         */
        uint64_t npurge_new;
        if (n_epoch >= SMOOTHSTEP_NSTEPS) {
            npurge_new = npages_new;
        } else {
            uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
            assert(h_steps_max >=
                h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
            npurge_new = npages_new * (h_steps_max -
                h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
            npurge_new >>= SMOOTHSTEP_BFP;
        }
        info->npages_to_purge_new += npurge_new;
    }

    bool should_signal;
    if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        should_signal = true;
    } else if (unlikely(background_thread_indefinite_sleep(info)) &&
        (extents_npages_get(&arena->extents_dirty) > 0 ||
        extents_npages_get(&arena->extents_muzzy) > 0 ||
        info->npages_to_purge_new > 0)) {
        should_signal = true;
    } else {
        should_signal = false;
    }

    if (should_signal) {
        info->npages_to_purge_new = 0;
        pthread_cond_signal(&info->cond);
    }
label_done_unlock2:
    malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
    malloc_mutex_unlock(tsdn, &info->mtx);
}
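/*
 * Rough intuition for the signaling heuristic above: npages_new is scaled by
 * the portion of the smoothstep curve that will elapse by the scheduled
 * wakeup (n_epoch steps), so npages_to_purge_new accumulates an estimate of
 * extra purge work the sleeping thread does not yet know about. Once that
 * estimate exceeds BACKGROUND_THREAD_NPAGES_THRESHOLD (1024 pages), or the
 * thread is sleeping indefinitely while dirty/muzzy pages exist, it is woken
 * early.
 */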

void
background_thread_prefork0(tsdn_t *tsdn) {
    malloc_mutex_prefork(tsdn, &background_thread_lock);
    background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
    for (unsigned i = 0; i < max_background_threads; i++) {
        malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
    }
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
    for (unsigned i = 0; i < max_background_threads; i++) {
        malloc_mutex_postfork_parent(tsdn,
            &background_thread_info[i].mtx);
    }
    malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

void
background_thread_postfork_child(tsdn_t *tsdn) {
    for (unsigned i = 0; i < max_background_threads; i++) {
        malloc_mutex_postfork_child(tsdn,
            &background_thread_info[i].mtx);
    }
    malloc_mutex_postfork_child(tsdn, &background_thread_lock);
    if (!background_thread_enabled_at_fork) {
        return;
    }

    /* Clear background_thread state (reset to disabled for child). */
    malloc_mutex_lock(tsdn, &background_thread_lock);
    n_background_threads = 0;
    background_thread_enabled_set(tsdn, false);
    for (unsigned i = 0; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        malloc_mutex_lock(tsdn, &info->mtx);
        info->state = background_thread_stopped;
        int ret = pthread_cond_init(&info->cond, NULL);
        assert(ret == 0);
        background_thread_info_init(tsdn, info);
        malloc_mutex_unlock(tsdn, &info->mtx);
    }
    malloc_mutex_unlock(tsdn, &background_thread_lock);
}

bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
    assert(config_stats);
    malloc_mutex_lock(tsdn, &background_thread_lock);
    if (!background_thread_enabled()) {
        malloc_mutex_unlock(tsdn, &background_thread_lock);
        return true;
    }

    stats->num_threads = n_background_threads;
    uint64_t num_runs = 0;
    nstime_init(&stats->run_interval, 0);
    for (unsigned i = 0; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        malloc_mutex_lock(tsdn, &info->mtx);
        if (info->state != background_thread_stopped) {
            num_runs += info->tot_n_runs;
            nstime_add(&stats->run_interval, &info->tot_sleep_time);
        }
        malloc_mutex_unlock(tsdn, &info->mtx);
    }
    stats->num_runs = num_runs;
    if (num_runs > 0) {
        nstime_idivide(&stats->run_interval, num_runs);
    }
    malloc_mutex_unlock(tsdn, &background_thread_lock);

    return false;
}
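/*
 * Note on the stats reported above: run_interval ends up as the total sleep
 * time divided by the total number of runs, i.e. the average interval between
 * background work runs across all live background threads.
 */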

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

static bool
pthread_create_fptr_init(void) {
    if (pthread_create_fptr != NULL) {
        return false;
    }
    /*
     * Look up the next symbol first, because 1) when lazy_lock is used we
     * have our own wrapper for pthread_create, and 2) the application may
     * define its own wrapper as well (and may call malloc from within
     * that wrapper).
     */
    pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
    if (pthread_create_fptr == NULL) {
        if (config_lazy_lock) {
            malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
                "\"pthread_create\")\n");
            abort();
        } else {
            /* Fall back to the default symbol. */
            pthread_create_fptr = pthread_create;
        }
    }

    return false;
}
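/*
 * Using RTLD_NEXT looks up the pthread_create definition that follows this
 * object in the symbol lookup order, which avoids recursing back into a
 * wrapper (either jemalloc's own, under lazy_lock, or one supplied by the
 * application) when background threads are created.
 */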

/*
 * When lazy lock is enabled, we need to make sure isthreaded is set before
 * taking any background_thread locks. This is called early from ctl (instead
 * of waiting for the pthread_create calls to trigger it) because the mutex is
 * required before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
    malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
    pthread_create_fptr_init();
    pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

bool
background_thread_boot0(void) {
    if (!have_background_thread && opt_background_thread) {
        malloc_printf("<jemalloc>: option background_thread currently "
            "supports pthread only\n");
        return true;
    }
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
    if ((config_lazy_lock || opt_background_thread) &&
        pthread_create_fptr_init()) {
        return true;
    }
#endif
    return false;
}

bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
    assert(have_background_thread);
    assert(narenas_total_get() > 0);

    if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
        ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
        opt_max_background_threads = ncpus;
    }
    max_background_threads = opt_max_background_threads;

    background_thread_enabled_set(tsdn, opt_background_thread);
    if (malloc_mutex_init(&background_thread_lock,
        "background_thread_global",
        WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
        malloc_mutex_rank_exclusive)) {
        return true;
    }

    background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
        b0get(), opt_max_background_threads *
        sizeof(background_thread_info_t), CACHELINE);
    if (background_thread_info == NULL) {
        return true;
    }

    for (unsigned i = 0; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        /* Thread mutex is rank_inclusive because of thread0. */
        if (malloc_mutex_init(&info->mtx, "background_thread",
            WITNESS_RANK_BACKGROUND_THREAD,
            malloc_mutex_address_ordered)) {
            return true;
        }
        if (pthread_cond_init(&info->cond, NULL)) {
            return true;
        }
        malloc_mutex_lock(tsdn, &info->mtx);
        info->state = background_thread_stopped;
        background_thread_info_init(tsdn, info);
        malloc_mutex_unlock(tsdn, &info->mtx);
    }
#endif

    return false;
}