#define JEMALLOC_TSD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

/* TSD_INITIALIZER triggers "-Wmissing-field-initializers" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd. So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

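/*
 * Mark every tsd currently on the nominal list as needing a state recompute;
 * each owning thread picks the request up the next time it calls
 * tsd_slow_update().
 */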
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
		assert(atomic_load_u8(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		atomic_store_u8(&remote_tsd->state, tsd_state_nominal_recompute,
		    ATOMIC_RELAXED);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive. If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}

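/* Whether this particular tsd, on its own, forces the slow path. */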
static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

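/*
 * Decide which state a nominal tsd should be in, given the global and
 * per-thread slow-path conditions; non-nominal states are left unchanged.
 */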
static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}

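/*
 * Re-derive this thread's state. The exchange loop retries as long as the
 * previously stored value was tsd_state_nominal_recompute, so that no
 * concurrent recompute request is lost.
 */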
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = atomic_exchange_u8(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);
}

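/*
 * Move tsd to new_state, keeping the nominal list consistent: entering a
 * nominal state from a non-nominal one adds the tsd to the list, and leaving
 * the nominal states removes it.
 */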
void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = atomic_load_u8(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		atomic_store_u8(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal. If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			atomic_store_u8(&tsd->state, new_state, ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case. We're transitioning from
			 * one nominal state to another. The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));

	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability. For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_offset_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;

	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
	assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_arenas_tdata_bypassp_get(tsd) = true;
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	assert_tsd_data_cleanup_done(tsd);

	return false;
}

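/*
 * Slow-path counterpart of tsd_fetch(): handles every state other than fast
 * nominal, performing first-time initialization, recompute requests, the
 * minimal -> full upgrade, and reincarnation after the destructor has run.
 */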
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed. Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

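/*
 * Run every registered cleanup hook at thread exit, iterating until none of
 * them reports pending work; a hook that returns true is retried on the next
 * pass.
 */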
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}
#endif

void
malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	arenas_tdata_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
}

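/*
 * TSD destructor, invoked at thread exit. Calls into jemalloc from other TSD
 * destructors that run after this one lead to the purgatory/reincarnated
 * states handled below.
 */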
void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its lifetime. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after the destructor was called. Cleanup isn't required but
		 * is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		/* Fall through. */
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd. This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}

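/*
 * First stage of TSD bootstrapping: initialize the nominal-list lock, run the
 * platform-specific tsd_boot0(), and hand back a tsd with arenas_tdata access
 * bypassed until malloc_tsd_boot1() finishes the job.
 */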
tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

	ncleanups = 0;
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	*tsd_arenas_tdata_bypassp_get(tsd) = true;
	return tsd;
}

void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly. Update tsd_slow. */
	tsd_slow_update(tsd);
	*tsd_arenas_tdata_bypassp_get(tsd) = false;
}

#ifdef _WIN32
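/*
 * TLS callback registered with the CRT below; Windows invokes it on thread
 * attach/detach so that per-thread cleanup runs even without pthread-style
 * key destructors.
 */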
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:tls_callback")
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
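/*
 * Used only on the pthread_getspecific()-based TSD path: detect and break
 * recursion when this thread re-enters TSD initialization (e.g. if the
 * allocator is called while the wrapper is being set up).
 */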
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

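/*
 * Only the forking thread survives in the child, so the nominal list is reset
 * to contain (at most) that one tsd.
 */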
void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}