| 1 | /* Register destructors for C++ TLS variables declared with thread_local. | 
|---|
| 2 | Copyright (C) 2013-2020 Free Software Foundation, Inc. | 
|---|
| 3 | This file is part of the GNU C Library. | 
|---|
| 4 |  | 
|---|
| 5 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 6 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 7 | License as published by the Free Software Foundation; either | 
|---|
| 8 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 9 |  | 
|---|
| 10 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 13 | Lesser General Public License for more details. | 
|---|
| 14 |  | 
|---|
| 15 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 16 | License along with the GNU C Library; if not, see | 
|---|
| 17 | <https://www.gnu.org/licenses/>.  */ | 
|---|
| 18 |  | 
|---|
| 19 | /* CONCURRENCY NOTES: | 
|---|
| 20 |  | 
|---|
| 21 | This documents concurrency for the non-POD TLS destructor registration, | 
|---|
| 22 | calling and destruction.  The functions __cxa_thread_atexit_impl, | 
|---|
| 23 | _dl_close_worker and __call_tls_dtors are the three main routines that may | 
|---|
| 24 | run concurrently and access shared data.  The shared data in all possible | 
|---|
| 25 | combinations of all three functions are the link map list, a link map for a | 
|---|
| 26 | DSO and the link map member l_tls_dtor_count. | 
|---|
| 27 |  | 
|---|
| 28 | __cxa_thread_atexit_impl acquires the dl_load_lock before accessing any | 
|---|
| 29 | shared state and hence multiple of its instances can safely execute | 
|---|
| 30 | concurrently. | 
|---|
| 31 |  | 
|---|
| 32 | _dl_close_worker acquires the dl_load_lock before accessing any shared state | 
|---|
| 33 | as well and hence can concurrently execute multiple of its own instances as | 
|---|
| 34 | well as those of __cxa_thread_atexit_impl safely.  Not all accesses to | 
|---|
| 35 | l_tls_dtor_count are protected by the dl_load_lock, so we need to | 
|---|
| 36 | synchronize using atomics. | 
|---|
| 37 |  | 
|---|
| 38 | __call_tls_dtors accesses the l_tls_dtor_count without taking the lock; it | 
|---|
| 39 | decrements the value by one.  It does not need the big lock because it does | 
|---|
| 40 | not access any other shared state except for the current DSO link map and | 
|---|
| 41 | its member l_tls_dtor_count. | 
|---|
| 42 |  | 
|---|
| 43 | Correspondingly, _dl_close_worker loads l_tls_dtor_count and if it is zero, | 
|---|
| 44 | unloads the DSO, thus deallocating the current link map.  This is the goal | 
|---|
| 45 | of maintaining l_tls_dtor_count - to unload the DSO and free resources if | 
|---|
| 46 | there are no pending destructors to be called. | 
|---|
| 47 |  | 
|---|
| 48 | We want to eliminate the inconsistent state where the DSO is unloaded in | 
|---|
| 49 | _dl_close_worker before it is used in __call_tls_dtors.  This could happen | 
|---|
| 50 | if __call_tls_dtors uses the link map after it sets l_tls_dtor_count to 0, | 
|---|
| 51 | since _dl_close_worker will conclude from the 0 l_tls_dtor_count value that | 
|---|
| 52 | it is safe to unload the DSO.  Hence, to ensure that this does not happen, | 
|---|
| 53 | the following conditions must be met: | 
|---|
| 54 |  | 
|---|
| 55 | 1. In _dl_close_worker, the l_tls_dtor_count load happens before the DSO is | 
|---|
| 56 | unloaded and its link map is freed | 
|---|
| 57 | 2. The link map dereference in __call_tls_dtors happens before the | 
|---|
| 58 | l_tls_dtor_count decrement. | 
|---|
| 59 |  | 
|---|
| 60 | To ensure this, the l_tls_dtor_count decrement in __call_tls_dtors should | 
|---|
| 61 | have release semantics and the load in _dl_close_worker should have acquire | 
|---|
| 62 | semantics. | 
|---|
| 63 |  | 
|---|
| 64 | Concurrent executions of __call_tls_dtors should only ensure that the value | 
|---|
| 65 | is accessed atomically; no reordering constraints need to be considered. | 
|---|
| 66 | Likewise for the increment of l_tls_dtor_count in __cxa_thread_atexit_impl. | 
|---|
| 67 |  | 
|---|
| 68 | There is still a possibility on concurrent execution of _dl_close_worker and | 
|---|
| 69 | __call_tls_dtors where _dl_close_worker reads the value of l_tls_dtor_count | 
|---|
| 70 | as 1, __call_tls_dtors decrements the value of l_tls_dtor_count but | 
|---|
| 71 | _dl_close_worker does not unload the DSO, having read the old value.  This | 
|---|
| 72 | is not very different from a case where __call_tls_dtors is called after | 
|---|
| 73 | _dl_close_worker on the DSO and hence is an accepted execution.  */ | 
|---|
| 74 |  | 
|---|
| 75 | #include <stdlib.h> | 
|---|
| 76 | #include <ldsodefs.h> | 
|---|
| 77 |  | 
|---|
/* Signature of a thread_local destructor: it receives the object to
   destroy as its only argument.  */
typedef void (*dtor_func) (void *);

/* One pending destructor call.  Records are kept in a singly linked,
   per-thread list whose head is tls_dtor_list.  */
struct dtor_list
{
  /* Destructor to invoke.  Stored in mangled form when PTR_MANGLE is
     available.  */
  dtor_func func;

  /* Argument handed to FUNC.  */
  void *obj;

  /* Link map whose l_tls_dtor_count was incremented for this record.  */
  struct link_map *map;

  /* Next record in the list, or NULL at the end.  */
  struct dtor_list *next;
};

/* Head of the calling thread's list of pending destructors.  */
static __thread struct dtor_list *tls_dtor_list;

/* DSO handle that the lookup in __cxa_thread_atexit_impl is compared
   against.  */
static __thread void *dso_symbol_cache;

/* Link map most recently resolved by __cxa_thread_atexit_impl on this
   thread.  */
static __thread struct link_map *lm_cache;
|---|
| 91 |  | 
|---|
| 92 | /* Register a destructor for TLS variables declared with the 'thread_local' | 
|---|
| 93 | keyword.  This function is only called from code generated by the C++ | 
|---|
| 94 | compiler.  FUNC is the destructor function and OBJ is the object to be | 
|---|
| 95 | passed to the destructor.  DSO_SYMBOL is the __dso_handle symbol that each | 
|---|
| 96 | DSO has at a unique address in its map, added from crtbegin.o during the | 
|---|
| 97 | linking phase.  */ | 
|---|
| 98 | int | 
|---|
| 99 | __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol) | 
|---|
| 100 | { | 
|---|
| 101 | #ifdef PTR_MANGLE | 
|---|
| 102 | PTR_MANGLE (func); | 
|---|
| 103 | #endif | 
|---|
| 104 |  | 
|---|
| 105 | /* Prepend.  */ | 
|---|
| 106 | struct dtor_list *new = calloc (1, sizeof (struct dtor_list)); | 
|---|
| 107 | new->func = func; | 
|---|
| 108 | new->obj = obj; | 
|---|
| 109 | new->next = tls_dtor_list; | 
|---|
| 110 | tls_dtor_list = new; | 
|---|
| 111 |  | 
|---|
| 112 | /* We have to acquire the big lock to prevent a racing dlclose from pulling | 
|---|
| 113 | our DSO from underneath us while we're setting up our destructor.  */ | 
|---|
| 114 | __rtld_lock_lock_recursive (GL(dl_load_lock)); | 
|---|
| 115 |  | 
|---|
| 116 | /* See if we already encountered the DSO.  */ | 
|---|
| 117 | if (__glibc_unlikely (dso_symbol_cache != dso_symbol)) | 
|---|
| 118 | { | 
|---|
| 119 | ElfW(Addr) caller = (ElfW(Addr)) dso_symbol; | 
|---|
| 120 |  | 
|---|
| 121 | struct link_map *l = _dl_find_dso_for_object (caller); | 
|---|
| 122 |  | 
|---|
| 123 | /* If the address is not recognized the call comes from the main | 
|---|
| 124 | program (we hope).  */ | 
|---|
| 125 | lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | /* This increment may only be concurrently observed either by the decrement | 
|---|
| 129 | in __call_tls_dtors since the other l_tls_dtor_count access in | 
|---|
| 130 | _dl_close_worker is protected by the dl_load_lock.  The execution in | 
|---|
| 131 | __call_tls_dtors does not really depend on this value beyond the fact that | 
|---|
| 132 | it should be atomic, so Relaxed MO should be sufficient.  */ | 
|---|
| 133 | atomic_fetch_add_relaxed (&lm_cache->l_tls_dtor_count, 1); | 
|---|
| 134 | __rtld_lock_unlock_recursive (GL(dl_load_lock)); | 
|---|
| 135 |  | 
|---|
| 136 | new->map = lm_cache; | 
|---|
| 137 |  | 
|---|
| 138 | return 0; | 
|---|
| 139 | } | 
|---|
| 140 |  | 
|---|
| 141 | /* Call the destructors.  This is called either when a thread returns from the | 
|---|
| 142 | initial function or when the process exits via the exit function.  */ | 
|---|
| 143 | void | 
|---|
| 144 | __call_tls_dtors (void) | 
|---|
| 145 | { | 
|---|
| 146 | while (tls_dtor_list) | 
|---|
| 147 | { | 
|---|
| 148 | struct dtor_list *cur = tls_dtor_list; | 
|---|
| 149 | dtor_func func = cur->func; | 
|---|
| 150 | #ifdef PTR_DEMANGLE | 
|---|
| 151 | PTR_DEMANGLE (func); | 
|---|
| 152 | #endif | 
|---|
| 153 |  | 
|---|
| 154 | tls_dtor_list = tls_dtor_list->next; | 
|---|
| 155 | func (cur->obj); | 
|---|
| 156 |  | 
|---|
| 157 | /* Ensure that the MAP dereference happens before | 
|---|
| 158 | l_tls_dtor_count decrement.  That way, we protect this access from a | 
|---|
| 159 | potential DSO unload in _dl_close_worker, which happens when | 
|---|
| 160 | l_tls_dtor_count is 0.  See CONCURRENCY NOTES for more detail.  */ | 
|---|
| 161 | atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1); | 
|---|
| 162 | free (cur); | 
|---|
| 163 | } | 
|---|
| 164 | } | 
|---|
| 165 | libc_hidden_def (__call_tls_dtors) | 
|---|
| 166 |  | 
|---|