commit f4f8f4d4e0f92488431b268c8cd9555730b9afe9
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Wed Dec 30 19:19:37 2020 +0000

    elf: Use relaxed atomics for racy accesses [BZ #19329]

    This is a follow-up patch to the fix for bug 19329. It adds relaxed
    MO (memory order) atomics to accesses that were previously data races
    but are now race conditions, and where relaxed MO is sufficient.

    The race conditions all follow the same pattern: the write happens
    behind the dlopen lock, but a read can happen concurrently (e.g.
    during TLS access) without holding the lock. For slotinfo entries,
    the read value only matters if it comes from a synchronized write in
    dlopen or dlclose; otherwise the related dtv entry is not valid to
    access, so it is fine to leave it in an inconsistent state. The same
    applies to GL(dl_tls_max_dtv_idx) and GL(dl_tls_generation), except
    that there the algorithm relies on the fact that the value read from
    the last synchronized write only ever increases.

    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

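To illustrate the pattern described above, here is a minimal sketch. This
is not glibc code: the names (tls_generation, load_lock, bump_generation,
update_needed) are hypothetical, and C11 <stdatomic.h> stands in for
glibc's internal atomic_load_relaxed/atomic_store_relaxed macros.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    /* Hypothetical stand-ins for GL(dl_tls_generation) and the dlopen
       lock.  */
    static _Atomic size_t tls_generation;
    static pthread_mutex_t load_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Writer side: always runs with the lock held (dlopen/dlclose), so
       writers never race with each other; the store is relaxed only
       because unlocked readers exist.  */
    static void
    bump_generation (void)
    {
      pthread_mutex_lock (&load_lock);
      size_t newgen
        = atomic_load_explicit (&tls_generation, memory_order_relaxed) + 1;
      atomic_store_explicit (&tls_generation, newgen, memory_order_relaxed);
      pthread_mutex_unlock (&load_lock);
    }

    /* Reader side: may run without the lock (e.g. during TLS access).
       Relaxed MO suffices because the counter only increases and the
       value is merely compared, not used to order other memory.  */
    static int
    update_needed (size_t thread_gen)
    {
      size_t gen
        = atomic_load_explicit (&tls_generation, memory_order_relaxed);
      return thread_gen != gen;
    }

Relaxed MO provides no ordering by itself; whatever ordering the algorithm
needs comes from the lock on the writer side and from the generation-based
protocol on the reader side.
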
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 1ece0ae1dd062d1e..7d2dc2272cd643f5 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -79,9 +79,10 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
         {
           assert (old_map->l_tls_modid == idx);

-          /* Mark the entry as unused. */
-          listp->slotinfo[idx - disp].gen = GL(dl_tls_generation) + 1;
-          listp->slotinfo[idx - disp].map = NULL;
+          /* Mark the entry as unused. These can be read concurrently. */
+          atomic_store_relaxed (&listp->slotinfo[idx - disp].gen,
+                                GL(dl_tls_generation) + 1);
+          atomic_store_relaxed (&listp->slotinfo[idx - disp].map, NULL);
         }

       /* If this is not the last currently used entry no need to look
@@ -96,8 +97,8 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,

       if (listp->slotinfo[idx - disp].map != NULL)
         {
-          /* Found a new last used index. */
-          GL(dl_tls_max_dtv_idx) = idx;
+          /* Found a new last used index. This can be read concurrently. */
+          atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), idx);
           return true;
         }
     }
@@ -571,7 +572,9 @@ _dl_close_worker (struct link_map *map, bool force)
                                GL(dl_tls_dtv_slotinfo_list), 0,
                                imap->l_init_called))
             /* All dynamically loaded modules with TLS are unloaded. */
-            GL(dl_tls_max_dtv_idx) = GL(dl_tls_static_nelem);
+            /* Can be read concurrently. */
+            atomic_store_relaxed (&GL(dl_tls_max_dtv_idx),
+                                  GL(dl_tls_static_nelem));

           if (imap->l_tls_offset != NO_TLS_OFFSET
               && imap->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
@@ -769,8 +772,11 @@ _dl_close_worker (struct link_map *map, bool force)
   /* If we removed any object which uses TLS bump the generation counter. */
   if (any_tls)
     {
-      if (__glibc_unlikely (++GL(dl_tls_generation) == 0))
+      size_t newgen = GL(dl_tls_generation) + 1;
+      if (__glibc_unlikely (newgen == 0))
         _dl_fatal_printf ("TLS generation counter wrapped! Please report as described in "REPORT_BUGS_TO".\n");
+      /* Can be read concurrently. */
+      atomic_store_relaxed (&GL(dl_tls_generation), newgen);

       if (tls_free_end == GL(dl_tls_static_used))
         GL(dl_tls_static_used) = tls_free_start;
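
A note on the generation bump above (the same restructuring appears in
elf/dl-open.c below): the old ++GL(dl_tls_generation) was a plain
read-modify-write, and it also published the incremented value before the
wraparound check could fire. Computing newgen first means the wrapped
value 0 is never stored where a concurrent relaxed reader could observe
it. In sketch form, with hypothetical stand-ins for the glibc internals:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    static _Atomic size_t tls_generation; /* stand-in for
                                             GL(dl_tls_generation) */

    static void
    bump_generation_checked (void)
    {
      /* Compute the successor locally first...  */
      size_t newgen
        = atomic_load_explicit (&tls_generation, memory_order_relaxed) + 1;
      /* ...fail on wraparound before publishing...  */
      if (newgen == 0)
        {
          fputs ("TLS generation counter wrapped!\n", stderr);
          abort ();
        }
      /* ...so a concurrent reader can never observe the value 0.  */
      atomic_store_explicit (&tls_generation, newgen, memory_order_relaxed);
    }
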
diff --git a/elf/dl-open.c b/elf/dl-open.c
index b052bb0bc2cd17aa..a67fb3aee40860e1 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -395,9 +395,12 @@ update_tls_slotinfo (struct link_map *new)
         }
     }

-  if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
+  size_t newgen = GL(dl_tls_generation) + 1;
+  if (__glibc_unlikely (newgen == 0))
     _dl_fatal_printf (N_("\
 TLS generation counter wrapped! Please report this."));
+  /* Can be read concurrently. */
+  atomic_store_relaxed (&GL(dl_tls_generation), newgen);

   /* We need a second pass for static tls data, because
      _dl_update_slotinfo must not be run while calls to
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index da83cd6ae2ee6504..801eafad3961573c 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -175,7 +175,9 @@ _dl_next_tls_modid (void)
       /* No gaps, allocate a new entry. */
     nogaps:

-      result = ++GL(dl_tls_max_dtv_idx);
+      result = GL(dl_tls_max_dtv_idx) + 1;
+      /* Can be read concurrently. */
+      atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
     }

   return result;
@@ -359,10 +361,12 @@ allocate_dtv (void *result)
   dtv_t *dtv;
   size_t dtv_length;

+  /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
+  size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
   /* We allocate a few more elements in the dtv than are needed for the
      initial set of modules. This should avoid in most cases expansions
      of the dtv. */
-  dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+  dtv_length = max_modid + DTV_SURPLUS;
   dtv = calloc (dtv_length + 2, sizeof (dtv_t));
   if (dtv != NULL)
     {
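
The allocate_dtv change above is the one place where the value of a
relaxed load is used rather than merely compared; as the in-line comment
says, that is safe because the dtv size is later rechecked, not relied on.
A sketch of the snapshot-then-recheck idea, with hypothetical names:

    #include <stdatomic.h>
    #include <stdlib.h>

    static _Atomic size_t max_dtv_idx; /* stand-in for
                                          GL(dl_tls_max_dtv_idx) */

    /* Size the initial dtv from a relaxed snapshot. The snapshot may be
       stale, but that only affects how soon the dtv must grow: users
       check the recorded length (cf. the dtv[-1].counter test in
       _dl_update_slotinfo) and resize when a module id exceeds it.  */
    static void *
    allocate_dtv_sketch (size_t surplus, size_t *lenp)
    {
      size_t max_modid
        = atomic_load_explicit (&max_dtv_idx, memory_order_relaxed);
      *lenp = max_modid + surplus;
      return calloc (*lenp + 2, sizeof (void *));
    }
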
@@ -767,7 +771,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
          if (modid > max_modid)
            break;

-         size_t gen = listp->slotinfo[cnt].gen;
+         size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);

          if (gen > new_gen)
            /* Not relevant. */
@@ -779,7 +783,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
            continue;

          /* If there is no map this means the entry is empty. */
-         struct link_map *map = listp->slotinfo[cnt].map;
+         struct link_map *map
+           = atomic_load_relaxed (&listp->slotinfo[cnt].map);
          /* Check whether the current dtv array is large enough. */
          if (dtv[-1].counter < modid)
            {
@@ -923,7 +928,12 @@ __tls_get_addr (GET_ADDR_ARGS)
 {
   dtv_t *dtv = THREAD_DTV ();

-  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+  /* Update is needed if dtv[0].counter < the generation of the accessed
+     module. The global generation counter is used here as it is easier
+     to check. Synchronization for the relaxed MO access is guaranteed
+     by user code, see CONCURRENCY NOTES in _dl_update_slotinfo. */
+  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+  if (__glibc_unlikely (dtv[0].counter != gen))
     return update_get_addr (GET_ADDR_PARAM);

   void *p = dtv[GET_ADDR_MODULE].pointer.val;
@@ -946,7 +956,10 @@ _dl_tls_get_addr_soft (struct link_map *l)
     return NULL;

   dtv_t *dtv = THREAD_DTV ();
-  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+  /* This may be called without holding the GL(dl_load_lock). Reading
+     arbitrary gen value is fine since this is best effort code. */
+  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+  if (__glibc_unlikely (dtv[0].counter != gen))
     {
       /* This thread's DTV is not completely current,
          but it might already cover this module. */
@@ -1032,7 +1045,9 @@ cannot create TLS data structures"));
   /* Add the information into the slotinfo data structure. */
   if (do_add)
     {
-      listp->slotinfo[idx].map = l;
-      listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
+      /* Can be read concurrently. See _dl_update_slotinfo. */
+      atomic_store_relaxed (&listp->slotinfo[idx].map, l);
+      atomic_store_relaxed (&listp->slotinfo[idx].gen,
+                            GL(dl_tls_generation) + 1);
     }
 }
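
The slotinfo stores above (and the retirement in elf/dl-close.c earlier)
pair with the relaxed loads in _dl_update_slotinfo: both fields of an
entry are written under the lock, and a reader only trusts an entry whose
generation it has already synchronized with, so an in-between state of
{map, gen} is never acted on. A minimal sketch under those assumptions
(hypothetical names, C11 atomics in place of glibc's macros):

    #include <stdatomic.h>
    #include <stddef.h>

    struct link_map; /* opaque stand-in */

    /* Hypothetical slotinfo entry with atomic fields.  */
    struct slotinfo
    {
      _Atomic size_t gen;           /* generation that (in)validated it */
      struct link_map *_Atomic map; /* NULL means unused */
    };

    /* Writer (runs under the dlopen lock): retire an entry as dlclose
       does, bumping gen past the current generation and clearing map.
       Relaxed stores suffice; see the reader below.  */
    static void
    retire_entry (struct slotinfo *s, size_t current_gen)
    {
      atomic_store_explicit (&s->gen, current_gen + 1,
                             memory_order_relaxed);
      atomic_store_explicit (&s->map, NULL, memory_order_relaxed);
    }

    /* Reader (may run unlocked, as _dl_update_slotinfo does): entries
       newer than a generation the reader has synchronized with are
       skipped, so a half-updated entry is never dereferenced.  */
    static struct link_map *
    read_entry (struct slotinfo *s, size_t synced_gen)
    {
      if (atomic_load_explicit (&s->gen, memory_order_relaxed) > synced_gen)
        return NULL; /* published by an update we have not synced with */
      return atomic_load_explicit (&s->map, memory_order_relaxed);
    }
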
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
index 533ee2b3a6e85ad8..bc543dcc264ea361 100644
--- a/sysdeps/x86_64/dl-tls.c
+++ b/sysdeps/x86_64/dl-tls.c
@@ -40,7 +40,8 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
 {
   dtv_t *dtv = THREAD_DTV ();

-  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+  if (__glibc_unlikely (dtv[0].counter != gen))
     return update_get_addr (GET_ADDR_PARAM);

   return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
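
Finally, the generic __tls_get_addr and the x86_64 __tls_get_addr_slow
variant above share the same fast-path shape: compare the thread's dtv
generation against a relaxed load of the global counter, and fall back to
the update path on mismatch. A sketch of that shape, with the usual
caveats (hypothetical names, C11 atomics):

    #include <stdatomic.h>
    #include <stddef.h>

    static _Atomic size_t tls_generation; /* stand-in for
                                             GL(dl_tls_generation) */
    static _Thread_local size_t dtv_gen;  /* stand-in for dtv[0].counter */

    static void *update_and_lookup (void) /* stand-in for update_get_addr */
    {
      return NULL;
    }

    static void *dtv_lookup (void) /* stand-in for the dtv fast path */
    {
      return NULL;
    }

    static void *
    tls_get_addr_sketch (void)
    {
      /* Relaxed MO: a stale read at worst diverts to the slow path, or
         skips an update the caller has not synchronized with; in the
         latter case the dtv entries the caller may legitimately access
         are still valid, per the commit message.  */
      size_t gen
        = atomic_load_explicit (&tls_generation, memory_order_relaxed);
      if (dtv_gen != gen)
        return update_and_lookup ();
      return dtv_lookup ();
    }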