You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
484 lines
17 KiB
484 lines
17 KiB
commit e5d262effe3a87164308a3f37e61b32d0348692a
|
|
Author: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
|
|
Date: Fri Nov 30 18:05:32 2018 -0200
|
|
|
|
Fix _dl_profile_fixup data-dependency issue (Bug 23690)
|
|
|
|
There is a data-dependency between the fields of struct l_reloc_result
|
|
and the field used as the initialization guard. Users of the guard
|
|
expect writes to the structure to be observable when they also observe
|
|
the guard initialized. The solution for this problem is to use an acquire
|
|
and release load and store to ensure previous writes to the structure are
|
|
observable if the guard is initialized.
|
|
|
|
The previous implementation used DL_FIXUP_VALUE_ADDR (l_reloc_result->addr)
|
|
as the initialization guard, making it impossible for some architectures
|
|
to load and store it atomically, i.e. hppa and ia64, due to its larger size.
|
|
|
|
This commit adds an unsigned int to l_reloc_result to be used as the new
|
|
initialization guard of the struct, making it possible to load and store
|
|
it atomically in all architectures. The fix ensures that the values
|
|
observed in l_reloc_result are consistent and do not lead to crashes.
|
|
The algorithm is documented in the code in elf/dl-runtime.c
|
|
(_dl_profile_fixup). Not all data races have been eliminated.
|
|
|
|
Tested with build-many-glibcs and on powerpc, powerpc64, and powerpc64le.
|
|
|
|
[BZ #23690]
|
|
* elf/dl-runtime.c (_dl_profile_fixup): Guarantee memory
|
|
modification order when accessing reloc_result->addr.
|
|
* include/link.h (reloc_result): Add field init.
|
|
* nptl/Makefile (tests): Add tst-audit-threads.
|
|
(modules-names): Add tst-audit-threads-mod1 and
|
|
tst-audit-threads-mod2.
|
|
Add rules to build tst-audit-threads.
|
|
* nptl/tst-audit-threads-mod1.c: New file.
|
|
* nptl/tst-audit-threads-mod2.c: Likewise.
|
|
* nptl/tst-audit-threads.c: Likewise.
|
|
* nptl/tst-audit-threads.h: Likewise.
|
|
|
|
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
|
|
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
|
|
|
|
diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
|
|
index 63bbc89776..3d2f4a7a76 100644
|
|
--- a/elf/dl-runtime.c
|
|
+++ b/elf/dl-runtime.c
|
|
@@ -183,10 +183,36 @@ _dl_profile_fixup (
|
|
/* This is the address in the array where we store the result of previous
|
|
relocations. */
|
|
struct reloc_result *reloc_result = &l->l_reloc_result[reloc_index];
|
|
- DL_FIXUP_VALUE_TYPE *resultp = &reloc_result->addr;
|
|
|
|
- DL_FIXUP_VALUE_TYPE value = *resultp;
|
|
- if (DL_FIXUP_VALUE_CODE_ADDR (value) == 0)
|
|
+ /* CONCURRENCY NOTES:
|
|
+
|
|
+ Multiple threads may be calling the same PLT sequence and with
|
|
+ LD_AUDIT enabled they will be calling into _dl_profile_fixup to
|
|
+ update the reloc_result with the result of the lazy resolution.
|
|
+ The reloc_result guard variable is reloc_init, and we use
|
|
+ acquire/release loads and store to it to ensure that the results of
|
|
+ the structure are consistent with the loaded value of the guard.
|
|
+ This does not fix all of the data races that occur when two or more
|
|
+ threads read reloc_result->reloc_init with a value of zero and read
|
|
+ and write to that reloc_result concurrently. The expectation is
|
|
+ generally that while this is a data race it works because the
|
|
+ threads write the same values. Until the data races are fixed
|
|
+ there is a potential for problems to arise from these data races.
|
|
+ The reloc result updates should happen in parallel but there should
|
|
+ be an atomic RMW which does the final update to the real result
|
|
+ entry (see bug 23790).
|
|
+
|
|
+ The following code uses reloc_result->init set to 0 to indicate if it is
|
|
+ the first time this object is being relocated, otherwise 1 which
|
|
+ indicates the object has already been relocated.
|
|
+
|
|
+ Reading/Writing from/to reloc_result->reloc_init must not happen
|
|
+ before previous writes to reloc_result complete as they could
|
|
+ end-up with an incomplete struct. */
|
|
+ DL_FIXUP_VALUE_TYPE value;
|
|
+ unsigned int init = atomic_load_acquire (&reloc_result->init);
|
|
+
|
|
+ if (init == 0)
|
|
{
|
|
/* This is the first time we have to relocate this object. */
|
|
const ElfW(Sym) *const symtab
|
|
@@ -346,19 +372,31 @@ _dl_profile_fixup (
|
|
|
|
/* Store the result for later runs. */
|
|
if (__glibc_likely (! GLRO(dl_bind_not)))
|
|
- *resultp = value;
|
|
+ {
|
|
+ reloc_result->addr = value;
|
|
+ /* Guarantee all previous writes complete before
|
|
+ init is updated. See CONCURRENCY NOTES earlier */
|
|
+ atomic_store_release (&reloc_result->init, 1);
|
|
+ }
|
|
+ init = 1;
|
|
}
|
|
+ else
|
|
+ value = reloc_result->addr;
|
|
|
|
/* By default we do not call the pltexit function. */
|
|
long int framesize = -1;
|
|
|
|
+
|
|
#ifdef SHARED
|
|
/* Auditing checkpoint: report the PLT entering and allow the
|
|
auditors to change the value. */
|
|
- if (DL_FIXUP_VALUE_CODE_ADDR (value) != 0 && GLRO(dl_naudit) > 0
|
|
+ if (GLRO(dl_naudit) > 0
|
|
/* Don't do anything if no auditor wants to intercept this call. */
|
|
&& (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0)
|
|
{
|
|
+ /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been
|
|
+ initialized earlier in this function or in another thread. */
|
|
+ assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0);
|
|
ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
|
|
l_info[DT_SYMTAB])
|
|
+ reloc_result->boundndx);
|
|
diff --git a/include/link.h b/include/link.h
|
|
index 5924594548..83b1c34b7b 100644
|
|
--- a/include/link.h
|
|
+++ b/include/link.h
|
|
@@ -216,6 +216,10 @@ struct link_map
|
|
unsigned int boundndx;
|
|
uint32_t enterexit;
|
|
unsigned int flags;
|
|
+ /* CONCURRENCY NOTE: This is used to guard the concurrent initialization
|
|
+ of the relocation result across multiple threads. See the more
|
|
+ detailed notes in elf/dl-runtime.c. */
|
|
+ unsigned int init;
|
|
} *l_reloc_result;
|
|
|
|
/* Pointer to the version information if available. */
|
|
diff --git a/nptl/Makefile b/nptl/Makefile
|
|
index 982e43adfa..98b0aa01c7 100644
|
|
--- a/nptl/Makefile
|
|
+++ b/nptl/Makefile
|
|
@@ -382,7 +382,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
|
|
tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \
|
|
tst-oncex3 tst-oncex4
|
|
ifeq ($(build-shared),yes)
|
|
-tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder
|
|
+tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder \
|
|
+ tst-audit-threads
|
|
tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1
|
|
tests-nolibpthread += tst-fini1
|
|
ifeq ($(have-z-execstack),yes)
|
|
@@ -394,7 +395,8 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
|
|
tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \
|
|
tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \
|
|
tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \
|
|
- tst-join7mod tst-compat-forwarder-mod
|
|
+ tst-join7mod tst-compat-forwarder-mod tst-audit-threads-mod1 \
|
|
+ tst-audit-threads-mod2
|
|
extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) \
|
|
tst-cleanup4aux.o tst-cleanupx4aux.o
|
|
test-extras += tst-cleanup4aux tst-cleanupx4aux
|
|
@@ -712,6 +714,14 @@ $(objpfx)tst-compat-forwarder: $(objpfx)tst-compat-forwarder-mod.so
|
|
|
|
tst-mutex10-ENV = GLIBC_TUNABLES=glibc.elision.enable=1
|
|
|
|
+# Protect against a build using -Wl,-z,now.
|
|
+LDFLAGS-tst-audit-threads-mod1.so = -Wl,-z,lazy
|
|
+LDFLAGS-tst-audit-threads-mod2.so = -Wl,-z,lazy
|
|
+LDFLAGS-tst-audit-threads = -Wl,-z,lazy
|
|
+$(objpfx)tst-audit-threads: $(objpfx)tst-audit-threads-mod2.so
|
|
+$(objpfx)tst-audit-threads.out: $(objpfx)tst-audit-threads-mod1.so
|
|
+tst-audit-threads-ENV = LD_AUDIT=$(objpfx)tst-audit-threads-mod1.so
|
|
+
|
|
# The tests here better do not run in parallel
|
|
ifneq ($(filter %tests,$(MAKECMDGOALS)),)
|
|
.NOTPARALLEL:
|
|
diff --git a/nptl/tst-audit-threads-mod1.c b/nptl/tst-audit-threads-mod1.c
|
|
new file mode 100644
|
|
index 0000000000..615d5ee512
|
|
--- /dev/null
|
|
+++ b/nptl/tst-audit-threads-mod1.c
|
|
@@ -0,0 +1,74 @@
|
|
+/* Dummy audit library for test-audit-threads.
|
|
+
|
|
+ Copyright (C) 2018 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library; if not, see
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <elf.h>
|
|
+#include <link.h>
|
|
+#include <stdio.h>
|
|
+#include <assert.h>
|
|
+#include <string.h>
|
|
+
|
|
+/* We must use a dummy LD_AUDIT module to force the dynamic loader to
|
|
+ *not* update the real PLT, and instead use a cached value for the
|
|
+ lazy resolution result. It is the update of that cached value that
|
|
+ we are testing for correctness by doing this. */
|
|
+
|
|
+/* Library to be audited. */
|
|
+#define LIB "tst-audit-threads-mod2.so"
|
|
+/* CALLNUM is the number of retNum functions. */
|
|
+#define CALLNUM 7999
|
|
+
|
|
+#define CONCATX(a, b) __CONCAT (a, b)
|
|
+
|
|
+static int previous = 0;
|
|
+
|
|
+unsigned int
|
|
+la_version (unsigned int ver)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+unsigned int
|
|
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
|
|
+{
|
|
+ return LA_FLG_BINDTO | LA_FLG_BINDFROM;
|
|
+}
|
|
+
|
|
+uintptr_t
|
|
+CONCATX(la_symbind, __ELF_NATIVE_CLASS) (ElfW(Sym) *sym,
|
|
+ unsigned int ndx,
|
|
+ uintptr_t *refcook,
|
|
+ uintptr_t *defcook,
|
|
+ unsigned int *flags,
|
|
+ const char *symname)
|
|
+{
|
|
+ const char * retnum = "retNum";
|
|
+ char * num = strstr (symname, retnum);
|
|
+ int n;
|
|
+ /* Validate if the symbols are getting called in the correct order.
|
|
+ This code is here to verify binutils does not optimize out the PLT
|
|
+ entries that require the symbol binding. */
|
|
+ if (num != NULL)
|
|
+ {
|
|
+ n = atoi (num);
|
|
+ assert (n >= previous);
|
|
+ assert (n <= CALLNUM);
|
|
+ previous = n;
|
|
+ }
|
|
+ return sym->st_value;
|
|
+}
|
|
diff --git a/nptl/tst-audit-threads-mod2.c b/nptl/tst-audit-threads-mod2.c
|
|
new file mode 100644
|
|
index 0000000000..f9817dd3dc
|
|
--- /dev/null
|
|
+++ b/nptl/tst-audit-threads-mod2.c
|
|
@@ -0,0 +1,22 @@
|
|
+/* Shared object with a huge number of functions for test-audit-threads.
|
|
+
|
|
+ Copyright (C) 2018 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library; if not, see
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
+
|
|
+/* Define all the retNumN functions in a library. */
|
|
+#define definenum
|
|
+#include "tst-audit-threads.h"
|
|
diff --git a/nptl/tst-audit-threads.c b/nptl/tst-audit-threads.c
|
|
new file mode 100644
|
|
index 0000000000..e4bf433bd8
|
|
--- /dev/null
|
|
+++ b/nptl/tst-audit-threads.c
|
|
@@ -0,0 +1,97 @@
|
|
+/* Test multi-threading using LD_AUDIT.
|
|
+
|
|
+ Copyright (C) 2018 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library; if not, see
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
+
|
|
+/* This test uses a dummy LD_AUDIT library (test-audit-threads-mod1) and a
|
|
+ library with a huge number of functions in order to validate lazy symbol
|
|
+ binding with an audit library. We use one thread per CPU to test that
|
|
+ concurrent lazy resolution does not have any defects which would cause
|
|
+ the process to fail. We use an LD_AUDIT library to force the testing of
|
|
+ the relocation resolution caching code in the dynamic loader i.e.
|
|
+ _dl_runtime_profile and _dl_profile_fixup. */
|
|
+
|
|
+#include <support/xthread.h>
|
|
+#include <strings.h>
|
|
+#include <stdlib.h>
|
|
+#include <sys/sysinfo.h>
|
|
+
|
|
+static int do_test (void);
|
|
+
|
|
+/* This test usually takes less than 3s to run. However, there are cases that
|
|
+ take up to 30s. */
|
|
+#define TIMEOUT 60
|
|
+#define TEST_FUNCTION do_test ()
|
|
+#include "../test-skeleton.c"
|
|
+
|
|
+/* Declare the functions we are going to call. */
|
|
+#define externnum
|
|
+#include "tst-audit-threads.h"
|
|
+#undef externnum
|
|
+
|
|
+int num_threads;
|
|
+pthread_barrier_t barrier;
|
|
+
|
|
+void
|
|
+sync_all (int num)
|
|
+{
|
|
+ pthread_barrier_wait (&barrier);
|
|
+}
|
|
+
|
|
+void
|
|
+call_all_ret_nums (void)
|
|
+{
|
|
+ /* Call each function one at a time from all threads. */
|
|
+#define callnum
|
|
+#include "tst-audit-threads.h"
|
|
+#undef callnum
|
|
+}
|
|
+
|
|
+void *
|
|
+thread_main (void *unused)
|
|
+{
|
|
+ call_all_ret_nums ();
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+#define STR2(X) #X
|
|
+#define STR(X) STR2(X)
|
|
+
|
|
+static int
|
|
+do_test (void)
|
|
+{
|
|
+ int i;
|
|
+ pthread_t *threads;
|
|
+
|
|
+ num_threads = get_nprocs ();
|
|
+ if (num_threads <= 1)
|
|
+ num_threads = 2;
|
|
+
|
|
+ /* Used to synchronize all the threads after calling each retNumN. */
|
|
+ xpthread_barrier_init (&barrier, NULL, num_threads);
|
|
+
|
|
+ threads = (pthread_t *) xcalloc (num_threads, sizeof(pthread_t));
|
|
+ for (i = 0; i < num_threads; i++)
|
|
+ threads[i] = xpthread_create(NULL, thread_main, NULL);
|
|
+
|
|
+ for (i = 0; i < num_threads; i++)
|
|
+ xpthread_join(threads[i]);
|
|
+
|
|
+ free (threads);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
diff --git a/nptl/tst-audit-threads.h b/nptl/tst-audit-threads.h
|
|
new file mode 100644
|
|
index 0000000000..1c9ecc08df
|
|
--- /dev/null
|
|
+++ b/nptl/tst-audit-threads.h
|
|
@@ -0,0 +1,92 @@
|
|
+/* Helper header for test-audit-threads.
|
|
+
|
|
+ Copyright (C) 2018 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library; if not, see
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
+
|
|
+/* We use this helper to create a large number of functions, all of
|
|
+ which will be resolved lazily and thus have their PLT updated.
|
|
+ This is done to provide enough functions that we can statistically
|
|
+ observe a thread vs. PLT resolution failure if one exists. */
|
|
+
|
|
+#define CONCAT(a, b) a ## b
|
|
+#define NUM(x, y) CONCAT (x, y)
|
|
+
|
|
+#define FUNC10(x) \
|
|
+ FUNC (NUM (x, 0)); \
|
|
+ FUNC (NUM (x, 1)); \
|
|
+ FUNC (NUM (x, 2)); \
|
|
+ FUNC (NUM (x, 3)); \
|
|
+ FUNC (NUM (x, 4)); \
|
|
+ FUNC (NUM (x, 5)); \
|
|
+ FUNC (NUM (x, 6)); \
|
|
+ FUNC (NUM (x, 7)); \
|
|
+ FUNC (NUM (x, 8)); \
|
|
+ FUNC (NUM (x, 9))
|
|
+
|
|
+#define FUNC100(x) \
|
|
+ FUNC10 (NUM (x, 0)); \
|
|
+ FUNC10 (NUM (x, 1)); \
|
|
+ FUNC10 (NUM (x, 2)); \
|
|
+ FUNC10 (NUM (x, 3)); \
|
|
+ FUNC10 (NUM (x, 4)); \
|
|
+ FUNC10 (NUM (x, 5)); \
|
|
+ FUNC10 (NUM (x, 6)); \
|
|
+ FUNC10 (NUM (x, 7)); \
|
|
+ FUNC10 (NUM (x, 8)); \
|
|
+ FUNC10 (NUM (x, 9))
|
|
+
|
|
+#define FUNC1000(x) \
|
|
+ FUNC100 (NUM (x, 0)); \
|
|
+ FUNC100 (NUM (x, 1)); \
|
|
+ FUNC100 (NUM (x, 2)); \
|
|
+ FUNC100 (NUM (x, 3)); \
|
|
+ FUNC100 (NUM (x, 4)); \
|
|
+ FUNC100 (NUM (x, 5)); \
|
|
+ FUNC100 (NUM (x, 6)); \
|
|
+ FUNC100 (NUM (x, 7)); \
|
|
+ FUNC100 (NUM (x, 8)); \
|
|
+ FUNC100 (NUM (x, 9))
|
|
+
|
|
+#define FUNC7000() \
|
|
+ FUNC1000 (1); \
|
|
+ FUNC1000 (2); \
|
|
+ FUNC1000 (3); \
|
|
+ FUNC1000 (4); \
|
|
+ FUNC1000 (5); \
|
|
+ FUNC1000 (6); \
|
|
+ FUNC1000 (7);
|
|
+
|
|
+#ifdef FUNC
|
|
+# undef FUNC
|
|
+#endif
|
|
+
|
|
+#ifdef externnum
|
|
+# define FUNC(x) extern int CONCAT (retNum, x) (void)
|
|
+#endif
|
|
+
|
|
+#ifdef definenum
|
|
+# define FUNC(x) int CONCAT (retNum, x) (void) { return x; }
|
|
+#endif
|
|
+
|
|
+#ifdef callnum
|
|
+# define FUNC(x) CONCAT (retNum, x) (); sync_all (x)
|
|
+#endif
|
|
+
|
|
+/* A value of 7000 functions is chosen as an arbitrarily large
|
|
+ number of functions that will allow us enough attempts to
|
|
+ verify lazy resolution operation. */
|
|
+FUNC7000 ();
|