parent
f2f7ddc537
commit
df75c59c69
@ -0,0 +1,22 @@
|
||||
Work around in the test case, the fact that RHEL-8 NSS modules
|
||||
infrastructure incorrectly allows merging in the hosts database. This
|
||||
is a RHEL-8 only fix.
|
||||
|
||||
diff --git a/nss/tst-nss-gai-actions.c b/nss/tst-nss-gai-actions.c
|
||||
index efca6cd1837a172a..c35e752896eceb2a 100644
|
||||
--- a/nss/tst-nss-gai-actions.c
|
||||
+++ b/nss/tst-nss-gai-actions.c
|
||||
@@ -87,6 +87,13 @@ do_one_test (int action, int family, bool canon)
|
||||
case ACTION_MERGE:
|
||||
if (ret == 0)
|
||||
{
|
||||
+ if (hints.ai_flags == 0 && hints.ai_family == AF_INET)
|
||||
+ {
|
||||
+ printf ("***** RHEL-8 limitation: "
|
||||
+ "NSS modules infrastructure incorrectly allows MERGE\n");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
char *formatted = support_format_addrinfo (ai, ret);
|
||||
|
||||
printf ("merge unexpectedly succeeded:\n %s\n", formatted);
|
@ -0,0 +1,121 @@
|
||||
commit 969e9733c7d17edf1e239a73fa172f357561f440
|
||||
Author: Florian Weimer <fweimer@redhat.com>
|
||||
Date: Tue Feb 21 09:20:28 2023 +0100
|
||||
|
||||
gshadow: Matching sgetsgent, sgetsgent_r ERANGE handling (bug 30151)
|
||||
|
||||
Before this change, sgetsgent_r did not set errno to ERANGE, but
|
||||
sgetsgent only checks errno, not the return value from sgetsgent_r.
|
||||
Consequently, sgetsgent did not detect any error, and reported
|
||||
success to the caller, without initializing the struct sgrp object
|
||||
whose address was returned.
|
||||
|
||||
This commit changes sgetsgent_r to set errno as well. This avoids
|
||||
similar issues in applications which only check errno.
|
||||
|
||||
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||||
|
||||
diff --git a/gshadow/Makefile b/gshadow/Makefile
|
||||
index 796fbbf473..a95524593a 100644
|
||||
--- a/gshadow/Makefile
|
||||
+++ b/gshadow/Makefile
|
||||
@@ -26,7 +26,7 @@ headers = gshadow.h
|
||||
routines = getsgent getsgnam sgetsgent fgetsgent putsgent \
|
||||
getsgent_r getsgnam_r sgetsgent_r fgetsgent_r
|
||||
|
||||
-tests = tst-gshadow tst-putsgent tst-fgetsgent_r
|
||||
+tests = tst-gshadow tst-putsgent tst-fgetsgent_r tst-sgetsgent
|
||||
|
||||
CFLAGS-getsgent_r.c += -fexceptions
|
||||
CFLAGS-getsgent.c += -fexceptions
|
||||
diff --git a/gshadow/sgetsgent_r.c b/gshadow/sgetsgent_r.c
|
||||
index ea085e91d7..c75624e1f7 100644
|
||||
--- a/gshadow/sgetsgent_r.c
|
||||
+++ b/gshadow/sgetsgent_r.c
|
||||
@@ -61,7 +61,10 @@ __sgetsgent_r (const char *string, struct sgrp *resbuf, char *buffer,
|
||||
buffer[buflen - 1] = '\0';
|
||||
sp = strncpy (buffer, string, buflen);
|
||||
if (buffer[buflen - 1] != '\0')
|
||||
- return ERANGE;
|
||||
+ {
|
||||
+ __set_errno (ERANGE);
|
||||
+ return ERANGE;
|
||||
+ }
|
||||
}
|
||||
else
|
||||
sp = (char *) string;
|
||||
diff --git a/gshadow/tst-sgetsgent.c b/gshadow/tst-sgetsgent.c
|
||||
new file mode 100644
|
||||
index 0000000000..0370c10fd0
|
||||
--- /dev/null
|
||||
+++ b/gshadow/tst-sgetsgent.c
|
||||
@@ -0,0 +1,69 @@
|
||||
+/* Test large input for sgetsgent (bug 30151).
|
||||
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <gshadow.h>
|
||||
+#include <stddef.h>
|
||||
+#include <support/check.h>
|
||||
+#include <support/support.h>
|
||||
+#include <support/xmemstream.h>
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+static int
|
||||
+do_test (void)
|
||||
+{
|
||||
+ /* Create a shadow group with 1000 members. */
|
||||
+ struct xmemstream mem;
|
||||
+ xopen_memstream (&mem);
|
||||
+ const char *passwd = "k+zD0nucwfxAo3sw1NXUj6K5vt5M16+X0TVGdE1uFvq5R8V7efJ";
|
||||
+ fprintf (mem.out, "group-name:%s::m0", passwd);
|
||||
+ for (int i = 1; i < 1000; ++i)
|
||||
+ fprintf (mem.out, ",m%d", i);
|
||||
+ xfclose_memstream (&mem);
|
||||
+
|
||||
+ /* Call sgetsgent. */
|
||||
+ char *input = mem.buffer;
|
||||
+ struct sgrp *e = sgetsgent (input);
|
||||
+ TEST_VERIFY_EXIT (e != NULL);
|
||||
+ TEST_COMPARE_STRING (e->sg_namp, "group-name");
|
||||
+ TEST_COMPARE_STRING (e->sg_passwd, passwd);
|
||||
+ /* No administrators. */
|
||||
+ TEST_COMPARE_STRING (e->sg_adm[0], NULL);
|
||||
+ /* Check the members list. */
|
||||
+ for (int i = 0; i < 1000; ++i)
|
||||
+ {
|
||||
+ char *member = xasprintf ("m%d", i);
|
||||
+ TEST_COMPARE_STRING (e->sg_mem[i], member);
|
||||
+ free (member);
|
||||
+ }
|
||||
+ TEST_COMPARE_STRING (e->sg_mem[1000], NULL);
|
||||
+
|
||||
+ /* Check that putsgent brings back the input string. */
|
||||
+ xopen_memstream (&mem);
|
||||
+ TEST_COMPARE (putsgent (e, mem.out), 0);
|
||||
+ xfclose_memstream (&mem);
|
||||
+ /* Compare without the trailing '\n' that putsgent added. */
|
||||
+ TEST_COMPARE (mem.buffer[mem.length - 1], '\n');
|
||||
+ mem.buffer[mem.length - 1] = '\0';
|
||||
+ TEST_COMPARE_STRING (mem.buffer, input);
|
||||
+
|
||||
+ free (mem.buffer);
|
||||
+ free (input);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#include <support/test-driver.c>
|
@ -0,0 +1,144 @@
|
||||
From 436a604b7dc741fc76b5a6704c6cd8bb178518e7 Mon Sep 17 00:00:00 2001
|
||||
From: Adam Yi <ayi@janestreet.com>
|
||||
Date: Tue, 7 Mar 2023 07:30:02 -0500
|
||||
Subject: posix: Fix system blocks SIGCHLD erroneously [BZ #30163]
|
||||
|
||||
Fix bug that SIGCHLD is erroneously blocked forever in the following
|
||||
scenario:
|
||||
|
||||
1. Thread A calls system but hasn't returned yet
|
||||
2. Thread B calls another system but returns
|
||||
|
||||
SIGCHLD would be blocked forever in thread B after its system() returns,
|
||||
even after the system() in thread A returns.
|
||||
|
||||
Although POSIX does not require it, the glibc system implementation aims to be
|
||||
thread and cancellation safe. This bug was introduced in
|
||||
5fb7fc96350575c9adb1316833e48ca11553be49 when we moved reverting signal
|
||||
mask to happen when the last concurrently running system returns,
|
||||
despite that signal mask is per thread. This commit reverts this logic
|
||||
and adds a test.
|
||||
|
||||
Signed-off-by: Adam Yi <ayi@janestreet.com>
|
||||
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
||||
[DJ: Edited to use integer sleep() instead of nanosleep() dependency rabbit hole]
|
||||
diff --git a/stdlib/tst-system.c b/stdlib/tst-system.c
|
||||
index 634acfe264..47a0afe6bf 100644
|
||||
--- a/stdlib/tst-system.c
|
||||
+++ b/stdlib/tst-system.c
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <support/check.h>
|
||||
#include <support/temp_file.h>
|
||||
#include <support/support.h>
|
||||
+#include <support/xthread.h>
|
||||
#include <support/xunistd.h>
|
||||
|
||||
static char *tmpdir;
|
||||
@@ -71,6 +72,20 @@ call_system (void *closure)
|
||||
}
|
||||
}
|
||||
|
||||
+static void *
|
||||
+sleep_and_check_sigchld (void *closure)
|
||||
+{
|
||||
+ double *seconds = (double *) closure;
|
||||
+ char cmd[namemax];
|
||||
+ sprintf (cmd, "sleep %lf" , *seconds);
|
||||
+ TEST_COMPARE (system (cmd), 0);
|
||||
+
|
||||
+ sigset_t blocked = {0};
|
||||
+ TEST_COMPARE (sigprocmask (SIG_BLOCK, NULL, &blocked), 0);
|
||||
+ TEST_COMPARE (sigismember (&blocked, SIGCHLD), 0);
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
do_test (void)
|
||||
{
|
||||
@@ -154,6 +169,17 @@ do_test (void)
|
||||
xchmod (_PATH_BSHELL, st.st_mode);
|
||||
}
|
||||
|
||||
+ {
|
||||
+ pthread_t long_sleep_thread = xpthread_create (NULL,
|
||||
+ sleep_and_check_sigchld,
|
||||
+ &(double) { 2 });
|
||||
+ pthread_t short_sleep_thread = xpthread_create (NULL,
|
||||
+ sleep_and_check_sigchld,
|
||||
+ &(double) { 1 });
|
||||
+ xpthread_join (short_sleep_thread);
|
||||
+ xpthread_join (long_sleep_thread);
|
||||
+ }
|
||||
+
|
||||
TEST_COMPARE (system (""), 0);
|
||||
|
||||
return 0;
|
||||
diff --git a/support/shell-container.c b/support/shell-container.c
|
||||
index ffa3378b5e..b1f9e793c1 100644
|
||||
--- a/support/shell-container.c
|
||||
+++ b/support/shell-container.c
|
||||
@@ -169,6 +170,31 @@ kill_func (char **argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/* Emulate the "/bin/sleep" command. No suffix support. Options are
|
||||
+ ignored. */
|
||||
+static int
|
||||
+sleep_func (char **argv)
|
||||
+{
|
||||
+ if (argv[0] == NULL)
|
||||
+ {
|
||||
+ fprintf (stderr, "sleep: missing operand\n");
|
||||
+ return 1;
|
||||
+ }
|
||||
+ char *endptr = NULL;
|
||||
+ long sec = strtol (argv[0], &endptr, 0);
|
||||
+ if (endptr == argv[0] || errno == ERANGE || sec < 0)
|
||||
+ {
|
||||
+ fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
|
||||
+ return 1;
|
||||
+ }
|
||||
+ if (sleep (sec) < 0)
|
||||
+ {
|
||||
+ fprintf (stderr, "sleep: failed to nanosleep\n");
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/* This is a list of all the built-in commands we understand. */
|
||||
static struct {
|
||||
const char *name;
|
||||
@@ -179,6 +206,7 @@ static struct {
|
||||
{ "cp", copy_func },
|
||||
{ "exit", exit_func },
|
||||
{ "kill", kill_func },
|
||||
+ { "sleep", sleep_func },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c
|
||||
index 2335a99184..d77720a625 100644
|
||||
--- a/sysdeps/posix/system.c
|
||||
+++ b/sysdeps/posix/system.c
|
||||
@@ -179,16 +179,16 @@ do_system (const char *line)
|
||||
as if the shell had terminated using _exit(127). */
|
||||
status = W_EXITCODE (127, 0);
|
||||
|
||||
+ /* sigaction can not fail with SIGINT/SIGQUIT used with old
|
||||
+ disposition. Same applies for sigprocmask. */
|
||||
DO_LOCK ();
|
||||
if (SUB_REF () == 0)
|
||||
{
|
||||
- /* sigaction can not fail with SIGINT/SIGQUIT used with old
|
||||
- disposition. Same applies for sigprocmask. */
|
||||
__sigaction (SIGINT, &intr, NULL);
|
||||
__sigaction (SIGQUIT, &quit, NULL);
|
||||
- __sigprocmask (SIG_SETMASK, &omask, NULL);
|
||||
}
|
||||
DO_UNLOCK ();
|
||||
+ __sigprocmask (SIG_SETMASK, &omask, NULL);
|
||||
|
||||
if (ret != 0)
|
||||
__set_errno (ret);
|
@ -0,0 +1,27 @@
|
||||
From d03094649d39949a30513bf3ffb03a28fecbccd8 Mon Sep 17 00:00:00 2001
|
||||
From: Adam Yi <ayi@janestreet.com>
|
||||
Date: Wed, 8 Mar 2023 03:11:47 -0500
|
||||
Subject: hurd: fix build of tst-system.c
|
||||
|
||||
We made tst-system.c depend on pthread, but that requires linking with
|
||||
$(shared-thread-library). It does not fail under Linux because the
|
||||
variable expands to nothing under Linux, but it fails for Hurd.
|
||||
|
||||
I verified via cross-compiling that "make check" now works
|
||||
for Hurd.
|
||||
|
||||
Signed-off-by: Adam Yi <ayi@janestreet.com>
|
||||
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
||||
[DJ: Edited for RHEL 8]
|
||||
diff -rup a/stdlib/Makefile b/stdlib/Makefile
|
||||
--- a/stdlib/Makefile 2023-07-07 00:44:55.810981644 -0400
|
||||
+++ b/stdlib/Makefile 2023-07-07 00:46:47.541411091 -0400
|
||||
@@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-threa
|
||||
LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
|
||||
LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
|
||||
LDLIBS-test-on_exit-race = $(shared-thread-library)
|
||||
+LDLIBS-tst-system = $(shared-thread-library)
|
||||
|
||||
LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
|
||||
LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
|
@ -0,0 +1,42 @@
|
||||
This patch is a RHEL-only patch which modifies the custom changes
|
||||
in the previous patches in this series to make the test case look
|
||||
more like the upstream test case.
|
||||
|
||||
diff -rup a/stdlib/tst-system.c b/stdlib/tst-system.c
|
||||
--- a/stdlib/tst-system.c 2023-07-10 13:37:53.089505036 -0400
|
||||
+++ b/stdlib/tst-system.c 2023-07-10 14:04:03.922610279 -0400
|
||||
@@ -173,10 +173,10 @@ do_test (void)
|
||||
{
|
||||
pthread_t long_sleep_thread = xpthread_create (NULL,
|
||||
sleep_and_check_sigchld,
|
||||
- &(double) { 2 });
|
||||
+ &(double) { 0.2 });
|
||||
pthread_t short_sleep_thread = xpthread_create (NULL,
|
||||
sleep_and_check_sigchld,
|
||||
- &(double) { 1 });
|
||||
+ &(double) { 0.1 });
|
||||
xpthread_join (short_sleep_thread);
|
||||
xpthread_join (long_sleep_thread);
|
||||
}
|
||||
diff -rup a/support/shell-container.c b/support/shell-container.c
|
||||
--- a/support/shell-container.c 2023-07-10 13:37:53.089505036 -0400
|
||||
+++ b/support/shell-container.c 2023-07-10 14:03:20.392920627 -0400
|
||||
@@ -182,15 +182,15 @@ sleep_func (char **argv)
|
||||
return 1;
|
||||
}
|
||||
char *endptr = NULL;
|
||||
- long sec = strtol (argv[0], &endptr, 0);
|
||||
+ double sec = strtod (argv[0], &endptr);
|
||||
if (endptr == argv[0] || errno == ERANGE || sec < 0)
|
||||
{
|
||||
fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
- if (sleep (sec) < 0)
|
||||
+ if (usleep ((useconds_t)(sec * 1000000.0)) < 0)
|
||||
{
|
||||
- fprintf (stderr, "sleep: failed to nanosleep\n");
|
||||
+ fprintf (stderr, "sleep: failed to usleep: %s\n", strerror (errno));
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
@ -0,0 +1,70 @@
|
||||
commit 801af9fafd4689337ebf27260aa115335a0cb2bc
|
||||
Author: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
Date: Sat Feb 4 14:41:38 2023 +0300
|
||||
|
||||
gmon: Fix allocated buffer overflow (bug 29444)
|
||||
|
||||
The `__monstartup()` allocates a buffer used to store all the data
|
||||
accumulated by the monitor.
|
||||
|
||||
The size of this buffer depends on the size of the internal structures
|
||||
used and the address range for which the monitor is activated, as well
|
||||
as on the maximum density of call instructions and/or callable functions
|
||||
that could be potentially on a segment of executable code.
|
||||
|
||||
In particular a hash table of arcs is placed at the end of this buffer.
|
||||
The size of this hash table is calculated in bytes as
|
||||
p->fromssize = p->textsize / HASHFRACTION;
|
||||
|
||||
but actually should be
|
||||
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
|
||||
|
||||
This results in writing beyond the end of the allocated buffer when an
|
||||
added arc corresponds to a call near the end of the monitored
|
||||
address range, since `_mcount()` checks the incoming caller address for
|
||||
monitored range but not the intermediate result hash-like index that
|
||||
it uses to write into the table.
|
||||
|
||||
It should be noted that when the results are output to `gmon.out`, the
|
||||
table is read to the last element calculated from the allocated size in
|
||||
bytes, so the arcs stored outside the buffer boundary did not fall into
|
||||
`gprof` for analysis. Thus this "feature" helped me to find this bug
|
||||
during working with https://sourceware.org/bugzilla/show_bug.cgi?id=29438
|
||||
|
||||
Just in case, I will explicitly note that the problem breaks the
|
||||
`make test t=gmon/tst-gmon-dso` added for Bug 29438.
|
||||
There, the arc of the `f3()` call disappears from the output, since in
|
||||
the DSO case, the call to `f3` is located close to the end of the
|
||||
monitored range.
|
||||
|
||||
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
|
||||
Another minor error seems to be a related typo in the calculation of
|
||||
`kcountsize`, but since kcounts are smaller than froms, this is
|
||||
actually to align the p->froms data.
|
||||
|
||||
Co-authored-by: DJ Delorie <dj@redhat.com>
|
||||
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
|
||||
|
||||
diff --git a/gmon/gmon.c b/gmon/gmon.c
|
||||
index dee64803ada583d7..bf76358d5b1aa2da 100644
|
||||
--- a/gmon/gmon.c
|
||||
+++ b/gmon/gmon.c
|
||||
@@ -132,6 +132,8 @@ __monstartup (u_long lowpc, u_long highpc)
|
||||
p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
|
||||
p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
|
||||
p->textsize = p->highpc - p->lowpc;
|
||||
+ /* This looks like a typo, but it's here to align the p->froms
|
||||
+ section. */
|
||||
p->kcountsize = ROUNDUP(p->textsize / HISTFRACTION, sizeof(*p->froms));
|
||||
p->hashfraction = HASHFRACTION;
|
||||
p->log_hashfraction = -1;
|
||||
@@ -142,7 +144,7 @@ __monstartup (u_long lowpc, u_long highpc)
|
||||
instead of integer division. Precompute shift amount. */
|
||||
p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
|
||||
}
|
||||
- p->fromssize = p->textsize / HASHFRACTION;
|
||||
+ p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
|
||||
p->tolimit = p->textsize * ARCDENSITY / 100;
|
||||
if (p->tolimit < MINARCS)
|
||||
p->tolimit = MINARCS;
|
@ -0,0 +1,477 @@
|
||||
This patch adds the required @order directives to preserve the
|
||||
GLIBC_PRIVATE ABI.
|
||||
|
||||
commit 31be941e4367c001b2009308839db5c67bf9dcbc
|
||||
Author: Simon Kissane <skissane@gmail.com>
|
||||
Date: Sat Feb 11 20:12:13 2023 +1100
|
||||
|
||||
gmon: improve mcount overflow handling [BZ# 27576]
|
||||
|
||||
When mcount overflows, no gmon.out file is generated, but no message is printed
|
||||
to the user, leaving the user with no idea why, and thinking maybe there is
|
||||
some bug - which is how BZ 27576 ended up being logged. Print a message to
|
||||
stderr in this case so the user knows what is going on.
|
||||
|
||||
As a comment in sys/gmon.h acknowledges, the hardcoded MAXARCS value is too
|
||||
small for some large applications, including the test case in that BZ. Rather
|
||||
than increase it, add tunables to enable MINARCS and MAXARCS to be overridden
|
||||
at runtime (glibc.gmon.minarcs and glibc.gmon.maxarcs). So if a user gets the
|
||||
mcount overflow error, they can try increasing maxarcs (they might need to
|
||||
increase minarcs too if the heuristic is wrong in their case.)
|
||||
|
||||
Note setting minarcs/maxarcs too large can cause monstartup to fail with an
|
||||
out of memory error. If you set them large enough, it can cause an integer
|
||||
overflow in calculating the buffer size. I haven't done anything to defend
|
||||
against that - it would not generally be a security vulnerability, since these
|
||||
tunables will be ignored in suid/sgid programs (due to the SXID_ERASE default),
|
||||
and if you can set GLIBC_TUNABLES in the environment of a process, you can take
|
||||
it over anyway (LD_PRELOAD, LD_LIBRARY_PATH, etc). I thought about modifying
|
||||
the code of monstartup to defend against integer overflows, but doing so is
|
||||
complicated, and I realise the existing code is susceptible to them even prior
|
||||
to this change (e.g. try passing a pathologically large highpc argument to
|
||||
monstartup), so I decided just to leave that possibility in-place.
|
||||
|
||||
Add a test case which demonstrates mcount overflow and the tunables.
|
||||
|
||||
Document the new tunables in the manual.
|
||||
|
||||
Signed-off-by: Simon Kissane <skissane@gmail.com>
|
||||
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
manual/tunables.texi
|
||||
(missing tunables downstream)
|
||||
|
||||
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
|
||||
index f11ca5b3e8b09b43..dc2999796042dbaf 100644
|
||||
--- a/elf/dl-tunables.list
|
||||
+++ b/elf/dl-tunables.list
|
||||
@@ -149,4 +149,17 @@ glibc {
|
||||
default: 2
|
||||
}
|
||||
}
|
||||
+
|
||||
+ gmon {
|
||||
+ minarcs {
|
||||
+ type: INT_32
|
||||
+ minval: 50
|
||||
+ default: 50
|
||||
+ }
|
||||
+ maxarcs {
|
||||
+ type: INT_32
|
||||
+ minval: 50
|
||||
+ default: 1048576
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
diff --git a/gmon/Makefile b/gmon/Makefile
|
||||
index d94593c9d8a882eb..54f05894d4dd8c4a 100644
|
||||
--- a/gmon/Makefile
|
||||
+++ b/gmon/Makefile
|
||||
@@ -25,7 +25,7 @@ include ../Makeconfig
|
||||
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
|
||||
routines := gmon mcount profil sprofil prof-freq
|
||||
|
||||
-tests = tst-sprofil tst-gmon
|
||||
+tests = tst-sprofil tst-gmon tst-mcount-overflow
|
||||
ifeq ($(build-profile),yes)
|
||||
tests += tst-profile-static
|
||||
tests-static += tst-profile-static
|
||||
@@ -56,6 +56,18 @@ ifeq ($(run-built-tests),yes)
|
||||
tests-special += $(objpfx)tst-gmon-gprof.out
|
||||
endif
|
||||
|
||||
+CFLAGS-tst-mcount-overflow.c := -fno-omit-frame-pointer -pg
|
||||
+tst-mcount-overflow-no-pie = yes
|
||||
+CRT-tst-mcount-overflow := $(csu-objpfx)g$(start-installed-name)
|
||||
+# Intentionally use invalid config where maxarcs<minarcs to check warning is printed
|
||||
+tst-mcount-overflow-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcount-overflow.data \
|
||||
+ GLIBC_TUNABLES=glibc.gmon.minarcs=51:glibc.gmon.maxarcs=50
|
||||
+# Send stderr into output file because we make sure expected messages are printed
|
||||
+tst-mcount-overflow-ARGS := 2>&1 1>/dev/null | cat
|
||||
+ifeq ($(run-built-tests),yes)
|
||||
+tests-special += $(objpfx)tst-mcount-overflow-check.out
|
||||
+endif
|
||||
+
|
||||
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
|
||||
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
|
||||
tst-gmon-static-no-pie = yes
|
||||
@@ -103,6 +115,14 @@ $(objpfx)tst-gmon.out: clean-tst-gmon-data
|
||||
clean-tst-gmon-data:
|
||||
rm -f $(objpfx)tst-gmon.data.*
|
||||
|
||||
+$(objpfx)tst-mcount-overflow.o: clean-tst-mcount-overflow-data
|
||||
+clean-tst-mcount-overflow-data:
|
||||
+ rm -f $(objpfx)tst-mcount-overflow.data.*
|
||||
+
|
||||
+$(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)tst-mcount-overflow.out
|
||||
+ $(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
|
||||
+ $(evaluate-test)
|
||||
+
|
||||
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
|
||||
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
|
||||
$(evaluate-test)
|
||||
diff --git a/gmon/gmon.c b/gmon/gmon.c
|
||||
index bf76358d5b1aa2da..689bf80141e559ca 100644
|
||||
--- a/gmon/gmon.c
|
||||
+++ b/gmon/gmon.c
|
||||
@@ -46,6 +46,11 @@
|
||||
#include <libc-internal.h>
|
||||
#include <not-cancel.h>
|
||||
|
||||
+#if HAVE_TUNABLES
|
||||
+# define TUNABLE_NAMESPACE gmon
|
||||
+# include <elf/dl-tunables.h>
|
||||
+#endif
|
||||
+
|
||||
#ifdef PIC
|
||||
# include <link.h>
|
||||
|
||||
@@ -124,6 +129,22 @@ __monstartup (u_long lowpc, u_long highpc)
|
||||
int o;
|
||||
char *cp;
|
||||
struct gmonparam *p = &_gmonparam;
|
||||
+ long int minarcs, maxarcs;
|
||||
+
|
||||
+#if HAVE_TUNABLES
|
||||
+ /* Read minarcs/maxarcs tunables. */
|
||||
+ minarcs = TUNABLE_GET (minarcs, int32_t, NULL);
|
||||
+ maxarcs = TUNABLE_GET (maxarcs, int32_t, NULL);
|
||||
+ if (maxarcs < minarcs)
|
||||
+ {
|
||||
+ ERR("monstartup: maxarcs < minarcs, setting maxarcs = minarcs\n");
|
||||
+ maxarcs = minarcs;
|
||||
+ }
|
||||
+#else
|
||||
+ /* No tunables, we use hardcoded defaults */
|
||||
+ minarcs = MINARCS;
|
||||
+ maxarcs = MAXARCS;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* round lowpc and highpc to multiples of the density we're using
|
||||
@@ -146,10 +167,10 @@ __monstartup (u_long lowpc, u_long highpc)
|
||||
}
|
||||
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
|
||||
p->tolimit = p->textsize * ARCDENSITY / 100;
|
||||
- if (p->tolimit < MINARCS)
|
||||
- p->tolimit = MINARCS;
|
||||
- else if (p->tolimit > MAXARCS)
|
||||
- p->tolimit = MAXARCS;
|
||||
+ if (p->tolimit < minarcs)
|
||||
+ p->tolimit = minarcs;
|
||||
+ else if (p->tolimit > maxarcs)
|
||||
+ p->tolimit = maxarcs;
|
||||
p->tossize = p->tolimit * sizeof(struct tostruct);
|
||||
|
||||
cp = calloc (p->kcountsize + p->fromssize + p->tossize, 1);
|
||||
diff --git a/gmon/mcount.c b/gmon/mcount.c
|
||||
index 9d4a1a50fa6ab21a..f7180fdb83399a14 100644
|
||||
--- a/gmon/mcount.c
|
||||
+++ b/gmon/mcount.c
|
||||
@@ -41,6 +41,10 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
|
||||
|
||||
#include <atomic.h>
|
||||
|
||||
+#include <not-cancel.h>
|
||||
+#include <unistd.h>
|
||||
+#define ERR(s) __write_nocancel (STDERR_FILENO, s, sizeof (s) - 1)
|
||||
+
|
||||
/*
|
||||
* mcount is called on entry to each function compiled with the profiling
|
||||
* switch set. _mcount(), which is declared in a machine-dependent way
|
||||
@@ -170,6 +174,7 @@ done:
|
||||
return;
|
||||
overflow:
|
||||
p->state = GMON_PROF_ERROR;
|
||||
+ ERR("mcount: call graph buffer size limit exceeded, gmon.out will not be generated\n");
|
||||
return;
|
||||
}
|
||||
|
||||
diff --git a/gmon/sys/gmon.h b/gmon/sys/gmon.h
|
||||
index b4cc3b043a2aec77..af0582a3717085b5 100644
|
||||
--- a/gmon/sys/gmon.h
|
||||
+++ b/gmon/sys/gmon.h
|
||||
@@ -111,6 +111,8 @@ extern struct __bb *__bb_head;
|
||||
* Always allocate at least this many tostructs. This
|
||||
* hides the inadequacy of the ARCDENSITY heuristic, at least
|
||||
* for small programs.
|
||||
+ *
|
||||
+ * Value can be overridden at runtime by glibc.gmon.minarcs tunable.
|
||||
*/
|
||||
#define MINARCS 50
|
||||
|
||||
@@ -124,8 +126,8 @@ extern struct __bb *__bb_head;
|
||||
* Used to be max representable value of ARCINDEX minus 2, but now
|
||||
* that ARCINDEX is a long, that's too large; we don't really want
|
||||
* to allow a 48 gigabyte table.
|
||||
- * The old value of 1<<16 wasn't high enough in practice for large C++
|
||||
- * programs; will 1<<20 be adequate for long? FIXME
|
||||
+ *
|
||||
+ * Value can be overridden at runtime by glibc.gmon.maxarcs tunable.
|
||||
*/
|
||||
#define MAXARCS (1 << 20)
|
||||
|
||||
diff --git a/gmon/tst-mcount-overflow-check.sh b/gmon/tst-mcount-overflow-check.sh
|
||||
new file mode 100644
|
||||
index 0000000000000000..27eb5538fd573a6e
|
||||
--- /dev/null
|
||||
+++ b/gmon/tst-mcount-overflow-check.sh
|
||||
@@ -0,0 +1,45 @@
|
||||
+#!/bin/sh
|
||||
+# Test expected messages generated when mcount overflows
|
||||
+# Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
+# Copyright The GNU Toolchain Authors.
|
||||
+# This file is part of the GNU C Library.
|
||||
+
|
||||
+# The GNU C Library is free software; you can redistribute it and/or
|
||||
+# modify it under the terms of the GNU Lesser General Public
|
||||
+# License as published by the Free Software Foundation; either
|
||||
+# version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+# The GNU C Library is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+# Lesser General Public License for more details.
|
||||
+
|
||||
+# You should have received a copy of the GNU Lesser General Public
|
||||
+# License along with the GNU C Library; if not, see
|
||||
+# <https://www.gnu.org/licenses/>.
|
||||
+
|
||||
+LC_ALL=C
|
||||
+export LC_ALL
|
||||
+set -e
|
||||
+exec 2>&1
|
||||
+
|
||||
+program="$1"
|
||||
+
|
||||
+check_msg() {
|
||||
+ if ! grep -q "$1" "$program.out"; then
|
||||
+ echo "FAIL: expected message not in output: $1"
|
||||
+ exit 1
|
||||
+ fi
|
||||
+}
|
||||
+
|
||||
+check_msg 'monstartup: maxarcs < minarcs, setting maxarcs = minarcs'
|
||||
+check_msg 'mcount: call graph buffer size limit exceeded, gmon.out will not be generated'
|
||||
+
|
||||
+for data_file in $1.data.*; do
|
||||
+ if [ -f "$data_file" ]; then
|
||||
+ echo "FAIL: expected no data files, but found $data_file"
|
||||
+ exit 1
|
||||
+ fi
|
||||
+done
|
||||
+
|
||||
+echo PASS
|
||||
diff --git a/gmon/tst-mcount-overflow.c b/gmon/tst-mcount-overflow.c
|
||||
new file mode 100644
|
||||
index 0000000000000000..06cc93ef872eb7c1
|
||||
--- /dev/null
|
||||
+++ b/gmon/tst-mcount-overflow.c
|
||||
@@ -0,0 +1,72 @@
|
||||
+/* Test program to trigger mcount overflow in profiling collection.
|
||||
+ Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Program with sufficiently complex, yet pointless, call graph
|
||||
+ that it will trigger an mcount overflow, when you set the
|
||||
+ minarcs/maxarcs tunables to very low values. */
|
||||
+
|
||||
+#define PREVENT_TAIL_CALL asm volatile ("")
|
||||
+
|
||||
+/* Calls REP(n) macro 16 times, for n=0..15.
|
||||
+ * You need to define REP(n) before using this.
|
||||
+ */
|
||||
+#define REPS \
|
||||
+ REP(0) REP(1) REP(2) REP(3) REP(4) REP(5) REP(6) REP(7) \
|
||||
+ REP(8) REP(9) REP(10) REP(11) REP(12) REP(13) REP(14) REP(15)
|
||||
+
|
||||
+/* Defines 16 leaf functions named f1_0 to f1_15 */
|
||||
+#define REP(n) \
|
||||
+ __attribute__ ((noinline, noclone, weak)) void f1_##n (void) {};
|
||||
+REPS
|
||||
+#undef REP
|
||||
+
|
||||
+/* Calls all 16 leaf functions f1_* in succession */
|
||||
+__attribute__ ((noinline, noclone, weak)) void
|
||||
+f2 (void)
|
||||
+{
|
||||
+# define REP(n) f1_##n();
|
||||
+ REPS
|
||||
+# undef REP
|
||||
+ PREVENT_TAIL_CALL;
|
||||
+}
|
||||
+
|
||||
+/* Defines 16 functions named f2_0 to f2_15, which all just call f2 */
|
||||
+#define REP(n) \
|
||||
+ __attribute__ ((noinline, noclone, weak)) void \
|
||||
+ f2_##n (void) { f2(); PREVENT_TAIL_CALL; };
|
||||
+REPS
|
||||
+#undef REP
|
||||
+
|
||||
+__attribute__ ((noinline, noclone, weak)) void
|
||||
+f3 (int count)
|
||||
+{
|
||||
+ for (int i = 0; i < count; ++i)
|
||||
+ {
|
||||
+ /* Calls f1_0(), f2_0(), f1_1(), f2_1(), f1_2(), f2_2(), etc */
|
||||
+# define REP(n) f1_##n(); f2_##n();
|
||||
+ REPS
|
||||
+# undef REP
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ f3 (1000);
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/manual/tunables.texi b/manual/tunables.texi
|
||||
index 7b70e80391ee87f7..00eafcf44b562b9e 100644
|
||||
--- a/manual/tunables.texi
|
||||
+++ b/manual/tunables.texi
|
||||
@@ -73,6 +73,9 @@ glibc.malloc.check: 0 (min: 0, max: 3)
|
||||
* Elision Tunables:: Tunables in elision subsystem
|
||||
* Hardware Capability Tunables:: Tunables that modify the hardware
|
||||
capabilities seen by @theglibc{}
|
||||
+* gmon Tunables:: Tunables that control the gmon profiler, used in
|
||||
+ conjunction with gprof
|
||||
+
|
||||
@end menu
|
||||
|
||||
@node Tunable names
|
||||
@@ -506,3 +509,59 @@ instead.
|
||||
|
||||
This tunable is specific to i386 and x86-64.
|
||||
@end deftp
|
||||
+
|
||||
+@node gmon Tunables
|
||||
+@section gmon Tunables
|
||||
+@cindex gmon tunables
|
||||
+
|
||||
+@deftp {Tunable namespace} glibc.gmon
|
||||
+This tunable namespace affects the behaviour of the gmon profiler.
|
||||
+gmon is a component of @theglibc{} which is normally used in
|
||||
+conjunction with gprof.
|
||||
+
|
||||
+When GCC compiles a program with the @code{-pg} option, it instruments
|
||||
+the program with calls to the @code{mcount} function, to record the
|
||||
+program's call graph. At program startup, a memory buffer is allocated
|
||||
+to store this call graph; the size of the buffer is calculated using a
|
||||
+heuristic based on code size. If during execution, the buffer is found
|
||||
+to be too small, profiling will be aborted and no @file{gmon.out} file
|
||||
+will be produced. In that case, you will see the following message
|
||||
+printed to standard error:
|
||||
+
|
||||
+@example
|
||||
+mcount: call graph buffer size limit exceeded, gmon.out will not be generated
|
||||
+@end example
|
||||
+
|
||||
+Most of the symbols discussed in this section are defined in the header
|
||||
+@code{sys/gmon.h}. However, some symbols (for example @code{mcount})
|
||||
+are not defined in any header file, since they are only intended to be
|
||||
+called from code generated by the compiler.
|
||||
+@end deftp
|
||||
+
|
||||
+@deftp Tunable glibc.gmon.minarcs
|
||||
+The heuristic for sizing the call graph buffer is known to be
|
||||
+insufficient for small programs; hence, the calculated value is clamped
|
||||
+to be at least a minimum size. The default minimum (in units of
|
||||
+call graph entries, @code{struct tostruct}), is given by the macro
|
||||
+@code{MINARCS}. If you have some program with an unusually complex
|
||||
+call graph, for which the heuristic fails to allocate enough space,
|
||||
+you can use this tunable to increase the minimum to a larger value.
|
||||
+@end deftp
|
||||
+
|
||||
+@deftp Tunable glibc.gmon.maxarcs
|
||||
+To prevent excessive memory consumption when profiling very large
|
||||
+programs, the call graph buffer is allowed to have a maximum of
|
||||
+@code{MAXARCS} entries. For some very large programs, the default
|
||||
+value of @code{MAXARCS} defined in @file{sys/gmon.h} is too small; in
|
||||
+that case, you can use this tunable to increase it.
|
||||
+
|
||||
+Note the value of the @code{maxarcs} tunable must be greater than or equal
|
||||
+to that of the @code{minarcs} tunable; if this constraint is violated,
|
||||
+a warning will be printed to standard error at program startup, and
|
||||
+the @code{minarcs} value will be used as the maximum as well.
|
||||
+
|
||||
+Setting either tunable too high may result in a call graph buffer
|
||||
+whose size exceeds the available memory; in that case, an out of memory
|
||||
+error will be printed at program startup, the profiler will be
|
||||
+disabled, and no @file{gmon.out} file will be generated.
|
||||
+@end deftp
|
||||
diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
|
||||
index 5c3c5292025607a1..265f82ef2be42fd0 100644
|
||||
--- a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
|
||||
+++ b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
|
||||
@@ -24,3 +24,7 @@
|
||||
|
||||
# Tunables added in RHEL 8.8.0
|
||||
@order glibc.rtld.dynamic_sort
|
||||
+
|
||||
+# Tunables added in RHEL 8.9.0
|
||||
+@order glibc.gmon.minarcs
|
||||
+@order glibc.gmon.maxarcs
|
||||
diff --git a/sysdeps/unix/sysv/linux/i386/dl-tunables.list b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
|
||||
index b9cad4af62d9f2e5..9c1ccb86501c61e7 100644
|
||||
--- a/sysdeps/unix/sysv/linux/i386/dl-tunables.list
|
||||
+++ b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
|
||||
@@ -31,3 +31,7 @@
|
||||
|
||||
# Tunables added in RHEL 8.8.0
|
||||
@order glibc.rtld.dynamic_sort
|
||||
+
|
||||
+# Tunables added in RHEL 8.9.0
|
||||
+@order glibc.gmon.minarcs
|
||||
+@order glibc.gmon.maxarcs
|
||||
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
|
||||
index ee1e6fca95e1f2da..c8bb1a8ec0283ac8 100644
|
||||
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
|
||||
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
|
||||
@@ -24,3 +24,7 @@
|
||||
|
||||
# Tunables added in RHEL 8.8.0
|
||||
@order glibc.rtld.dynamic_sort
|
||||
+
|
||||
+# Tunables added in RHEL 8.9.0
|
||||
+@order glibc.gmon.minarcs
|
||||
+@order glibc.gmon.maxarcs
|
||||
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
|
||||
index 099e28d8f8e67944..85b3a014ffcadc45 100644
|
||||
--- a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
|
||||
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
|
||||
@@ -23,3 +23,7 @@
|
||||
|
||||
# Tunables added in RHEL 8.8.0
|
||||
@order glibc.rtld.dynamic_sort
|
||||
+
|
||||
+# Tunables added in RHEL 8.9.0
|
||||
+@order glibc.gmon.minarcs
|
||||
+@order glibc.gmon.maxarcs
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
|
||||
index b9cad4af62d9f2e5..9c1ccb86501c61e7 100644
|
||||
--- a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
|
||||
@@ -31,3 +31,7 @@
|
||||
|
||||
# Tunables added in RHEL 8.8.0
|
||||
@order glibc.rtld.dynamic_sort
|
||||
+
|
||||
+# Tunables added in RHEL 8.9.0
|
||||
+@order glibc.gmon.minarcs
|
||||
+@order glibc.gmon.maxarcs
|
@ -0,0 +1,191 @@
|
||||
commit bde121872001d8f3224eeafa5b7effb871c3fbca
|
||||
Author: Simon Kissane <skissane@gmail.com>
|
||||
Date: Sat Feb 11 08:58:02 2023 +1100
|
||||
|
||||
gmon: fix memory corruption issues [BZ# 30101]
|
||||
|
||||
V2 of this patch fixes an issue in V1, where the state was changed to ON not
|
||||
OFF at end of _mcleanup. I hadn't noticed that (counterintuitively) ON=0 and
|
||||
OFF=3, hence zeroing the buffer turned it back on. So set the state to OFF
|
||||
after the memset.
|
||||
|
||||
1. Prevent double free, and reads from unallocated memory, when
|
||||
_mcleanup is (incorrectly) called two or more times in a row,
|
||||
without an intervening call to __monstartup; with this patch, the
|
||||
second and subsequent calls effectively become no-ops instead.
|
||||
While setting tos=NULL is minimal fix, safest action is to zero the
|
||||
whole gmonparam buffer.
|
||||
|
||||
2. Prevent memory leak when __monstartup is (incorrectly) called two
|
||||
or more times in a row, without an intervening call to _mcleanup;
|
||||
with this patch, the second and subsequent calls effectively become
|
||||
no-ops instead.
|
||||
|
||||
3. After _mcleanup, treat __moncontrol(1) as __moncontrol(0) instead.
|
||||
With zeroing of gmonparam buffer in _mcleanup, this stops the
|
||||
state incorrectly being changed to GMON_PROF_ON despite profiling
|
||||
actually being off. If we'd just done the minimal fix to _mcleanup
|
||||
of setting tos=NULL, there is risk of far worse memory corruption:
|
||||
kcount would point to deallocated memory, and the __profil syscall
|
||||
would make the kernel write profiling data into that memory,
|
||||
which could have since been reallocated to something unrelated.
|
||||
|
||||
4. Ensure __moncontrol(0) still turns off profiling even in error
|
||||
state. Otherwise, if mcount overflows and sets state to
|
||||
GMON_PROF_ERROR, when _mcleanup calls __moncontrol(0), the __profil
|
||||
syscall to disable profiling will not be invoked. _mcleanup will
|
||||
free the buffer, but the kernel will still be writing profiling
|
||||
       data into it, potentially corrupting arbitrary memory.
|
||||
|
||||
Also adds a test case for (1). Issues (2)-(4) are not feasible to test.
|
||||
|
||||
Signed-off-by: Simon Kissane <skissane@gmail.com>
|
||||
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
gmon/Makefile
|
||||
(copyright year update)
|
||||
|
||||
diff --git a/gmon/Makefile b/gmon/Makefile
|
||||
index 54f05894d4dd8c4a..1bc4ad6e14e292a9 100644
|
||||
--- a/gmon/Makefile
|
||||
+++ b/gmon/Makefile
|
||||
@@ -1,4 +1,5 @@
|
||||
-# Copyright (C) 1995-2018 Free Software Foundation, Inc.
|
||||
+# Copyright (C) 1995-2023 Free Software Foundation, Inc.
|
||||
+# Copyright The GNU Toolchain Authors.
|
||||
# This file is part of the GNU C Library.
|
||||
|
||||
# The GNU C Library is free software; you can redistribute it and/or
|
||||
@@ -25,7 +26,7 @@ include ../Makeconfig
|
||||
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
|
||||
routines := gmon mcount profil sprofil prof-freq
|
||||
|
||||
-tests = tst-sprofil tst-gmon tst-mcount-overflow
|
||||
+tests = tst-sprofil tst-gmon tst-mcount-overflow tst-mcleanup
|
||||
ifeq ($(build-profile),yes)
|
||||
tests += tst-profile-static
|
||||
tests-static += tst-profile-static
|
||||
@@ -68,6 +69,14 @@ ifeq ($(run-built-tests),yes)
|
||||
tests-special += $(objpfx)tst-mcount-overflow-check.out
|
||||
endif
|
||||
|
||||
+CFLAGS-tst-mcleanup.c := -fno-omit-frame-pointer -pg
|
||||
+tst-mcleanup-no-pie = yes
|
||||
+CRT-tst-mcleanup := $(csu-objpfx)g$(start-installed-name)
|
||||
+tst-mcleanup-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcleanup.data
|
||||
+ifeq ($(run-built-tests),yes)
|
||||
+tests-special += $(objpfx)tst-mcleanup.out
|
||||
+endif
|
||||
+
|
||||
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
|
||||
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
|
||||
tst-gmon-static-no-pie = yes
|
||||
@@ -123,6 +132,10 @@ $(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)ts
|
||||
$(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
|
||||
$(evaluate-test)
|
||||
|
||||
+$(objpfx)tst-mcleanup.out: clean-tst-mcleanup-data
|
||||
+clean-tst-mcleanup-data:
|
||||
+ rm -f $(objpfx)tst-mcleanup.data.*
|
||||
+
|
||||
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
|
||||
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
|
||||
$(evaluate-test)
|
||||
diff --git a/gmon/gmon.c b/gmon/gmon.c
|
||||
index 689bf80141e559ca..5e99a7351dc71666 100644
|
||||
--- a/gmon/gmon.c
|
||||
+++ b/gmon/gmon.c
|
||||
@@ -102,11 +102,8 @@ __moncontrol (int mode)
|
||||
{
|
||||
struct gmonparam *p = &_gmonparam;
|
||||
|
||||
- /* Don't change the state if we ran into an error. */
|
||||
- if (p->state == GMON_PROF_ERROR)
|
||||
- return;
|
||||
-
|
||||
- if (mode)
|
||||
+ /* Treat start request as stop if error or gmon not initialized. */
|
||||
+ if (mode && p->state != GMON_PROF_ERROR && p->tos != NULL)
|
||||
{
|
||||
/* start */
|
||||
__profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale);
|
||||
@@ -116,7 +113,9 @@ __moncontrol (int mode)
|
||||
{
|
||||
/* stop */
|
||||
__profil(NULL, 0, 0, 0);
|
||||
- p->state = GMON_PROF_OFF;
|
||||
+ /* Don't change the state if we ran into an error. */
|
||||
+ if (p->state != GMON_PROF_ERROR)
|
||||
+ p->state = GMON_PROF_OFF;
|
||||
}
|
||||
}
|
||||
libc_hidden_def (__moncontrol)
|
||||
@@ -146,6 +145,14 @@ __monstartup (u_long lowpc, u_long highpc)
|
||||
maxarcs = MAXARCS;
|
||||
#endif
|
||||
|
||||
+ /*
|
||||
+ * If we are incorrectly called twice in a row (without an
|
||||
+ * intervening call to _mcleanup), ignore the second call to
|
||||
+ * prevent leaking memory.
|
||||
+ */
|
||||
+ if (p->tos != NULL)
|
||||
+ return;
|
||||
+
|
||||
/*
|
||||
* round lowpc and highpc to multiples of the density we're using
|
||||
* so the rest of the scaling (here and in gprof) stays in ints.
|
||||
@@ -463,9 +470,14 @@ _mcleanup (void)
|
||||
{
|
||||
__moncontrol (0);
|
||||
|
||||
- if (_gmonparam.state != GMON_PROF_ERROR)
|
||||
+ if (_gmonparam.state != GMON_PROF_ERROR && _gmonparam.tos != NULL)
|
||||
write_gmon ();
|
||||
|
||||
/* free the memory. */
|
||||
free (_gmonparam.tos);
|
||||
+
|
||||
+ /* reset buffer to initial state for safety */
|
||||
+ memset(&_gmonparam, 0, sizeof _gmonparam);
|
||||
+ /* somewhat confusingly, ON=0, OFF=3 */
|
||||
+ _gmonparam.state = GMON_PROF_OFF;
|
||||
}
|
||||
diff --git a/gmon/tst-mcleanup.c b/gmon/tst-mcleanup.c
|
||||
new file mode 100644
|
||||
index 0000000000000000..b259653ec833aca4
|
||||
--- /dev/null
|
||||
+++ b/gmon/tst-mcleanup.c
|
||||
@@ -0,0 +1,31 @@
|
||||
+/* Test program for repeated invocation of _mcleanup
|
||||
+ Copyright The GNU Toolchain Authors.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Intentionally calls _mcleanup() twice: once manually, it will be
|
||||
+ called again as an atexit handler. This is incorrect use of the API,
|
||||
+ but the point of the test is to make sure we don't crash when the
|
||||
+ API is misused in this way. */
|
||||
+
|
||||
+#include <sys/gmon.h>
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ _mcleanup();
|
||||
+ return 0;
|
||||
+}
|
@ -0,0 +1,216 @@
|
||||
From af992e7abdc9049714da76cae1e5e18bc4838fb8 Mon Sep 17 00:00:00 2001
|
||||
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
Date: Wed, 7 Jun 2023 13:18:01 -0500
|
||||
Subject: [PATCH] x86: Increase `non_temporal_threshold` to roughly `sizeof_L3
|
||||
/ 4`
|
||||
Content-type: text/plain; charset=UTF-8
|
||||
|
||||
Current `non_temporal_threshold` set to roughly '3/4 * sizeof_L3 /
|
||||
ncores_per_socket'. This patch updates that value to roughly
|
||||
'sizeof_L3 / 4`
|
||||
|
||||
The original value (specifically dividing the `ncores_per_socket`) was
|
||||
done to limit the amount of other threads' data a `memcpy`/`memset`
|
||||
could evict.
|
||||
|
||||
Dividing by 'ncores_per_socket', however leads to exceedingly low
|
||||
non-temporal thresholds and leads to using non-temporal stores in
|
||||
cases where REP MOVSB is multiple times faster.
|
||||
|
||||
Furthermore, non-temporal stores are written directly to main memory
|
||||
so using it at a size much smaller than L3 can place soon to be
|
||||
accessed data much further away than it otherwise could be. As well,
|
||||
modern machines are able to detect streaming patterns (especially if
|
||||
REP MOVSB is used) and provide LRU hints to the memory subsystem. This
|
||||
in effect caps the total amount of eviction at 1/cache_associativity,
|
||||
far below meaningfully thrashing the entire cache.
|
||||
|
||||
As best I can tell, the benchmarks that led to this small threshold
|
||||
were done comparing non-temporal stores versus standard cacheable
|
||||
stores. A better comparison (linked below) is to be REP MOVSB which,
|
||||
on the measured systems, is nearly 2x faster than non-temporal stores
|
||||
at the low-end of the previous threshold, and within 10% for over
|
||||
100MB copies (well past even the current threshold). In cases with a
|
||||
low number of threads competing for bandwidth, REP MOVSB is ~2x faster
|
||||
up to `sizeof_L3`.
|
||||
|
||||
The divisor of `4` is a somewhat arbitrary value. From benchmarks it
|
||||
seems Skylake and Icelake both prefer a divisor of `2`, but older CPUs
|
||||
such as Broadwell prefer something closer to `8`. This patch is meant
|
||||
to be followed up by another one to make the divisor cpu-specific, but
|
||||
in the meantime (and for easier backporting), this patch settles on
|
||||
`4` as a middle-ground.
|
||||
|
||||
Benchmarks comparing non-temporal stores, REP MOVSB, and cacheable
|
||||
stores were done using:
|
||||
https://github.com/goldsteinn/memcpy-nt-benchmarks
|
||||
|
||||
Sheets results (also available in pdf on the github):
|
||||
https://docs.google.com/spreadsheets/d/e/2PACX-1vS183r0rW_jRX6tG_E90m9qVuFiMbRIJvi5VAE8yYOvEOIEEc3aSNuEsrFbuXw5c3nGboxMmrupZD7K/pubhtml
|
||||
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
|
||||
---
|
||||
sysdeps/x86/dl-cacheinfo.h | 70 +++++++++++++++++++++++---------------
|
||||
1 file changed, 43 insertions(+), 27 deletions(-)
|
||||
|
||||
[DJ - ported to C8S]
|
||||
|
||||
diff -rup a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
|
||||
--- a/sysdeps/x86/cacheinfo.h 2023-08-08 11:54:09.969791421 -0400
|
||||
+++ b/sysdeps/x86/cacheinfo.h 2023-08-08 13:44:55.185333601 -0400
|
||||
@@ -46,7 +46,7 @@ long int __x86_rep_movsb_threshold attri
|
||||
long int __x86_rep_stosb_threshold attribute_hidden = 2048;
|
||||
|
||||
static void
|
||||
-get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr,
|
||||
+get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, unsigned int *threads_ptr,
|
||||
long int core)
|
||||
{
|
||||
unsigned int eax;
|
||||
@@ -65,6 +65,7 @@ get_common_cache_info (long int *shared_
|
||||
unsigned int family = cpu_features->basic.family;
|
||||
unsigned int model = cpu_features->basic.model;
|
||||
long int shared = *shared_ptr;
|
||||
+ long int shared_per_thread = *shared_per_thread_ptr;
|
||||
unsigned int threads = *threads_ptr;
|
||||
bool inclusive_cache = true;
|
||||
bool support_count_mask = true;
|
||||
@@ -80,6 +81,7 @@ get_common_cache_info (long int *shared_
|
||||
/* Try L2 otherwise. */
|
||||
level = 2;
|
||||
shared = core;
|
||||
+ shared_per_thread = core;
|
||||
threads_l2 = 0;
|
||||
threads_l3 = -1;
|
||||
}
|
||||
@@ -236,29 +238,28 @@ get_common_cache_info (long int *shared_
|
||||
}
|
||||
else
|
||||
{
|
||||
-intel_bug_no_cache_info:
|
||||
- /* Assume that all logical threads share the highest cache
|
||||
- level. */
|
||||
- threads
|
||||
- = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx
|
||||
- >> 16) & 0xff);
|
||||
- }
|
||||
-
|
||||
- /* Cap usage of highest cache level to the number of supported
|
||||
- threads. */
|
||||
- if (shared > 0 && threads > 0)
|
||||
- shared /= threads;
|
||||
+ intel_bug_no_cache_info:
|
||||
+ /* Assume that all logical threads share the highest cache
|
||||
+ level. */
|
||||
+ threads = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16)
|
||||
+ & 0xff);
|
||||
+
|
||||
+ /* Get per-thread size of highest level cache. */
|
||||
+ if (shared_per_thread > 0 && threads > 0)
|
||||
+ shared_per_thread /= threads;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Account for non-inclusive L2 and L3 caches. */
|
||||
if (!inclusive_cache)
|
||||
{
|
||||
if (threads_l2 > 0)
|
||||
- core /= threads_l2;
|
||||
+ shared_per_thread += core / threads_l2;
|
||||
shared += core;
|
||||
}
|
||||
|
||||
*shared_ptr = shared;
|
||||
+ *shared_per_thread_ptr = shared_per_thread;
|
||||
*threads_ptr = threads;
|
||||
}
|
||||
|
||||
@@ -272,6 +273,7 @@ init_cacheinfo (void)
|
||||
int max_cpuid_ex;
|
||||
long int data = -1;
|
||||
long int shared = -1;
|
||||
+ long int shared_per_thread = -1;
|
||||
long int core;
|
||||
unsigned int threads = 0;
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
@@ -287,22 +289,25 @@ init_cacheinfo (void)
|
||||
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
|
||||
core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
|
||||
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
|
||||
+ shared_per_thread = shared;
|
||||
|
||||
- get_common_cache_info (&shared, &threads, core);
|
||||
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
|
||||
}
|
||||
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
|
||||
{
|
||||
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
|
||||
core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
|
||||
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
|
||||
+ shared_per_thread = shared;
|
||||
|
||||
- get_common_cache_info (&shared, &threads, core);
|
||||
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
|
||||
}
|
||||
else if (cpu_features->basic.kind == arch_kind_amd)
|
||||
{
|
||||
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
|
||||
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
|
||||
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
|
||||
+ shared_per_thread = shared;
|
||||
|
||||
/* Get maximum extended function. */
|
||||
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
|
||||
@@ -352,6 +357,9 @@ init_cacheinfo (void)
|
||||
shared += core;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ if (shared_per_thread <= 0)
|
||||
+ shared_per_thread = shared;
|
||||
}
|
||||
|
||||
if (cpu_features->data_cache_size != 0)
|
||||
@@ -380,20 +388,30 @@ init_cacheinfo (void)
|
||||
__x86_shared_cache_size = shared;
|
||||
}
|
||||
|
||||
- /* The default setting for the non_temporal threshold is 3/4 of one
|
||||
- thread's share of the chip's cache. For most Intel and AMD processors
|
||||
- with an initial release date between 2017 and 2020, a thread's typical
|
||||
- share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
|
||||
- threshold leaves 125 KBytes to 500 KBytes of the thread's data
|
||||
- in cache after a maximum temporal copy, which will maintain
|
||||
- in cache a reasonable portion of the thread's stack and other
|
||||
- active data. If the threshold is set higher than one thread's
|
||||
- share of the cache, it has a substantial risk of negatively
|
||||
- impacting the performance of other threads running on the chip. */
|
||||
+ /* The default setting for the non_temporal threshold is 1/4 of size
|
||||
+ of the chip's cache. For most Intel and AMD processors with an
|
||||
+ initial release date between 2017 and 2023, a thread's typical
|
||||
+ share of the cache is from 18-64MB. Using the 1/4 L3 is meant to
|
||||
+ estimate the point where non-temporal stores begin out-competing
|
||||
+ REP MOVSB. As well the point where the fact that non-temporal
|
||||
+ stores are forced back to main memory would already occurred to the
|
||||
+ majority of the lines in the copy. Note, concerns about the
|
||||
+ entire L3 cache being evicted by the copy are mostly alleviated
|
||||
+ by the fact that modern HW detects streaming patterns and
|
||||
+ provides proper LRU hints so that the maximum thrashing
|
||||
+ capped at 1/associativity. */
|
||||
+ unsigned long int non_temporal_threshold = shared / 4;
|
||||
+ /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
|
||||
+ a higher risk of actually thrashing the cache as they don't have a HW LRU
|
||||
+ hint. As well, their performance in highly parallel situations is
|
||||
+ noticeably worse. */
|
||||
+ if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||||
+ non_temporal_threshold = shared_per_thread * 3 / 4;
|
||||
+
|
||||
__x86_shared_non_temporal_threshold
|
||||
= (cpu_features->non_temporal_threshold != 0
|
||||
? cpu_features->non_temporal_threshold
|
||||
- : __x86_shared_cache_size * 3 / 4);
|
||||
+ : non_temporal_threshold);
|
||||
|
||||
/* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
|
||||
unsigned int minimum_rep_movsb_threshold;
|
||||
Only in b/sysdeps/x86: cacheinfo.h~
|
@ -0,0 +1,47 @@
|
||||
From 47f747217811db35854ea06741be3685e8bbd44d Mon Sep 17 00:00:00 2001
|
||||
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
Date: Mon, 17 Jul 2023 23:14:33 -0500
|
||||
Subject: [PATCH] x86: Fix slight bug in `shared_per_thread` cache size
|
||||
calculation.
|
||||
Content-type: text/plain; charset=UTF-8
|
||||
|
||||
After:
|
||||
```
|
||||
commit af992e7abdc9049714da76cae1e5e18bc4838fb8
|
||||
Author: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
Date: Wed Jun 7 13:18:01 2023 -0500
|
||||
|
||||
x86: Increase `non_temporal_threshold` to roughly `sizeof_L3 / 4`
|
||||
```
|
||||
|
||||
Split `shared` (cumulative cache size) from `shared_per_thread` (cache
|
||||
size per socket), the `shared_per_thread` *can* be slightly off from
|
||||
the previous calculation.
|
||||
|
||||
Previously we added `core` even if `threads_l2` was invalid, and only
|
||||
used `threads_l2` to divide `core` if it was present. The changed
|
||||
version only included `core` if `threads_l2` was valid.
|
||||
|
||||
This change restores the old behavior if `threads_l2` is invalid by
|
||||
adding the entire value of `core`.
|
||||
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||
---
|
||||
sysdeps/x86/dl-cacheinfo.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
[DJ - ported to C8S]
|
||||
|
||||
diff -rup b1/sysdeps/x86/cacheinfo.h b2/sysdeps/x86/cacheinfo.h
|
||||
--- b1/sysdeps/x86/cacheinfo.h 2023-08-08 13:44:55.185333601 -0400
|
||||
+++ b2/sysdeps/x86/cacheinfo.h 2023-08-08 13:55:16.474680016 -0400
|
||||
@@ -253,8 +253,8 @@ get_common_cache_info (long int *shared_
|
||||
/* Account for non-inclusive L2 and L3 caches. */
|
||||
if (!inclusive_cache)
|
||||
{
|
||||
- if (threads_l2 > 0)
|
||||
- shared_per_thread += core / threads_l2;
|
||||
+ long int core_per_thread = threads_l2 > 0 ? (core / threads_l2) : core;
|
||||
+ shared_per_thread += core_per_thread;
|
||||
shared += core;
|
||||
}
|
||||
|
@ -0,0 +1,44 @@
|
||||
From 8b9a0af8ca012217bf90d1dc0694f85b49ae09da Mon Sep 17 00:00:00 2001
|
||||
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
Date: Tue, 18 Jul 2023 10:27:59 -0500
|
||||
Subject: [PATCH] [PATCH v1] x86: Use `3/4*sizeof(per-thread-L3)` as low bound
|
||||
for NT threshold.
|
||||
Content-type: text/plain; charset=UTF-8
|
||||
|
||||
On some machines we end up with incomplete cache information. This can
|
||||
make the new calculation of `sizeof(total-L3)/custom-divisor` end up
|
||||
lower than intended (and lower than the prior value). So reintroduce
|
||||
the old bound as a lower bound to avoid potentially regressing code
|
||||
where we don't have complete information to make the decision.
|
||||
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||
---
|
||||
sysdeps/x86/dl-cacheinfo.h | 15 ++++++++++++---
|
||||
1 file changed, 12 insertions(+), 3 deletions(-)
|
||||
|
||||
[DJ - ported to C8S]
|
||||
|
||||
diff -rup b2/sysdeps/x86/cacheinfo.h b3/sysdeps/x86/cacheinfo.h
|
||||
--- b2/sysdeps/x86/cacheinfo.h 2023-08-08 13:55:16.474680016 -0400
|
||||
+++ b3/sysdeps/x86/cacheinfo.h 2023-08-08 13:59:14.507988958 -0400
|
||||
@@ -401,12 +401,20 @@ init_cacheinfo (void)
|
||||
provides proper LRU hints so that the maximum thrashing
|
||||
capped at 1/associativity. */
|
||||
unsigned long int non_temporal_threshold = shared / 4;
|
||||
+ /* If the computed non_temporal_threshold <= 3/4 * per-thread L3, we most
|
||||
+ likely have incorrect/incomplete cache info in which case, default to
|
||||
+ 3/4 * per-thread L3 to avoid regressions. */
|
||||
+ unsigned long int non_temporal_threshold_lowbound
|
||||
+ = shared_per_thread * 3 / 4;
|
||||
+ if (non_temporal_threshold < non_temporal_threshold_lowbound)
|
||||
+ non_temporal_threshold = non_temporal_threshold_lowbound;
|
||||
+
|
||||
/* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
|
||||
a higher risk of actually thrashing the cache as they don't have a HW LRU
|
||||
hint. As well, their performance in highly parallel situations is
|
||||
noticeably worse. */
|
||||
if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||||
- non_temporal_threshold = shared_per_thread * 3 / 4;
|
||||
+ non_temporal_threshold = non_temporal_threshold_lowbound;
|
||||
|
||||
__x86_shared_non_temporal_threshold
|
||||
= (cpu_features->non_temporal_threshold != 0
|
@ -0,0 +1,39 @@
|
||||
Adjusted for backport to c8s by modifying sysdeps/x86/cacheinfo.h.
|
||||
|
||||
commit 885a7f0feee951f514a121788f46f33b2867110f
|
||||
Author: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
Date: Fri Aug 11 12:29:11 2023 -0500
|
||||
|
||||
x86: Fix incorrect scope of setting `shared_per_thread` [BZ# 30745]
|
||||
|
||||
The:
|
||||
|
||||
```
|
||||
if (shared_per_thread > 0 && threads > 0)
|
||||
shared_per_thread /= threads;
|
||||
```
|
||||
|
||||
Code was accidentally moved to inside the else scope. This doesn't
|
||||
match how it was previously (before af992e7abd).
|
||||
|
||||
This patch fixes that by putting the division after the `else` block.
|
||||
|
||||
diff --git a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
|
||||
index 4dbfa979ef052eaa..e53fa25106c95253 100644
|
||||
--- a/sysdeps/x86/cacheinfo.h
|
||||
+++ b/sysdeps/x86/cacheinfo.h
|
||||
@@ -243,11 +243,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u
|
||||
level. */
|
||||
threads = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16)
|
||||
& 0xff);
|
||||
-
|
||||
- /* Get per-thread size of highest level cache. */
|
||||
- if (shared_per_thread > 0 && threads > 0)
|
||||
- shared_per_thread /= threads;
|
||||
}
|
||||
+ /* Get per-thread size of highest level cache. */
|
||||
+ if (shared_per_thread > 0 && threads > 0)
|
||||
+ shared_per_thread /= threads;
|
||||
}
|
||||
|
||||
/* Account for non-inclusive L2 and L3 caches. */
|
@ -0,0 +1,35 @@
|
||||
commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae
|
||||
Author: Florian Weimer <fweimer@redhat.com>
|
||||
Date: Mon Apr 3 17:23:11 2023 +0200
|
||||
|
||||
x86_64: Fix asm constraints in feraiseexcept (bug 30305)
|
||||
|
||||
The divss instruction clobbers its first argument, and the constraints
|
||||
need to reflect that. Fortunately, with GCC 12, generated code does
|
||||
not actually change, so there is no externally visible bug.
|
||||
|
||||
Suggested-by: Jakub Jelinek <jakub@redhat.com>
|
||||
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
|
||||
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
|
||||
index ca1c223053bf016b..fb886ed540b52100 100644
|
||||
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
|
||||
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
|
||||
@@ -33,7 +33,7 @@ __feraiseexcept (int excepts)
|
||||
/* One example of an invalid operation is 0.0 / 0.0. */
|
||||
float f = 0.0;
|
||||
|
||||
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (f));
|
||||
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (f));
|
||||
(void) &f;
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ __feraiseexcept (int excepts)
|
||||
float f = 1.0;
|
||||
float g = 0.0;
|
||||
|
||||
- __asm__ __volatile__ ("divss %1, %0" : : "x" (f), "x" (g));
|
||||
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (f) : "x" (g));
|
||||
(void) &f;
|
||||
}
|
||||
|
@ -0,0 +1,28 @@
|
||||
Apply a fix similar to upstream commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae
|
||||
to the installed header file. Upstream, the header file has been removed
|
||||
in its uncorrected state, so there is no upstream fix to backport.
|
||||
|
||||
Suggested by Jakub Jelinek.
|
||||
|
||||
diff --git a/sysdeps/x86/fpu/bits/fenv.h b/sysdeps/x86/fpu/bits/fenv.h
|
||||
index 4103982d8c8ae014..4ae2d2a04c6754bd 100644
|
||||
--- a/sysdeps/x86/fpu/bits/fenv.h
|
||||
+++ b/sysdeps/x86/fpu/bits/fenv.h
|
||||
@@ -132,7 +132,7 @@ __NTH (__feraiseexcept_invalid_divbyzero (int __excepts))
|
||||
float __f = 0.0;
|
||||
|
||||
# ifdef __SSE_MATH__
|
||||
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f));
|
||||
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (__f));
|
||||
# else
|
||||
__asm__ __volatile__ ("fdiv %%st, %%st(0); fwait"
|
||||
: "=t" (__f) : "0" (__f));
|
||||
@@ -145,7 +145,7 @@ __NTH (__feraiseexcept_invalid_divbyzero (int __excepts))
|
||||
float __g = 0.0;
|
||||
|
||||
# ifdef __SSE_MATH__
|
||||
- __asm__ __volatile__ ("divss %1, %0" : : "x" (__f), "x" (__g));
|
||||
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (__f) : "x" (__g));
|
||||
# else
|
||||
__asm__ __volatile__ ("fdivp %%st, %%st(1); fwait"
|
||||
: "=t" (__f) : "0" (__f), "u" (__g) : "st(1)");
|
@ -0,0 +1,46 @@
|
||||
Only backport po/it.po and po/ja.po changes for the ESTALE message
|
||||
translation which we use during CI testing.
|
||||
|
||||
commit 7ff33eca6860648fb909df954da4996ce853d01d
|
||||
Author: Carlos O'Donell <carlos@redhat.com>
|
||||
Date: Fri Jul 7 11:27:08 2023 -0400
|
||||
|
||||
Translations: Add new ro support and update others.
|
||||
|
||||
This brings in the new Romanian language translations, and updates
|
||||
nine other translations. Important translations in this update
|
||||
include the Italian and Japanese translations for ESTALE which
|
||||
remove the mention of "NFS" from the error message translation.
|
||||
|
||||
diff --git a/po/it.po b/po/it.po
|
||||
index 2750575a1082f1db..6c2be3a4df5611ff 100644
|
||||
--- a/po/it.po
|
||||
+++ b/po/it.po
|
||||
@@ -6793,10 +6793,8 @@ msgstr "Quota disco superata"
|
||||
#. TRANS Repairing this condition usually requires unmounting, possibly repairing
|
||||
#. TRANS and remounting the file system.
|
||||
#: sysdeps/gnu/errlist.c:788
|
||||
-#, fuzzy
|
||||
-#| msgid "Stale NFS file handle"
|
||||
msgid "Stale file handle"
|
||||
-msgstr "Gestione del file NFS interrotta"
|
||||
+msgstr "Riferimento al file obsoleto"
|
||||
|
||||
# lf
|
||||
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
|
||||
diff --git a/po/ja.po b/po/ja.po
|
||||
index bd9b7ffbbd3e3bf6..8fb598c5edbc5891 100644
|
||||
--- a/po/ja.po
|
||||
+++ b/po/ja.po
|
||||
@@ -6360,10 +6360,8 @@ msgstr "ディスク使用量制限を超過しました"
|
||||
#. TRANS Repairing this condition usually requires unmounting, possibly repairing
|
||||
#. TRANS and remounting the file system.
|
||||
#: sysdeps/gnu/errlist.c:788
|
||||
-#, fuzzy
|
||||
-#| msgid "Stale NFS file handle"
|
||||
msgid "Stale file handle"
|
||||
-msgstr "実効性のないNFSファイルハンドルです"
|
||||
+msgstr "古いファイルハンドルです"
|
||||
|
||||
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
|
||||
#. TRANS already specifies an NFS-mounted file.
|
@ -0,0 +1,26 @@
|
||||
From abcf8db7fa46b73fd5b8193ce11f9312301b84c7 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Schwab <schwab@suse.de>
|
||||
Date: Wed, 7 Jun 2023 11:21:48 +0200
|
||||
Subject: resolv_conf: release lock on allocation failure (bug 30527)
|
||||
|
||||
When the initial allocation of global fails, the local lock is left
|
||||
locked.
|
||||
|
||||
Reported by Steffen Lammel of SAP HANA development.
|
||||
|
||||
diff --git a/resolv/resolv_conf.c b/resolv/resolv_conf.c
|
||||
index bd5890773b..8bc9edc634 100644
|
||||
--- a/resolv/resolv_conf.c
|
||||
+++ b/resolv/resolv_conf.c
|
||||
@@ -93,7 +93,10 @@ get_locked_global (void)
|
||||
{
|
||||
global_copy = calloc (1, sizeof (*global));
|
||||
if (global_copy == NULL)
|
||||
- return NULL;
|
||||
+ {
|
||||
+ __libc_lock_unlock (lock);
|
||||
+ return NULL;
|
||||
+ }
|
||||
atomic_store_relaxed (&global, global_copy);
|
||||
resolv_conf_array_init (&global_copy->array);
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
commit 0fda2a41baf7e978d07322aa278e964f4dce8802
|
||||
Author: Florian Weimer <fweimer@redhat.com>
|
||||
Date: Thu Jul 20 18:31:48 2023 +0200
|
||||
|
||||
debug: Mark libSegFault.so as NODELETE
|
||||
|
||||
The signal handler installed in the ELF constructor cannot easily
|
||||
be removed again (because the program may have changed handlers
|
||||
in the meantime). Mark the object as NODELETE so that the registered
|
||||
handler function is never unloaded.
|
||||
|
||||
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
|
||||
(cherry picked from commit 23ee92deea4c99d0e6a5f48fa7b942909b123ec5)
|
||||
|
||||
diff --git a/debug/Makefile b/debug/Makefile
|
||||
index b0f0b7beb6d5cef5..8bce89ddcd0a61ed 100644
|
||||
--- a/debug/Makefile
|
||||
+++ b/debug/Makefile
|
||||
@@ -213,6 +213,8 @@ extra-libs-others = $(extra-libs)
|
||||
|
||||
libSegFault-routines = segfault
|
||||
libSegFault-inhibit-o = $(filter-out .os,$(object-suffixes))
|
||||
+# libSegFault.so installs a signal handler in its ELF constructor.
|
||||
+LDFLAGS-SegFault.so = -Wl,--enable-new-dtags,-z,nodelete
|
||||
|
||||
libpcprofile-routines = pcprofile
|
||||
libpcprofile-inhibit-o = $(filter-out .os,$(object-suffixes))
|
Loading…
Reference in new issue