import glibc-2.34-82.el9

c9-beta imports/c9-beta/glibc-2.34-82.el9
MSVSphere Packaging Team 1 year ago
parent e763638f91
commit f4a03fdce9

2
.gitignore vendored

@ -1 +1,3 @@
SOURCES/glibc-2.34.tar.xz
SOURCES/glibc-c-utf8-locale-2.patch
SOURCES/glibc-upstream-2.34-373.patch

@ -1 +1,3 @@
7c3b8890a6346793b6334cc5f2fea5d437d307b8 SOURCES/glibc-2.34.tar.xz
47cf1a27ae2e86b37e44c49f6bf4630a1adabd9a SOURCES/glibc-c-utf8-locale-2.patch
6022f103e5596ad229f22bc966327d71208f7016 SOURCES/glibc-upstream-2.34-373.patch

@ -0,0 +1,54 @@
Only backport po/it.po and po/ja.po changes for the ESTALE message
translation which we use during CI testing.
commit 7ff33eca6860648fb909df954da4996ce853d01d
Author: Carlos O'Donell <carlos@redhat.com>
Date: Fri Jul 7 11:27:08 2023 -0400
Translations: Add new ro support and update others.
This brings in the new Romanian language translations, and updates
nine other translations. Important translations in this update
include the Italian and Japanese translations for ESTALE which
remove the mention of "NFS" from the error message translation.
diff --git a/po/it.po b/po/it.po
index abd762b6e383008b..4c62751a92b15e64 100644
--- a/po/it.po
+++ b/po/it.po
@@ -5692,6 +5692,15 @@ msgstr "Troppi utenti"
msgid "Disk quota exceeded"
msgstr "Quota disco superata"
+#. TRANS This indicates an internal confusion in the
+#. TRANS file system which is due to file system rearrangements on the server host
+#. TRANS for NFS file systems or corruption in other file systems.
+#. TRANS Repairing this condition usually requires unmounting, possibly repairing
+#. TRANS and remounting the file system.
+#: sysdeps/gnu/errlist.h:471
+msgid "Stale file handle"
+msgstr "Riferimento al file obsoleto"
+
# lf
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
#. TRANS already specifies an NFS-mounted file.
diff --git a/po/ja.po b/po/ja.po
index 87ceb0abb22507b0..07d90ba0205f7065 100644
--- a/po/ja.po
+++ b/po/ja.po
@@ -5279,6 +5279,15 @@ msgstr "ユーザが多すぎます"
msgid "Disk quota exceeded"
msgstr "ディスク使用量制限を超過しました"
+#. TRANS This indicates an internal confusion in the
+#. TRANS file system which is due to file system rearrangements on the server host
+#. TRANS for NFS file systems or corruption in other file systems.
+#. TRANS Repairing this condition usually requires unmounting, possibly repairing
+#. TRANS and remounting the file system.
+#: sysdeps/gnu/errlist.h:471
+msgid "Stale file handle"
+msgstr "古いファイルハンドルです"
+
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
#. TRANS already specifies an NFS-mounted file.
#. TRANS (This is an error on some operating systems, but we expect it to work

File diff suppressed because it is too large Load Diff

@ -0,0 +1,23 @@
From 61a4425dd412701f6c3545d8c1acd7e1a378ee8b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 26 Jan 2022 12:18:21 -0800
Subject: x86: Don't check PTWRITE in tst-cpu-features-cpuinfo.c
Don't check PTWRITE against /proc/cpuinfo since kernel doesn't report
PTWRITE in /proc/cpuinfo.
diff --git a/sysdeps/x86/tst-cpu-features-cpuinfo.c b/sysdeps/x86/tst-cpu-features-cpuinfo.c
index 91a656b61d..981b2be5fd 100644
--- a/sysdeps/x86/tst-cpu-features-cpuinfo.c
+++ b/sysdeps/x86/tst-cpu-features-cpuinfo.c
@@ -218,7 +218,10 @@ do_test (int argc, char **argv)
fails += CHECK_PROC (popcnt, POPCNT);
fails += CHECK_PROC (3dnowprefetch, PREFETCHW);
fails += CHECK_PROC (prefetchwt1, PREFETCHWT1);
+#if 0
+ /* NB: /proc/cpuinfo doesn't report this feature. */
fails += CHECK_PROC (ptwrite, PTWRITE);
+#endif
fails += CHECK_PROC (pse, PSE);
fails += CHECK_PROC (pse36, PSE_36);
fails += CHECK_PROC (psn, PSN);

@ -0,0 +1,25 @@
From db9b47e9f996bbdb831580ff7343542a017c80ee Mon Sep 17 00:00:00 2001
From: DJ Delorie <dj@redhat.com>
Date: Thu, 9 Mar 2023 22:32:54 -0500
Subject: x86: Don't check PREFETCHWT1 in tst-cpu-features-cpuinfo.c
Don't check PREFETCHWT1 against /proc/cpuinfo since kernel doesn't report
PREFETCHWT1 in /proc/cpuinfo.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
diff --git a/sysdeps/x86/tst-cpu-features-cpuinfo.c b/sysdeps/x86/tst-cpu-features-cpuinfo.c
index c25240774e..e963592c4b 100644
--- a/sysdeps/x86/tst-cpu-features-cpuinfo.c
+++ b/sysdeps/x86/tst-cpu-features-cpuinfo.c
@@ -217,7 +217,10 @@ do_test (int argc, char **argv)
fails += CHECK_PROC (pku, PKU);
fails += CHECK_PROC (popcnt, POPCNT);
fails += CHECK_PROC (3dnowprefetch, PREFETCHW);
+#if 0
+ /* NB: /proc/cpuinfo doesn't report this feature. */
fails += CHECK_PROC (prefetchwt1, PREFETCHWT1);
+#endif
#if 0
/* NB: /proc/cpuinfo doesn't report this feature. */
fails += CHECK_PROC (ptwrite, PTWRITE);

@ -0,0 +1,21 @@
From 6229aa74fb47af17744d765cc49dbf94d3cefe12 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 26 Jan 2022 12:18:30 -0800
Subject: x86: Use CHECK_FEATURE_PRESENT on PCONFIG
PCONFIG is a privileged instruction. Use CHECK_FEATURE_PRESENT, instead
of CHECK_FEATURE_ACTIVE, on PCONFIG in tst-cpu-features-supports.c.
diff --git a/sysdeps/x86/tst-cpu-features-supports.c b/sysdeps/x86/tst-cpu-features-supports.c
index 9f10f02954..11065b642f 100644
--- a/sysdeps/x86/tst-cpu-features-supports.c
+++ b/sysdeps/x86/tst-cpu-features-supports.c
@@ -143,7 +143,7 @@ do_test (int argc, char **argv)
fails += CHECK_FEATURE_ACTIVE (movdiri, MOVDIRI);
fails += CHECK_FEATURE_ACTIVE (movdir64b, MOVDIR64B);
fails += CHECK_FEATURE_ACTIVE (osxsave, OSXSAVE);
- fails += CHECK_FEATURE_ACTIVE (pconfig, PCONFIG);
+ fails += CHECK_FEATURE_PRESENT (pconfig, PCONFIG);
fails += CHECK_FEATURE_ACTIVE (pku, PKU);
#endif
fails += CHECK_FEATURE_ACTIVE (popcnt, POPCNT);

@ -0,0 +1,111 @@
commit 856bab7717ef6d1033fd7cbf7cfb2ddefbfffb07
Author: Andreas Schwab <schwab@suse.de>
Date: Thu Feb 9 14:56:21 2023 +0100
x86/dl-cacheinfo: remove unused parameter from handle_amd
Also replace an unreachable assert with __builtin_unreachable.
Conflicts:
sysdeps/x86/dl-cacheinfo.h
(different backport order downstream)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index 3408700fc0b06e5b..cc2f8862ce88f655 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -311,7 +311,7 @@ handle_intel (int name, const struct cpu_features *cpu_features)
static long int __attribute__ ((noinline))
-handle_amd (int name, const struct cpu_features *cpu_features)
+handle_amd (int name)
{
unsigned int eax;
unsigned int ebx;
@@ -334,24 +334,23 @@ handle_amd (int name, const struct cpu_features *cpu_features)
switch (name)
{
- case _SC_LEVEL1_ICACHE_ASSOC:
- case _SC_LEVEL1_DCACHE_ASSOC:
- case _SC_LEVEL2_CACHE_ASSOC:
- case _SC_LEVEL3_CACHE_ASSOC:
- return ecx?((ebx >> 22) & 0x3ff) + 1 : 0;
- case _SC_LEVEL1_ICACHE_LINESIZE:
- case _SC_LEVEL1_DCACHE_LINESIZE:
- case _SC_LEVEL2_CACHE_LINESIZE:
- case _SC_LEVEL3_CACHE_LINESIZE:
- return ecx?(ebx & 0xfff) + 1 : 0;
- case _SC_LEVEL1_ICACHE_SIZE:
- case _SC_LEVEL1_DCACHE_SIZE:
- case _SC_LEVEL2_CACHE_SIZE:
- case _SC_LEVEL3_CACHE_SIZE:
- return ecx?(((ebx >> 22) & 0x3ff) + 1)*((ebx & 0xfff) + 1)\
- *(ecx + 1):0;
- default:
- assert (! "cannot happen");
+ case _SC_LEVEL1_ICACHE_ASSOC:
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ case _SC_LEVEL2_CACHE_ASSOC:
+ case _SC_LEVEL3_CACHE_ASSOC:
+ return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0;
+ case _SC_LEVEL1_ICACHE_LINESIZE:
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ case _SC_LEVEL3_CACHE_LINESIZE:
+ return ecx ? (ebx & 0xfff) + 1 : 0;
+ case _SC_LEVEL1_ICACHE_SIZE:
+ case _SC_LEVEL1_DCACHE_SIZE:
+ case _SC_LEVEL2_CACHE_SIZE:
+ case _SC_LEVEL3_CACHE_SIZE:
+ return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0;
+ default:
+ __builtin_unreachable ();
}
return -1;
}
@@ -701,31 +700,26 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
- data = handle_amd (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
- core = handle_amd (_SC_LEVEL2_CACHE_SIZE, cpu_features);
- shared = handle_amd (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+ data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
+ core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
+ shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
shared_per_thread = shared;
- level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
- level1_icache_linesize
- = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features);
+ level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
+ level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
level1_dcache_size = data;
- level1_dcache_assoc
- = handle_amd (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
- level1_dcache_linesize
- = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
+ level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
+ level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
level2_cache_size = core;
- level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
- level2_cache_linesize
- = handle_amd (_SC_LEVEL2_CACHE_LINESIZE, cpu_features);
+ level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
+ level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
level3_cache_size = shared;
- level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC, cpu_features);
- level3_cache_linesize
- = handle_amd (_SC_LEVEL3_CACHE_LINESIZE, cpu_features);
+ level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
+ level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
if (shared <= 0)
/* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
+ shared = core;
if (shared_per_thread <= 0)
shared_per_thread = shared;

@ -0,0 +1,281 @@
commit dcad5c8578130dec7f35fd5b0885304b59f9f543
Author: Sajan Karumanchi <sajan.karumanchi@amd.com>
Date: Tue Aug 1 15:20:55 2023 +0000
x86: Fix for cache computation on AMD legacy cpus.
Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D'
set to Zero, thus resulting in zeroed-out computed cache values.
This patch reintroduces the old way of cache computation as a
fail-safe option to handle these exceptions.
Fixed 'level4_cache_size' value through handle_amd().
Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
Tested-by: Florian Weimer <fweimer@redhat.com>
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index cc2f8862ce88f655..aed1a7be56610e99 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -315,40 +315,206 @@ handle_amd (int name)
{
unsigned int eax;
unsigned int ebx;
- unsigned int ecx;
+ unsigned int ecx = 0;
unsigned int edx;
- unsigned int count = 0x1;
+ unsigned int max_cpuid = 0;
+ unsigned int fn = 0;
/* No level 4 cache (yet). */
if (name > _SC_LEVEL3_CACHE_LINESIZE)
return 0;
- if (name >= _SC_LEVEL3_CACHE_SIZE)
- count = 0x3;
- else if (name >= _SC_LEVEL2_CACHE_SIZE)
- count = 0x2;
- else if (name >= _SC_LEVEL1_DCACHE_SIZE)
- count = 0x0;
+ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx);
+
+ if (max_cpuid >= 0x8000001D)
+ /* Use __cpuid__ '0x8000_001D' to compute cache details. */
+ {
+ unsigned int count = 0x1;
+
+ if (name >= _SC_LEVEL3_CACHE_SIZE)
+ count = 0x3;
+ else if (name >= _SC_LEVEL2_CACHE_SIZE)
+ count = 0x2;
+ else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+ count = 0x0;
+
+ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
+
+ if (ecx != 0)
+ {
+ switch (name)
+ {
+ case _SC_LEVEL1_ICACHE_ASSOC:
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ case _SC_LEVEL2_CACHE_ASSOC:
+ case _SC_LEVEL3_CACHE_ASSOC:
+ return ((ebx >> 22) & 0x3ff) + 1;
+ case _SC_LEVEL1_ICACHE_LINESIZE:
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ case _SC_LEVEL3_CACHE_LINESIZE:
+ return (ebx & 0xfff) + 1;
+ case _SC_LEVEL1_ICACHE_SIZE:
+ case _SC_LEVEL1_DCACHE_SIZE:
+ case _SC_LEVEL2_CACHE_SIZE:
+ case _SC_LEVEL3_CACHE_SIZE:
+ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1);
+ default:
+ __builtin_unreachable ();
+ }
+ return -1;
+ }
+ }
+
+ /* Legacy cache computation for CPUs prior to Bulldozer family.
+ This is also a fail-safe mechanism for some hypervisors that
+ accidentally configure __cpuid__ '0x8000_001D' to Zero. */
- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
+ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
+
+ if (max_cpuid < fn)
+ return 0;
+
+ __cpuid (fn, eax, ebx, ecx, edx);
+
+ if (name < _SC_LEVEL1_DCACHE_SIZE)
+ {
+ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
+ ecx = edx;
+ }
switch (name)
{
- case _SC_LEVEL1_ICACHE_ASSOC:
- case _SC_LEVEL1_DCACHE_ASSOC:
- case _SC_LEVEL2_CACHE_ASSOC:
+ case _SC_LEVEL1_DCACHE_SIZE:
+ return (ecx >> 14) & 0x3fc00;
+
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ ecx >>= 16;
+ if ((ecx & 0xff) == 0xff)
+ {
+ /* Fully associative. */
+ return (ecx << 2) & 0x3fc00;
+ }
+ return ecx & 0xff;
+
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ return ecx & 0xff;
+
+ case _SC_LEVEL2_CACHE_SIZE:
+ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
+
+ case _SC_LEVEL2_CACHE_ASSOC:
+ switch ((ecx >> 12) & 0xf)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return (ecx >> 12) & 0xf;
+ case 6:
+ return 8;
+ case 8:
+ return 16;
+ case 10:
+ return 32;
+ case 11:
+ return 48;
+ case 12:
+ return 64;
+ case 13:
+ return 96;
+ case 14:
+ return 128;
+ case 15:
+ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
+ default:
+ return 0;
+ }
+
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
+
+ case _SC_LEVEL3_CACHE_SIZE:
+ {
+ long int total_l3_cache = 0, l3_cache_per_thread = 0;
+ unsigned int threads = 0;
+ const struct cpu_features *cpu_features;
+
+ if ((edx & 0xf000) == 0)
+ return 0;
+
+ total_l3_cache = (edx & 0x3ffc0000) << 1;
+ cpu_features = __get_cpu_features ();
+
+ /* Figure out the number of logical threads that share L3. */
+ if (max_cpuid >= 0x80000008)
+ {
+ /* Get width of APIC ID. */
+ __cpuid (0x80000008, eax, ebx, ecx, edx);
+ threads = (ecx & 0xff) + 1;
+ }
+
+ if (threads == 0)
+ {
+ /* If APIC ID width is not available, use logical
+ processor count. */
+ __cpuid (0x00000001, eax, ebx, ecx, edx);
+ if ((edx & (1 << 28)) != 0)
+ threads = (ebx >> 16) & 0xff;
+ }
+
+ /* Cap usage of highest cache level to the number of
+ supported threads. */
+ if (threads > 0)
+ l3_cache_per_thread = total_l3_cache/threads;
+
+ /* Get shared cache per ccx for Zen architectures. */
+ if (cpu_features->basic.family >= 0x17)
+ {
+ long int l3_cache_per_ccx = 0;
+ /* Get number of threads share the L3 cache in CCX. */
+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
+ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx;
+ return l3_cache_per_ccx;
+ }
+ else
+ {
+ return l3_cache_per_thread;
+ }
+ }
+
case _SC_LEVEL3_CACHE_ASSOC:
- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0;
- case _SC_LEVEL1_ICACHE_LINESIZE:
- case _SC_LEVEL1_DCACHE_LINESIZE:
- case _SC_LEVEL2_CACHE_LINESIZE:
+ switch ((edx >> 12) & 0xf)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return (edx >> 12) & 0xf;
+ case 6:
+ return 8;
+ case 8:
+ return 16;
+ case 10:
+ return 32;
+ case 11:
+ return 48;
+ case 12:
+ return 64;
+ case 13:
+ return 96;
+ case 14:
+ return 128;
+ case 15:
+ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
+ default:
+ return 0;
+ }
+
case _SC_LEVEL3_CACHE_LINESIZE:
- return ecx ? (ebx & 0xfff) + 1 : 0;
- case _SC_LEVEL1_ICACHE_SIZE:
- case _SC_LEVEL1_DCACHE_SIZE:
- case _SC_LEVEL2_CACHE_SIZE:
- case _SC_LEVEL3_CACHE_SIZE:
- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0;
+ return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
+
default:
__builtin_unreachable ();
}
@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
- shared_per_thread = shared;
level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level3_cache_size = shared;
level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
+ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE);
if (shared <= 0)
- /* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
+ {
+ /* No shared L3 cache. All we have is the L2 cache. */
+ shared = core;
+ }
+ else if (cpu_features->basic.family < 0x17)
+ {
+ /* Account for exclusive L2 and L3 caches. */
+ shared += core;
+ }
- if (shared_per_thread <= 0)
- shared_per_thread = shared;
+ shared_per_thread = shared;
}
cpu_features->level1_icache_size = level1_icache_size;

@ -0,0 +1,261 @@
commit 103a469dc7755fd9e8ccf362f3dd4c55dc761908
Author: Sajan Karumanchi <sajan.karumanchi@amd.com>
Date: Wed Jan 18 18:29:04 2023 +0100
x86: Cache computation for AMD architecture.
All AMD architectures cache details will be computed based on
__cpuid__ `0x8000_001D` and the reference to __cpuid__ `0x8000_0006` will be
zeroed out for future architectures.
Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index 8f85f70858413ebe..a7d2cc5fef03884b 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -311,117 +311,47 @@ handle_intel (int name, const struct cpu_features *cpu_features)
static long int __attribute__ ((noinline))
-handle_amd (int name)
+handle_amd (int name, const struct cpu_features *cpu_features)
{
unsigned int eax;
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
- __cpuid (0x80000000, eax, ebx, ecx, edx);
+ unsigned int count = 0x1;
/* No level 4 cache (yet). */
if (name > _SC_LEVEL3_CACHE_LINESIZE)
return 0;
- unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
- if (eax < fn)
- return 0;
-
- __cpuid (fn, eax, ebx, ecx, edx);
+ if (name >= _SC_LEVEL3_CACHE_SIZE)
+ count = 0x3;
+ else if (name >= _SC_LEVEL2_CACHE_SIZE)
+ count = 0x2;
+ else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+ count = 0x0;
- if (name < _SC_LEVEL1_DCACHE_SIZE)
- {
- name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
- ecx = edx;
- }
+ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
switch (name)
{
- case _SC_LEVEL1_DCACHE_SIZE:
- return (ecx >> 14) & 0x3fc00;
-
- case _SC_LEVEL1_DCACHE_ASSOC:
- ecx >>= 16;
- if ((ecx & 0xff) == 0xff)
- /* Fully associative. */
- return (ecx << 2) & 0x3fc00;
- return ecx & 0xff;
-
- case _SC_LEVEL1_DCACHE_LINESIZE:
- return ecx & 0xff;
-
- case _SC_LEVEL2_CACHE_SIZE:
- return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
-
- case _SC_LEVEL2_CACHE_ASSOC:
- switch ((ecx >> 12) & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
- return (ecx >> 12) & 0xf;
- case 6:
- return 8;
- case 8:
- return 16;
- case 10:
- return 32;
- case 11:
- return 48;
- case 12:
- return 64;
- case 13:
- return 96;
- case 14:
- return 128;
- case 15:
- return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
- default:
- return 0;
- }
- /* NOTREACHED */
-
- case _SC_LEVEL2_CACHE_LINESIZE:
- return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
-
- case _SC_LEVEL3_CACHE_SIZE:
- return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
-
- case _SC_LEVEL3_CACHE_ASSOC:
- switch ((edx >> 12) & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
- return (edx >> 12) & 0xf;
- case 6:
- return 8;
- case 8:
- return 16;
- case 10:
- return 32;
- case 11:
- return 48;
- case 12:
- return 64;
- case 13:
- return 96;
- case 14:
- return 128;
- case 15:
- return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
- default:
- return 0;
- }
- /* NOTREACHED */
-
- case _SC_LEVEL3_CACHE_LINESIZE:
- return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
-
- default:
- assert (! "cannot happen");
+ case _SC_LEVEL1_ICACHE_ASSOC:
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ case _SC_LEVEL2_CACHE_ASSOC:
+ case _SC_LEVEL3_CACHE_ASSOC:
+ return ecx?((ebx >> 22) & 0x3ff) + 1 : 0;
+ case _SC_LEVEL1_ICACHE_LINESIZE:
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ case _SC_LEVEL3_CACHE_LINESIZE:
+ return ecx?(ebx & 0xfff) + 1 : 0;
+ case _SC_LEVEL1_ICACHE_SIZE:
+ case _SC_LEVEL1_DCACHE_SIZE:
+ case _SC_LEVEL2_CACHE_SIZE:
+ case _SC_LEVEL3_CACHE_SIZE:
+ return ecx?(((ebx >> 22) & 0x3ff) + 1)*((ebx & 0xfff) + 1)\
+ *(ecx + 1):0;
+ default:
+ assert (! "cannot happen");
}
return -1;
}
@@ -698,10 +628,6 @@ static void
dl_init_cacheinfo (struct cpu_features *cpu_features)
{
/* Find out what brand of processor. */
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int max_cpuid_ex;
long int data = -1;
long int shared = -1;
long int core = -1;
@@ -771,70 +697,30 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
- data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
- core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
- shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+ data = handle_amd (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
+ core = handle_amd (_SC_LEVEL2_CACHE_SIZE, cpu_features);
+ shared = handle_amd (_SC_LEVEL3_CACHE_SIZE, cpu_features);
- level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
- level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
+ level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
+ level1_icache_linesize
+ = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features);
level1_dcache_size = data;
- level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
- level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
+ level1_dcache_assoc
+ = handle_amd (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
+ level1_dcache_linesize
+ = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
level2_cache_size = core;
- level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
- level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
+ level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
+ level2_cache_linesize
+ = handle_amd (_SC_LEVEL2_CACHE_LINESIZE, cpu_features);
level3_cache_size = shared;
- level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
- level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
-
- /* Get maximum extended function. */
- __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
+ level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC, cpu_features);
+ level3_cache_linesize
+ = handle_amd (_SC_LEVEL3_CACHE_LINESIZE, cpu_features);
if (shared <= 0)
- /* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
- else
- {
- /* Figure out the number of logical threads that share L3. */
- if (max_cpuid_ex >= 0x80000008)
- {
- /* Get width of APIC ID. */
- __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
- threads = 1 << ((ecx >> 12) & 0x0f);
- }
-
- if (threads == 0 || cpu_features->basic.family >= 0x17)
- {
- /* If APIC ID width is not available, use logical
- processor count. */
- __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
-
- if ((edx & (1 << 28)) != 0)
- threads = (ebx >> 16) & 0xff;
- }
-
- /* Cap usage of highest cache level to the number of
- supported threads. */
- if (threads > 0)
- shared /= threads;
-
- /* Get shared cache per ccx for Zen architectures. */
- if (cpu_features->basic.family >= 0x17)
- {
- unsigned int eax;
-
- /* Get number of threads share the L3 cache in CCX. */
- __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
-
- unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
- shared *= threads_per_ccx;
- }
- else
- {
- /* Account for exclusive L2 and L3 caches. */
- shared += core;
- }
- }
+ /* No shared L3 cache. All we have is the L2 cache. */
+ shared = core;
}
cpu_features->level1_icache_size = level1_icache_size;

@ -0,0 +1,200 @@
glibc-rh2169978-2.patch already changes _rtld_global_ro@GLIBC_PRIVATE
layout, so we can include this change alongside it. This will allow us
to include new hwcaps and platform names without changing internal ABI.
commit 5ecc98241229d494aaad23a4a3fe106fe11e1f40
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Aug 25 16:34:20 2022 +0200
s390: Move hwcaps/platform names out of _rtld_global_ro
Changes to these arrays are often backported to stable releases,
but additions to these arrays shift the offsets of the following
_rtld_global_ro members, thus breaking the GLIBC_PRIVATE ABI.
Obviously, this change is itself an internal ABI break, but at least
it will avoid further ABI breaks going forward.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile
index ade8663218c30ab2..80e88557c9924ff0 100644
--- a/sysdeps/s390/Makefile
+++ b/sysdeps/s390/Makefile
@@ -43,6 +43,8 @@ $(modpfx)gconv-modules-s390.conf: ../sysdeps/s390/gconv-modules-s390.conf \
endif
ifeq ($(subdir),elf)
+sysdep-dl-routines += dl-procinfo-s390
+
ifeq ($(build-shared),yes)
tests += tst-dl-runtime-resolve-noaudit tst-dl-runtime-resolve-audit \
tst-dl-runtime-profile-noaudit
diff --git a/sysdeps/s390/dl-procinfo-s390.c b/sysdeps/s390/dl-procinfo-s390.c
new file mode 100644
index 0000000000000000..559f3827936cd017
--- /dev/null
+++ b/sysdeps/s390/dl-procinfo-s390.c
@@ -0,0 +1,32 @@
+/* Data for s390 version of processor capability information.
+ Copyright (C) 2006-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dl-procinfo.h>
+
+const char _dl_s390_cap_flags[_DL_HWCAP_COUNT][9] =
+ {
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh",
+ "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt",
+ "vxp2", "nnpa", "pcimio", "sie"
+ };
+
+const char _dl_s390_platforms[_DL_PLATFORMS_COUNT][7] =
+ {
+ "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15",
+ "z16"
+ };
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
index 755b54ff13a0fa2f..e63ac00382501e00 100644
--- a/sysdeps/s390/dl-procinfo.c
+++ b/sysdeps/s390/dl-procinfo.c
@@ -17,66 +17,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* This information must be kept in sync with the _DL_HWCAP_COUNT and
- _DL_PLATFORM_COUNT definitions in procinfo.h.
-
- If anything should be added here check whether the size of each string
- is still ok with the given array size.
-
- All the #ifdefs in the definitions are quite irritating but
- necessary if we want to avoid duplicating the information. There
- are three different modes:
-
- - PROCINFO_DECL is defined. This means we are only interested in
- declarations.
-
- - PROCINFO_DECL is not defined:
-
- + if SHARED is defined the file is included in an array
- initializer. The .element = { ... } syntax is needed.
-
- + if SHARED is not defined a normal array initialization is
- needed.
- */
-
-#ifndef PROCINFO_CLASS
-# define PROCINFO_CLASS
-#endif
-
-#if !defined PROCINFO_DECL && defined SHARED
- ._dl_s390_cap_flags
-#else
-PROCINFO_CLASS const char _dl_s390_cap_flags[23][9]
-#endif
-#ifndef PROCINFO_DECL
-= {
- "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh",
- "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt",
- "vxp2", "nnpa", "pcimio", "sie"
- }
-#endif
-#if !defined SHARED || defined PROCINFO_DECL
-;
-#else
-,
-#endif
-
-#if !defined PROCINFO_DECL && defined SHARED
- ._dl_s390_platforms
-#else
-PROCINFO_CLASS const char _dl_s390_platforms[11][7]
-#endif
-#ifndef PROCINFO_DECL
-= {
- "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15",
- "z16"
- }
-#endif
-#if !defined SHARED || defined PROCINFO_DECL
-;
-#else
-,
-#endif
+/* The hwcap and platform strings are now in
+ sysdeps/s390/dl-procinfo-s390.c. */
+/* Needed by sysdeps/unix/sysv/linux/dl-vdso-setup.c (as included from
+ sysdeps/generic/ldsodefs.h). */
#undef PROCINFO_DECL
#undef PROCINFO_CLASS
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
index d44e1dd97441bd90..eb782fc3014cd012 100644
--- a/sysdeps/s390/dl-procinfo.h
+++ b/sysdeps/s390/dl-procinfo.h
@@ -22,8 +22,10 @@
#include <ldsodefs.h>
#define _DL_HWCAP_COUNT 23
+extern const char _dl_s390_cap_flags[_DL_HWCAP_COUNT][9] attribute_hidden;
#define _DL_PLATFORMS_COUNT 11
+extern const char _dl_s390_platforms[_DL_PLATFORMS_COUNT][7] attribute_hidden;
/* The kernel provides up to 32 capability bits with elf_hwcap. */
#define _DL_FIRST_PLATFORM 32
@@ -79,7 +81,7 @@ static inline const char *
__attribute__ ((unused))
_dl_hwcap_string (int idx)
{
- return GLRO(dl_s390_cap_flags)[idx];
+ return _dl_s390_cap_flags[idx];
};
static inline int
@@ -90,7 +92,7 @@ _dl_string_hwcap (const char *str)
for (i = 0; i < _DL_HWCAP_COUNT; i++)
{
- if (strcmp (str, GLRO(dl_s390_cap_flags)[i]) == 0)
+ if (strcmp (str, _dl_s390_cap_flags[i]) == 0)
return i;
}
return -1;
@@ -105,7 +107,7 @@ _dl_string_platform (const char *str)
if (str != NULL)
for (i = 0; i < _DL_PLATFORMS_COUNT; ++i)
{
- if (strcmp (str, GLRO(dl_s390_platforms)[i]) == 0)
+ if (strcmp (str, _dl_s390_platforms[i]) == 0)
return _DL_FIRST_PLATFORM + i;
}
return -1;
diff --git a/sysdeps/unix/sysv/linux/s390/dl-procinfo.h b/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
index 76ce33e31d3a280d..c99870b2e18b9e9e 100644
--- a/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
+++ b/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
@@ -40,7 +40,7 @@ _dl_procinfo (unsigned int type, unsigned long int word)
for (i = 0; i < _DL_HWCAP_COUNT; ++i)
if (word & (1UL << i))
- _dl_printf (" %s", GLRO(dl_s390_cap_flags)[i]);
+ _dl_printf (" %s", _dl_s390_cap_flags[i]);
_dl_printf ("\n");

@ -0,0 +1,683 @@
Backported with an additional line in
sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list in order to
maintain tunable ordering, which happens to be ABI.
commit 41f67ccbe92b4fd09e1062b383e55e407ae5bfa1
Author: Stefan Liebler <stli@linux.ibm.com>
Date: Thu Feb 2 14:57:50 2023 +0100
S390: Influence hwcaps/stfle via GLIBC_TUNABLES.
This patch enables the option to influence hwcaps and stfle bits used
by the s390 specific ifunc-resolvers. The currently x86-specific
tunable glibc.cpu.hwcaps is also used on s390x to achieve the task. In
addition the user can also set a CPU arch-level like z13 instead of
single HWCAP and STFLE features.
Note that the tunable only handles the features which are really used
in the IFUNC-resolvers. All others are ignored as the values are only
used inside glibc. Thus we can influence:
- HWCAP_S390_VXRS (z13)
- HWCAP_S390_VXRS_EXT (z14)
- HWCAP_S390_VXRS_EXT2 (z15)
- STFLE_MIE3 (z15)
The influenced hwcap/stfle-bits are stored in the s390-specific
cpu_features struct which also contains reserved fields for future
usage.
The ifunc-resolvers and users of stfle bits are adjusted to use the
information from cpu_features struct.
On 31bit, the ELF_MACHINE_IRELATIVE macro is now also defined.
Otherwise the new ifunc-resolvers segfault as they depend on
the not yet processed _rtld_global_ro@GLIBC_PRIVATE relocation.
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 5ab3212f34e3dc37..561e0df230646de1 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -469,7 +469,11 @@ enable CPU/ARCH feature @code{yyy}, disable CPU/ARCH feature @code{xxx}
and @code{zzz} where the feature name is case-sensitive and has to match
the ones in @code{sysdeps/x86/cpu-features.h}.
-This tunable is specific to i386 and x86-64.
+On s390x, the supported HWCAP and STFLE features can be found in
+@code{sysdeps/s390/cpu-features.c}. In addition the user can also set
+a CPU arch-level like @code{z13} instead of single HWCAP and STFLE features.
+
+This tunable is specific to i386, x86-64 and s390x.
@end deftp
@deftp Tunable glibc.cpu.cached_memopt
diff --git a/sysdeps/s390/cpu-features.c b/sysdeps/s390/cpu-features.c
new file mode 100644
index 0000000000000000..afeb9b56382efa96
--- /dev/null
+++ b/sysdeps/s390/cpu-features.c
@@ -0,0 +1,239 @@
+/* Initialize cpu feature data. s390x version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <cpu-features.h>
+
+#if HAVE_TUNABLES
+# include <elf/dl-tunables.h>
+# include <ifunc-memcmp.h>
+# include <string.h>
+extern __typeof (memcmp) MEMCMP_DEFAULT;
+#endif
+
+#if HAVE_TUNABLES
+# define S390_COPY_CPU_FEATURES(SRC_PTR, DEST_PTR) \
+ (DEST_PTR)->hwcap = (SRC_PTR)->hwcap; \
+ (DEST_PTR)->stfle_bits[0] = (SRC_PTR)->stfle_bits[0];
+
+static void
+TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
+{
+ /* The current IFUNC selection is always using the most recent
+ features which are available via AT_HWCAP or STFLE-bits. But in
+ some scenarios it is useful to adjust this selection.
+
+ The environment variable:
+
+ GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,zzz,....
+
+ can be used to enable HWCAP/STFLE feature yyy, disable HWCAP/STFLE feature
+ xxx, where the feature name is case-sensitive and has to match the ones
+ used below. Furthermore, the ARCH-level zzz can be used to set various
+ HWCAP/STFLE features at once. */
+
+ /* Copy the features from dl_s390_cpu_features, which contains the features
+ provided by AT_HWCAP and stfle-instruction. */
+ struct cpu_features *cpu_features = &GLRO(dl_s390_cpu_features);
+ struct cpu_features cpu_features_orig;
+ S390_COPY_CPU_FEATURES (cpu_features, &cpu_features_orig);
+ struct cpu_features cpu_features_curr;
+ S390_COPY_CPU_FEATURES (cpu_features, &cpu_features_curr);
+
+ const char *token = valp->strval;
+ do
+ {
+ const char *token_end, *feature;
+ bool disable;
+ size_t token_len;
+ size_t feature_len;
+
+ /* Find token separator or end of string. */
+ for (token_end = token; *token_end != ','; token_end++)
+ if (*token_end == '\0')
+ break;
+
+ /* Determine feature. */
+ token_len = token_end - token;
+ if (*token == '-')
+ {
+ disable = true;
+ feature = token + 1;
+ feature_len = token_len - 1;
+ }
+ else
+ {
+ disable = false;
+ feature = token;
+ feature_len = token_len;
+ }
+
+ /* Handle only the features here which are really used in the
+ IFUNC-resolvers. All others are ignored as the values are only used
+ inside glibc. */
+ bool reset_features = false;
+ unsigned long int hwcap_mask = 0UL;
+ unsigned long long stfle_bits0_mask = 0ULL;
+
+ if ((*feature == 'z' || *feature == 'a'))
+ {
+ if ((feature_len == 5 && *feature == 'z'
+ && MEMCMP_DEFAULT (feature, "zEC12", 5) == 0)
+ || (feature_len == 6 && *feature == 'a'
+ && MEMCMP_DEFAULT (feature, "arch10", 6) == 0))
+ {
+ reset_features = true;
+ disable = true;
+ hwcap_mask = HWCAP_S390_VXRS | HWCAP_S390_VXRS_EXT
+ | HWCAP_S390_VXRS_EXT2;
+ stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+ }
+ else if ((feature_len == 3 && *feature == 'z'
+ && MEMCMP_DEFAULT (feature, "z13", 3) == 0)
+ || (feature_len == 6 && *feature == 'a'
+ && MEMCMP_DEFAULT (feature, "arch11", 6) == 0))
+ {
+ reset_features = true;
+ disable = true;
+ hwcap_mask = HWCAP_S390_VXRS_EXT | HWCAP_S390_VXRS_EXT2;
+ stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+ }
+ else if ((feature_len == 3 && *feature == 'z'
+ && MEMCMP_DEFAULT (feature, "z14", 3) == 0)
+ || (feature_len == 6 && *feature == 'a'
+ && MEMCMP_DEFAULT (feature, "arch12", 6) == 0))
+ {
+ reset_features = true;
+ disable = true;
+ hwcap_mask = HWCAP_S390_VXRS_EXT2;
+ stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+ }
+ else if ((feature_len == 3 && *feature == 'z'
+ && (MEMCMP_DEFAULT (feature, "z15", 3) == 0
+ || MEMCMP_DEFAULT (feature, "z16", 3) == 0))
+ || (feature_len == 6
+ && (MEMCMP_DEFAULT (feature, "arch13", 6) == 0
+ || MEMCMP_DEFAULT (feature, "arch14", 6) == 0)))
+ {
+ /* For z15 or newer we don't have to disable anything,
+ but we have to reset to the original values. */
+ reset_features = true;
+ }
+ }
+ else if (*feature == 'H')
+ {
+ if (feature_len == 15
+ && MEMCMP_DEFAULT (feature, "HWCAP_S390_VXRS", 15) == 0)
+ {
+ hwcap_mask = HWCAP_S390_VXRS;
+ if (disable)
+ hwcap_mask |= HWCAP_S390_VXRS_EXT | HWCAP_S390_VXRS_EXT2;
+ }
+ else if (feature_len == 19
+ && MEMCMP_DEFAULT (feature, "HWCAP_S390_VXRS_EXT", 19) == 0)
+ {
+ hwcap_mask = HWCAP_S390_VXRS_EXT;
+ if (disable)
+ hwcap_mask |= HWCAP_S390_VXRS_EXT2;
+ else
+ hwcap_mask |= HWCAP_S390_VXRS;
+ }
+ else if (feature_len == 20
+ && MEMCMP_DEFAULT (feature, "HWCAP_S390_VXRS_EXT2", 20) == 0)
+ {
+ hwcap_mask = HWCAP_S390_VXRS_EXT2;
+ if (!disable)
+ hwcap_mask |= HWCAP_S390_VXRS | HWCAP_S390_VXRS_EXT;
+ }
+ }
+ else if (*feature == 'S')
+ {
+ if (feature_len == 10
+ && MEMCMP_DEFAULT (feature, "STFLE_MIE3", 10) == 0)
+ {
+ stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+ }
+ }
+
+ /* Perform the actions determined above. */
+ if (reset_features)
+ {
+ S390_COPY_CPU_FEATURES (&cpu_features_orig, &cpu_features_curr);
+ }
+
+ if (hwcap_mask != 0UL)
+ {
+ if (disable)
+ cpu_features_curr.hwcap &= ~hwcap_mask;
+ else
+ cpu_features_curr.hwcap |= hwcap_mask;
+ }
+
+ if (stfle_bits0_mask != 0ULL)
+ {
+ if (disable)
+ cpu_features_curr.stfle_bits[0] &= ~stfle_bits0_mask;
+ else
+ cpu_features_curr.stfle_bits[0] |= stfle_bits0_mask;
+ }
+
+ /* Jump over current token ... */
+ token += token_len;
+
+ /* ... and skip token separator for next round. */
+ if (*token == ',') token++;
+ }
+ while (*token != '\0');
+
+ /* Copy back the features after checking that no unsupported features were
+ enabled by user. */
+ cpu_features->hwcap = cpu_features_curr.hwcap & cpu_features_orig.hwcap;
+ cpu_features->stfle_bits[0] = cpu_features_curr.stfle_bits[0]
+ & cpu_features_orig.stfle_bits[0];
+}
+#endif
+
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
+{
+ /* Fill cpu_features as passed by kernel and machine. */
+ cpu_features->hwcap = GLRO(dl_hwcap);
+
+ /* We want just 1 double word to be returned. */
+ if (__glibc_likely ((cpu_features->hwcap & HWCAP_S390_STFLE)
+ && (cpu_features->hwcap & HWCAP_S390_ZARCH)
+ && (cpu_features->hwcap & HWCAP_S390_HIGH_GPRS)))
+ {
+ register unsigned long reg0 __asm__("0") = 0;
+ __asm__ __volatile__(".machine push" "\n\t"
+ ".machine \"z9-109\"" "\n\t"
+ ".machinemode \"zarch_nohighgprs\"\n\t"
+ "stfle %0" "\n\t"
+ ".machine pop" "\n"
+ : "=QS" (cpu_features->stfle_bits[0]),
+ "+d" (reg0)
+ : : "cc");
+ }
+ else
+ {
+ cpu_features->stfle_bits[0] = 0ULL;
+ }
+
+#if HAVE_TUNABLES
+ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+#endif
+}
diff --git a/sysdeps/s390/cpu-features.h b/sysdeps/s390/cpu-features.h
new file mode 100644
index 0000000000000000..5e6b58f7c5bb07e4
--- /dev/null
+++ b/sysdeps/s390/cpu-features.h
@@ -0,0 +1,46 @@
+/* Initialize cpu feature data. s390x version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef __CPU_FEATURES_S390X_H
+# define __CPU_FEATURES_S390X_H
+
+#define S390_STFLE_BITS_Z10 34 /* General instructions extension */
+#define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */
+#define S390_STFLE_BITS_ARCH13_MIE3 61 /* Miscellaneous-Instruction-Extensions
+ Facility 3, e.g. mvcrl. */
+
+#define S390_STFLE_MASK_ARCH13_MIE3 (1ULL << (63 - S390_STFLE_BITS_ARCH13_MIE3))
+
+
+#define S390_IS_ARCH13_MIE3(STFLE_BITS_ARRAY) \
+ (((STFLE_BITS_ARRAY)[0] & S390_STFLE_MASK_ARCH13_MIE3) != 0)
+
+#define S390_IS_Z196(STFLE_BITS_ARRAY) \
+ (((STFLE_BITS_ARRAY)[0] & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0)
+
+#define S390_IS_Z10(STFLE_BITS_ARRAY) \
+ (((STFLE_BITS_ARRAY)[0] & (1ULL << (63 - S390_STFLE_BITS_Z10))) != 0)
+
+struct cpu_features
+{
+ unsigned long int hwcap;
+ unsigned long int __reserved_hwcap2;
+ unsigned long long stfle_bits[3];
+ unsigned long long __reserved[11];
+};
+
+#endif /* __CPU_FEATURES_S390X_H */
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
index e63ac00382501e00..7f03aaba3f500034 100644
--- a/sysdeps/s390/dl-procinfo.c
+++ b/sysdeps/s390/dl-procinfo.c
@@ -22,5 +22,42 @@
/* Needed by sysdeps/unix/sysv/linux/dl-vdso-setup.c (as included from
sysdeps/generic/ldsodefs.h). */
+
+/* All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+ ._dl_s390_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_s390_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
#undef PROCINFO_DECL
#undef PROCINFO_CLASS
diff --git a/sysdeps/s390/dl-tunables.list b/sysdeps/s390/dl-tunables.list
new file mode 100644
index 0000000000000000..7a09828c48a368ef
--- /dev/null
+++ b/sysdeps/s390/dl-tunables.list
@@ -0,0 +1,25 @@
+# s390 specific tunables.
+# Copyright (C) 2023 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+glibc {
+ cpu {
+ hwcaps {
+ type: STRING
+ }
+ }
+}
diff --git a/sysdeps/s390/ldsodefs.h b/sysdeps/s390/ldsodefs.h
index 61549d4069289b9f..acf6a98b21c7e077 100644
--- a/sysdeps/s390/ldsodefs.h
+++ b/sysdeps/s390/ldsodefs.h
@@ -20,6 +20,7 @@
#define _S390_LDSODEFS_H 1
#include <elf.h>
+#include <cpu-features.h>
struct La_s390_32_regs;
struct La_s390_32_retval;
diff --git a/sysdeps/s390/libc-start.c b/sysdeps/s390/libc-start.c
new file mode 100644
index 0000000000000000..eb35d6679fb7d62c
--- /dev/null
+++ b/sysdeps/s390/libc-start.c
@@ -0,0 +1,33 @@
+/* Override csu/libc-start.c on s390x.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+
+/* Mark symbols hidden in static PIE for early self relocation to work. */
+# if BUILD_PIE_DEFAULT
+# pragma GCC visibility push(hidden)
+# endif
+# include <ldsodefs.h>
+# include <cpu-features.c>
+
+extern struct cpu_features _dl_s390_cpu_features;
+
+# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_s390_cpu_features)
+
+#endif
+#include <csu/libc-start.c>
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
index 2ef38b72ddac7c18..af2c75f5df7c7e1d 100644
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
@@ -19,6 +19,7 @@
#include <assert.h>
#include <string.h>
#include <wchar.h>
+#include <cpu-features.h>
#include <ifunc-impl-list.h>
#include <ifunc-resolve.h>
#include <ifunc-memset.h>
@@ -81,14 +82,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
/* Get hardware information. */
- unsigned long int dl_hwcap = GLRO (dl_hwcap);
- unsigned long long stfle_bits = 0ULL;
- if ((dl_hwcap & HWCAP_S390_STFLE)
- && (dl_hwcap & HWCAP_S390_ZARCH)
- && (dl_hwcap & HWCAP_S390_HIGH_GPRS))
- {
- S390_STORE_STFLE (stfle_bits);
- }
+ const struct cpu_features *features = &GLRO(dl_s390_cpu_features);
+ unsigned long int dl_hwcap = features->hwcap;
+ const unsigned long long * __attribute__((unused)) stfle_bits
+ = features->stfle_bits;
#if HAVE_MEMSET_IFUNC
IFUNC_IMPL (i, name, memset,
diff --git a/sysdeps/s390/multiarch/ifunc-resolve.h b/sysdeps/s390/multiarch/ifunc-resolve.h
index 4e50f2b22582fee8..c22d59d2a341fff7 100644
--- a/sysdeps/s390/multiarch/ifunc-resolve.h
+++ b/sysdeps/s390/multiarch/ifunc-resolve.h
@@ -19,42 +19,17 @@
#include <unistd.h>
#include <dl-procinfo.h>
+#include <cpu-features.h>
-#define S390_STFLE_BITS_Z10 34 /* General instructions extension */
-#define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */
-#define S390_STFLE_BITS_ARCH13_MIE3 61 /* Miscellaneous-Instruction-Extensions
- Facility 3, e.g. mvcrl. */
-
-#define S390_IS_ARCH13_MIE3(STFLE_BITS) \
- ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_ARCH13_MIE3))) != 0)
-
-#define S390_IS_Z196(STFLE_BITS) \
- ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0)
-
-#define S390_IS_Z10(STFLE_BITS) \
- ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z10))) != 0)
-
-#define S390_STORE_STFLE(STFLE_BITS) \
- /* We want just 1 double word to be returned. */ \
- register unsigned long reg0 __asm__("0") = 0; \
- \
- __asm__ __volatile__(".machine push" "\n\t" \
- ".machine \"z9-109\"" "\n\t" \
- ".machinemode \"zarch_nohighgprs\"\n\t" \
- "stfle %0" "\n\t" \
- ".machine pop" "\n" \
- : "=QS" (STFLE_BITS), "+d" (reg0) \
- : : "cc");
#define s390_libc_ifunc_expr_stfle_init() \
- unsigned long long stfle_bits = 0ULL; \
- if (__glibc_likely ((hwcap & HWCAP_S390_STFLE) \
- && (hwcap & HWCAP_S390_ZARCH) \
- && (hwcap & HWCAP_S390_HIGH_GPRS))) \
- { \
- S390_STORE_STFLE (stfle_bits); \
- }
+ const unsigned long long *stfle_bits = features->stfle_bits;
+
+#define s390_libc_ifunc_expr_init() \
+ const struct cpu_features *features = &GLRO(dl_s390_cpu_features); \
+ /* The hwcap from kernel is passed as argument, but we \
+ explicitly use the hwcaps from cpu-features struct. */ \
+ hwcap = features->hwcap;
-#define s390_libc_ifunc_expr_init()
#define s390_libc_ifunc_expr(TYPE_FUNC, FUNC, EXPR) \
__ifunc (TYPE_FUNC, FUNC, EXPR, unsigned long int hwcap, \
s390_libc_ifunc_expr_init);
diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h
index ba681d1eac7bda53..34e5bcb8d7f18694 100644
--- a/sysdeps/s390/s390-32/dl-machine.h
+++ b/sysdeps/s390/s390-32/dl-machine.h
@@ -29,6 +29,9 @@
#include <dl-irel.h>
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
+#include <cpu-features.c>
+
+#define ELF_MACHINE_IRELATIVE R_390_IRELATIVE
/* This is an older, now obsolete value. */
#define EM_S390_OLD 0xA390
@@ -289,6 +292,12 @@ dl_platform_init (void)
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
/* Avoid an empty string which would disturb us. */
GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_s390_cpu_features));
+#endif
}
static inline Elf32_Addr
diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h
index af2cffd9f904274e..e308937ca9ca54cf 100644
--- a/sysdeps/s390/s390-64/dl-machine.h
+++ b/sysdeps/s390/s390-64/dl-machine.h
@@ -30,6 +30,7 @@
#include <dl-irel.h>
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
+#include <cpu-features.c>
#define ELF_MACHINE_IRELATIVE R_390_IRELATIVE
@@ -237,6 +238,13 @@ dl_platform_init (void)
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
/* Avoid an empty string which would disturb us. */
GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_s390_cpu_features));
+#endif
+
}
static inline Elf64_Addr
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
index c3bc83f33910af22..3dd7e891c5e37b1a 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
@@ -25,3 +25,4 @@
@order glibc.rtld.optional_static_tls
@order glibc.malloc.tcache_max
@order glibc.malloc.check
+@order glibc.cpu.hwcaps
diff --git a/sysdeps/unix/sysv/linux/s390/sysconf.c b/sysdeps/unix/sysv/linux/s390/sysconf.c
index 2364a8b7abcd138a..14821b5f248cd991 100644
--- a/sysdeps/unix/sysv/linux/s390/sysconf.c
+++ b/sysdeps/unix/sysv/linux/s390/sysconf.c
@@ -18,6 +18,7 @@
#include <unistd.h>
#include <dl-procinfo.h>
+#include <cpu-features.h>
static long int linux_sysconf (int name);
@@ -44,12 +45,14 @@ get_cache_info (int level, int attr, int type)
|| type < CACHE_TYPE_DATA || type > CACHE_TYPE_INSTRUCTION)
return 0L;
+ const struct cpu_features *features = &GLRO(dl_s390_cpu_features);
+
/* Check if ecag-instruction is available.
ecag - extract CPU attribute (only in zarch; arch >= z10; in as 2.24) */
- if (!(GLRO (dl_hwcap) & HWCAP_S390_STFLE)
+ if (!(features->hwcap & HWCAP_S390_STFLE)
#if !defined __s390x__
- || !(GLRO (dl_hwcap) & HWCAP_S390_ZARCH)
- || !(GLRO (dl_hwcap) & HWCAP_S390_HIGH_GPRS)
+ || !(features->hwcap & HWCAP_S390_ZARCH)
+ || !(features->hwcap & HWCAP_S390_HIGH_GPRS)
#endif /* !__s390x__ */
)
{
@@ -62,25 +65,7 @@ get_cache_info (int level, int attr, int type)
return 0L;
}
- /* Store facility list and check for z10.
- (see ifunc-resolver for details) */
- register unsigned long reg0 __asm__("0") = 0;
-#ifdef __s390x__
- unsigned long stfle_bits;
-# define STFLE_Z10_MASK (1UL << (63 - 34))
-#else
- unsigned long long stfle_bits;
-# define STFLE_Z10_MASK (1ULL << (63 - 34))
-#endif /* !__s390x__ */
- __asm__ __volatile__(".machine push" "\n\t"
- ".machinemode \"zarch_nohighgprs\"\n\t"
- ".machine \"z9-109\"" "\n\t"
- "stfle %0" "\n\t"
- ".machine pop" "\n"
- : "=QS" (stfle_bits), "+d" (reg0)
- : : "cc");
-
- if (!(stfle_bits & STFLE_Z10_MASK))
+ if (!S390_IS_Z10 (features->stfle_bits))
{
/* We are at least on a z9 machine.
Return 256byte for LINESIZE for L1 d/i-cache,

@ -0,0 +1,141 @@
From fd78cfa72ea2bab30fdb4e1e0672b34471426c05 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Sat, 18 Feb 2023 12:53:41 -0800
Subject: [PATCH] stdlib: Undo post review change to 16adc58e73f3 [BZ #27749]
Content-type: text/plain; charset=UTF-8
Post review removal of "goto restart" from
https://sourceware.org/pipermail/libc-alpha/2021-April/125470.html
introduced a bug where some atexit handlers were skipped.
Signed-off-by: Vitaly Buka <vitalybuka@google.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
---
stdlib/Makefile | 1 +
stdlib/exit.c | 7 +++-
stdlib/test-atexit-recursive.c | 75 ++++++++++++++++++++++++++++++++++
3 files changed, 81 insertions(+), 2 deletions(-)
create mode 100644 stdlib/test-atexit-recursive.c
diff --git a/stdlib/Makefile b/stdlib/Makefile
index ff5096ad1b..cd32f53b56 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -171,6 +171,7 @@ tests := \
test-a64l \
test-at_quick_exit-race \
test-atexit-race \
+ test-atexit-recursive \
test-bz22786 \
test-canon \
test-canon2 \
diff --git a/stdlib/exit.c b/stdlib/exit.c
index 6b1eed6445..1cd0bdfe94 100644
--- a/stdlib/exit.c
+++ b/stdlib/exit.c
@@ -51,7 +51,10 @@ __run_exit_handlers (int status, struct exit_function_list **listp,
exit (). */
while (true)
{
- struct exit_function_list *cur = *listp;
+ struct exit_function_list *cur;
+
+ restart:
+ cur = *listp;
if (cur == NULL)
{
@@ -113,7 +116,7 @@ __run_exit_handlers (int status, struct exit_function_list **listp,
if (__glibc_unlikely (new_exitfn_called != __new_exitfn_called))
/* The last exit function, or another thread, has registered
more exit functions. Start the loop over. */
- continue;
+ goto restart;
}
*listp = cur->next;
diff --git a/stdlib/test-atexit-recursive.c b/stdlib/test-atexit-recursive.c
new file mode 100644
index 0000000000..0596b9763b
--- /dev/null
+++ b/stdlib/test-atexit-recursive.c
@@ -0,0 +1,75 @@
+/* Support file for atexit/exit, etc. race tests (BZ #27749).
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Check that atexit handlers registered from a handler are still called. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xunistd.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static void
+atexit_cb (void)
+{
+}
+
+static void
+atexit_last (void)
+{
+ _exit (1);
+}
+
+static void
+atexit_recursive (void)
+{
+ atexit (&atexit_cb);
+ atexit (&atexit_last);
+}
+
+_Noreturn static void
+test_and_exit (int count)
+{
+ for (int i = 0; i < count; ++i)
+ atexit (&atexit_cb);
+ atexit (&atexit_recursive);
+ exit (0);
+}
+
+static int
+do_test (void)
+{
+ for (int i = 0; i < 100; ++i)
+ if (xfork () == 0)
+ test_and_exit (i);
+
+ for (int i = 0; i < 100; ++i)
+ {
+ int status;
+ xwaitpid (0, &status, 0);
+ if (!WIFEXITED (status))
+ FAIL_EXIT1 ("Failed iterations %d", i);
+ TEST_COMPARE (WEXITSTATUS (status), 1);
+ }
+
+ return 0;
+}
+
+#define TEST_FUNCTION do_test
+#include <support/test-driver.c>
--
2.39.2

@ -0,0 +1,233 @@
commit d0f07f7df8d9758c838674b70144ac73bcbd1634
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue May 30 13:25:50 2023 +0200
elf: Make more functions available for binding during dlclose (bug 30425)
Previously, after destructors for a DSO have been invoked, ld.so refused
to bind against that DSO in all cases. Relax this restriction somewhat
if the referencing object is itself a DSO that is being unloaded. This
assumes that the symbol reference is not going to be stored anywhere.
The situation in the test case can arise fairly easily with C++ and
objects that are built with different optimization levels and therefore
define different functions with vague linkage.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Conflicts:
elf/Makefile
(Test conflicts resolved by hand)
diff --git a/elf/Makefile b/elf/Makefile
index 0daa8a85ec1a1bc5..8e1f91bcd917fd4e 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -392,6 +392,7 @@ tests += \
tst-debug1 \
tst-deep1 \
tst-dl-is_dso \
+ tst-dlclose-lazy \
tst-dlmodcount \
tst-dlmopen1 \
tst-dlmopen3 \
@@ -742,6 +743,8 @@ modules-names = \
tst-deep1mod2 \
tst-deep1mod3 \
tst-dlmopen1mod \
+ tst-dlclose-lazy-mod1 \
+ tst-dlclose-lazy-mod2 \
tst-dlmopen-dlerror-mod \
tst-dlmopen-gethostbyname-mod \
tst-dlmopen-twice-mod1 \
@@ -2676,3 +2679,8 @@ tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so
$(objpfx)tst-dlmopen-twice.out: \
$(objpfx)tst-dlmopen-twice-mod1.so \
$(objpfx)tst-dlmopen-twice-mod2.so
+
+LDFLAGS-tst-dlclose-lazy-mod1.so = -Wl,-z,lazy,--no-as-needed
+$(objpfx)tst-dlclose-lazy-mod1.so: $(objpfx)tst-dlclose-lazy-mod2.so
+$(objpfx)tst-dlclose-lazy.out: \
+ $(objpfx)tst-dlclose-lazy-mod1.so $(objpfx)tst-dlclose-lazy-mod2.so
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 3391a990c8d288e5..94ae2d0c46696118 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -380,8 +380,25 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
if ((type_class & ELF_RTYPE_CLASS_COPY) && map->l_type == lt_executable)
continue;
- /* Do not look into objects which are going to be removed. */
- if (map->l_removed)
+ /* Do not look into objects which are going to be removed,
+ except when the referencing object itself is being removed.
+
+ The second part covers the situation when an object lazily
+ binds to another object while running its destructor, but the
+ destructor of the other object has already run, so that
+ dlclose has set l_removed. It may not always be obvious how
+ to avoid such a scenario to programmers creating DSOs,
+ particularly if C++ vague linkage is involved and triggers
+ symbol interposition.
+
+ Accepting these to-be-removed objects makes the lazy and
+ BIND_NOW cases more similar. (With BIND_NOW, the symbol is
+ resolved early, before the destructor call, so the issue does
+ not arise.) Behavior matches the constructor scenario: the
+ implementation allows binding to symbols of objects whose
+ constructors have not run. In fact, not doing this would be
+ mostly incompatible with symbol interposition. */
+ if (map->l_removed && !(undef_map != NULL && undef_map->l_removed))
continue;
/* Print some debugging info if wanted. */
diff --git a/elf/tst-dlclose-lazy-mod1.c b/elf/tst-dlclose-lazy-mod1.c
new file mode 100644
index 0000000000000000..8439dc1925cc8b41
--- /dev/null
+++ b/elf/tst-dlclose-lazy-mod1.c
@@ -0,0 +1,36 @@
+/* Lazy binding during dlclose. Directly loaded module.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This function is called from exported_function below. It is only
+ defined in this module. The weak attribute mimics how G++
+ implements vague linkage for C++. */
+void __attribute__ ((weak))
+lazily_bound_exported_function (void)
+{
+}
+
+/* Called from tst-dlclose-lazy-mod2.so. */
+void
+exported_function (int call_it)
+{
+ if (call_it)
+ /* Previous to the fix this would crash when called during dlclose
+ since symbols from the DSO were no longer available for binding
+ (bug 30425) after the DSO started being closed by dlclose. */
+ lazily_bound_exported_function ();
+}
diff --git a/elf/tst-dlclose-lazy-mod2.c b/elf/tst-dlclose-lazy-mod2.c
new file mode 100644
index 0000000000000000..767f69ffdb23a685
--- /dev/null
+++ b/elf/tst-dlclose-lazy-mod2.c
@@ -0,0 +1,49 @@
+/* Lazy binding during dlclose. Indirectly loaded module.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+void
+exported_function (int ignored)
+{
+ /* This function is interposed from tst-dlclose-lazy-mod1.so and
+ thus never called. */
+ abort ();
+}
+
+static void __attribute__ ((constructor))
+init (void)
+{
+ puts ("info: tst-dlclose-lazy-mod2.so constructor called");
+
+ /* Trigger lazy binding to the definition in
+ tst-dlclose-lazy-mod1.so, but not for
+ lazily_bound_exported_function in that module. */
+ exported_function (0);
+}
+
+static void __attribute__ ((destructor))
+fini (void)
+{
+ puts ("info: tst-dlclose-lazy-mod2.so destructor called");
+
+ /* Trigger the lazily_bound_exported_function call in
+ exported_function in tst-dlclose-lazy-mod1.so. */
+ exported_function (1);
+}
diff --git a/elf/tst-dlclose-lazy.c b/elf/tst-dlclose-lazy.c
new file mode 100644
index 0000000000000000..976a6bb6f64fa981
--- /dev/null
+++ b/elf/tst-dlclose-lazy.c
@@ -0,0 +1,47 @@
+/* Test lazy binding during dlclose (bug 30425).
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This test re-creates a situation that can arise naturally for C++
+ applications due to the use of vague linkage and differences in the
+ set of compiler-emitted functions. A function in
+ tst-dlclose-lazy-mod1.so (exported_function) interposes a function
+ in tst-dlclose-lazy-mod2.so. This function is called from the
+ destructor in tst-dlclose-lazy-mod2.so, after the destructor for
+ tst-dlclose-lazy-mod1.so has already completed. Prior to the fix
+ for bug 30425, this would lead to a lazy binding failure in
+ tst-dlclose-lazy-mod1.so because dlclose had already marked the DSO
+ as unavailable for binding (by setting l_removed). */
+
+#include <dlfcn.h>
+#include <support/xdlfcn.h>
+#include <support/check.h>
+
+int
+main (void)
+{
+ /* Load tst-dlclose-lazy-mod1.so, indirectly loading
+ tst-dlclose-lazy-mod2.so. */
+ void *handle = xdlopen ("tst-dlclose-lazy-mod1.so", RTLD_GLOBAL | RTLD_LAZY);
+
+ /* Invoke the destructor of tst-dlclose-lazy-mod2.so, which calls
+ into tst-dlclose-lazy-mod1.so after its destructor has been
+ called. */
+ xdlclose (handle);
+
+ return 0;
+}

@ -0,0 +1,45 @@
From ed2f9dc9420c4c61436328778a70459d0a35556a Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 8 May 2023 22:10:20 -0500
Subject: [PATCH] x86: Use 64MB as nt-store threshold if no cacheinfo [BZ
#30429]
Content-type: text/plain; charset=UTF-8
If `non_temporal_threshold` is below `minimum_non_temporal_threshold`,
it almost certainly means we failed to read the system's cache info.
In this case, rather than defaulting to the minimum correct value, we
should default to a value that gets at least reasonable
performance. 64MB is chosen conservatively to be at the very high
end. This should never cause non-temporal stores when, if we had read
cache info, we wouldn't have otherwise.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index ec88945b39..877e73d700 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -749,8 +749,16 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
reflected in the manual. */
unsigned long int maximum_non_temporal_threshold = SIZE_MAX >> 4;
unsigned long int minimum_non_temporal_threshold = 0x4040;
+
+ /* If `non_temporal_threshold` less than `minimum_non_temporal_threshold`
+ it most likely means we failed to detect the cache info. We don't want
+ to default to `minimum_non_temporal_threshold` as such a small value,
+ while correct, has bad performance. We default to 64MB as reasonable
+ default bound. 64MB is likely conservative in that most/all systems would
+ choose a lower value so it should never forcing non-temporal stores when
+ they otherwise wouldn't be used. */
if (non_temporal_threshold < minimum_non_temporal_threshold)
- non_temporal_threshold = minimum_non_temporal_threshold;
+ non_temporal_threshold = 64 * 1024 * 1024;
else if (non_temporal_threshold > maximum_non_temporal_threshold)
non_temporal_threshold = maximum_non_temporal_threshold;
--
2.39.3

@ -0,0 +1,223 @@
From af992e7abdc9049714da76cae1e5e18bc4838fb8 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 7 Jun 2023 13:18:01 -0500
Subject: [PATCH] x86: Increase `non_temporal_threshold` to roughly `sizeof_L3
/ 4`
Content-type: text/plain; charset=UTF-8
Current `non_temporal_threshold` is set to roughly `3/4 * sizeof_L3 /
ncores_per_socket`. This patch updates that value to roughly
`sizeof_L3 / 4`.
The original value (specifically dividing the `ncores_per_socket`) was
done to limit the amount of other threads' data a `memcpy`/`memset`
could evict.
Dividing by 'ncores_per_socket', however leads to exceedingly low
non-temporal thresholds and leads to using non-temporal stores in
cases where REP MOVSB is multiple times faster.
Furthermore, non-temporal stores are written directly to main memory
so using it at a size much smaller than L3 can place soon to be
accessed data much further away than it otherwise could be. As well,
modern machines are able to detect streaming patterns (especially if
REP MOVSB is used) and provide LRU hints to the memory subsystem. This
in effect caps the total amount of eviction at 1/cache_associativity,
far below meaningfully thrashing the entire cache.
As best I can tell, the benchmarks that led to this small threshold
were done comparing non-temporal stores versus standard cacheable
stores. A better comparison (linked below) is to REP MOVSB which,
on the measured systems, is nearly 2x faster than non-temporal stores
at the low-end of the previous threshold, and within 10% for over
100MB copies (well past even the current threshold). In cases with a
low number of threads competing for bandwidth, REP MOVSB is ~2x faster
up to `sizeof_L3`.
The divisor of `4` is a somewhat arbitrary value. From benchmarks it
seems Skylake and Icelake both prefer a divisor of `2`, but older CPUs
such as Broadwell prefer something closer to `8`. This patch is meant
to be followed up by another one to make the divisor cpu-specific, but
in the meantime (and for easier backporting), this patch settles on
`4` as a middle-ground.
Benchmarks comparing non-temporal stores, REP MOVSB, and cacheable
stores were done using:
https://github.com/goldsteinn/memcpy-nt-benchmarks
Sheets results (also available in pdf on the github):
https://docs.google.com/spreadsheets/d/e/2PACX-1vS183r0rW_jRX6tG_E90m9qVuFiMbRIJvi5VAE8yYOvEOIEEc3aSNuEsrFbuXw5c3nGboxMmrupZD7K/pubhtml
Reviewed-by: DJ Delorie <dj@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 70 +++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 27 deletions(-)
[diff rebased by DJ]
diff -rup a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
--- a/sysdeps/x86/dl-cacheinfo.h 2023-07-25 00:38:39.386831871 -0400
+++ b/sysdeps/x86/dl-cacheinfo.h 2023-07-25 00:38:40.372870369 -0400
@@ -408,7 +408,7 @@ handle_zhaoxin (int name)
}
static void
-get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr,
+get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, unsigned int *threads_ptr,
long int core)
{
unsigned int eax;
@@ -427,6 +427,7 @@ get_common_cache_info (long int *shared_
unsigned int family = cpu_features->basic.family;
unsigned int model = cpu_features->basic.model;
long int shared = *shared_ptr;
+ long int shared_per_thread = *shared_per_thread_ptr;
unsigned int threads = *threads_ptr;
bool inclusive_cache = true;
bool support_count_mask = true;
@@ -442,6 +443,7 @@ get_common_cache_info (long int *shared_
/* Try L2 otherwise. */
level = 2;
shared = core;
+ shared_per_thread = core;
threads_l2 = 0;
threads_l3 = -1;
}
@@ -598,29 +600,28 @@ get_common_cache_info (long int *shared_
}
else
{
-intel_bug_no_cache_info:
- /* Assume that all logical threads share the highest cache
- level. */
- threads
- = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16)
- & 0xff);
- }
-
- /* Cap usage of highest cache level to the number of supported
- threads. */
- if (shared > 0 && threads > 0)
- shared /= threads;
+ intel_bug_no_cache_info:
+ /* Assume that all logical threads share the highest cache
+ level. */
+ threads = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16)
+ & 0xff);
+
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
+ }
}
/* Account for non-inclusive L2 and L3 caches. */
if (!inclusive_cache)
{
if (threads_l2 > 0)
- core /= threads_l2;
+ shared_per_thread += core / threads_l2;
shared += core;
}
*shared_ptr = shared;
+ *shared_per_thread_ptr = shared_per_thread;
*threads_ptr = threads;
}
@@ -630,6 +631,7 @@ dl_init_cacheinfo (struct cpu_features *
/* Find out what brand of processor. */
long int data = -1;
long int shared = -1;
+ long int shared_per_thread = -1;
long int core = -1;
unsigned int threads = 0;
unsigned long int level1_icache_size = -1;
@@ -650,6 +652,7 @@ dl_init_cacheinfo (struct cpu_features *
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+ shared_per_thread = shared;
level1_icache_size
= handle_intel (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
@@ -673,13 +676,14 @@ dl_init_cacheinfo (struct cpu_features *
level4_cache_size
= handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
- get_common_cache_info (&shared, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
}
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
{
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
+ shared_per_thread = shared;
level1_icache_size = handle_zhaoxin (_SC_LEVEL1_ICACHE_SIZE);
level1_icache_linesize = handle_zhaoxin (_SC_LEVEL1_ICACHE_LINESIZE);
@@ -693,13 +697,14 @@ dl_init_cacheinfo (struct cpu_features *
level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
- get_common_cache_info (&shared, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
core = handle_amd (_SC_LEVEL2_CACHE_SIZE, cpu_features);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+ shared_per_thread = shared;
level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
level1_icache_linesize
@@ -721,6 +726,9 @@ dl_init_cacheinfo (struct cpu_features *
if (shared <= 0)
/* No shared L3 cache. All we have is the L2 cache. */
shared = core;
+
+ if (shared_per_thread <= 0)
+ shared_per_thread = shared;
}
cpu_features->level1_icache_size = level1_icache_size;
@@ -736,17 +744,25 @@ dl_init_cacheinfo (struct cpu_features *
cpu_features->level3_cache_linesize = level3_cache_linesize;
cpu_features->level4_cache_size = level4_cache_size;
- /* The default setting for the non_temporal threshold is 3/4 of one
- thread's share of the chip's cache. For most Intel and AMD processors
- with an initial release date between 2017 and 2020, a thread's typical
- share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
- threshold leaves 125 KBytes to 500 KBytes of the thread's data
- in cache after a maximum temporal copy, which will maintain
- in cache a reasonable portion of the thread's stack and other
- active data. If the threshold is set higher than one thread's
- share of the cache, it has a substantial risk of negatively
- impacting the performance of other threads running on the chip. */
- unsigned long int non_temporal_threshold = shared * 3 / 4;
+ /* The default setting for the non_temporal threshold is 1/4 of size
+ of the chip's cache. For most Intel and AMD processors with an
+ initial release date between 2017 and 2023, a thread's typical
+ share of the cache is from 18-64MB. Using the 1/4 L3 is meant to
+ estimate the point where non-temporal stores begin out-competing
+ REP MOVSB. As well the point where the fact that non-temporal
+ stores are forced back to main memory would already occurred to the
+ majority of the lines in the copy. Note, concerns about the
+ entire L3 cache being evicted by the copy are mostly alleviated
+ by the fact that modern HW detects streaming patterns and
+ provides proper LRU hints so that the maximum thrashing
+ capped at 1/associativity. */
+ unsigned long int non_temporal_threshold = shared / 4;
+ /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
+ a higher risk of actually thrashing the cache as they don't have a HW LRU
+ hint. As well, their performance in highly parallel situations is
+ noticeably worse. */
+ if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ non_temporal_threshold = shared_per_thread * 3 / 4;
/* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
if that operation cannot overflow. Minimum of 0x4040 (16448) because the

@ -0,0 +1,475 @@
From f193ea20eddc6cef84cba54cf1a647204ee6a86b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 7 Jun 2023 13:18:02 -0500
Subject: [PATCH] x86: Refactor Intel `init_cpu_features`
Content-type: text/plain; charset=UTF-8
This patch should have no effect on existing functionality.
The current code, which has a single switch for model detection and
setting preferred features, is difficult to follow/extend. The cases
use magic numbers and many microarchitectures are missing. This makes
it difficult to reason about what is implemented so far and/or
how/where to add support for new features.
This patch splits the model detection and preference setting stages so
that CPU preferences can be set based on a complete list of available
microarchitectures, rather than based on model magic numbers.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/cpu-features.c | 390 +++++++++++++++++++++++++++++--------
1 file changed, 309 insertions(+), 81 deletions(-)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 0a99efdb28..d52a718e92 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -417,6 +417,216 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
== index_arch_Fast_Copy_Backward)),
"Incorrect index_arch_Fast_Unaligned_Load");
+
+/* Intel Family-6 microarch list. */
+enum
+{
+ /* Atom processors. */
+ INTEL_ATOM_BONNELL,
+ INTEL_ATOM_SILVERMONT,
+ INTEL_ATOM_AIRMONT,
+ INTEL_ATOM_GOLDMONT,
+ INTEL_ATOM_GOLDMONT_PLUS,
+ INTEL_ATOM_SIERRAFOREST,
+ INTEL_ATOM_GRANDRIDGE,
+ INTEL_ATOM_TREMONT,
+
+ /* Bigcore processors. */
+ INTEL_BIGCORE_MEROM,
+ INTEL_BIGCORE_PENRYN,
+ INTEL_BIGCORE_DUNNINGTON,
+ INTEL_BIGCORE_NEHALEM,
+ INTEL_BIGCORE_WESTMERE,
+ INTEL_BIGCORE_SANDYBRIDGE,
+ INTEL_BIGCORE_IVYBRIDGE,
+ INTEL_BIGCORE_HASWELL,
+ INTEL_BIGCORE_BROADWELL,
+ INTEL_BIGCORE_SKYLAKE,
+ INTEL_BIGCORE_KABYLAKE,
+ INTEL_BIGCORE_COMETLAKE,
+ INTEL_BIGCORE_SKYLAKE_AVX512,
+ INTEL_BIGCORE_CANNONLAKE,
+ INTEL_BIGCORE_ICELAKE,
+ INTEL_BIGCORE_TIGERLAKE,
+ INTEL_BIGCORE_ROCKETLAKE,
+ INTEL_BIGCORE_SAPPHIRERAPIDS,
+ INTEL_BIGCORE_RAPTORLAKE,
+ INTEL_BIGCORE_EMERALDRAPIDS,
+ INTEL_BIGCORE_METEORLAKE,
+ INTEL_BIGCORE_LUNARLAKE,
+ INTEL_BIGCORE_ARROWLAKE,
+ INTEL_BIGCORE_GRANITERAPIDS,
+
+ /* Mixed (bigcore + atom SOC). */
+ INTEL_MIXED_LAKEFIELD,
+ INTEL_MIXED_ALDERLAKE,
+
+ /* KNL. */
+ INTEL_KNIGHTS_MILL,
+ INTEL_KNIGHTS_LANDING,
+
+ /* Unknown. */
+ INTEL_UNKNOWN,
+};
+
+static unsigned int
+intel_get_fam6_microarch (unsigned int model,
+ __attribute__ ((unused)) unsigned int stepping)
+{
+ switch (model)
+ {
+ case 0x1C:
+ case 0x26:
+ return INTEL_ATOM_BONNELL;
+ case 0x27:
+ case 0x35:
+ case 0x36:
+ /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
+ (microarchitecturally identical). */
+ return INTEL_ATOM_BONNELL;
+ case 0x37:
+ case 0x4A:
+ case 0x4D:
+ case 0x5D:
+ return INTEL_ATOM_SILVERMONT;
+ case 0x4C:
+ case 0x5A:
+ case 0x75:
+ return INTEL_ATOM_AIRMONT;
+ case 0x5C:
+ case 0x5F:
+ return INTEL_ATOM_GOLDMONT;
+ case 0x7A:
+ return INTEL_ATOM_GOLDMONT_PLUS;
+ case 0xAF:
+ return INTEL_ATOM_SIERRAFOREST;
+ case 0xB6:
+ return INTEL_ATOM_GRANDRIDGE;
+ case 0x86:
+ case 0x96:
+ case 0x9C:
+ return INTEL_ATOM_TREMONT;
+ case 0x0F:
+ case 0x16:
+ return INTEL_BIGCORE_MEROM;
+ case 0x17:
+ return INTEL_BIGCORE_PENRYN;
+ case 0x1D:
+ return INTEL_BIGCORE_DUNNINGTON;
+ case 0x1A:
+ case 0x1E:
+ case 0x1F:
+ case 0x2E:
+ return INTEL_BIGCORE_NEHALEM;
+ case 0x25:
+ case 0x2C:
+ case 0x2F:
+ return INTEL_BIGCORE_WESTMERE;
+ case 0x2A:
+ case 0x2D:
+ return INTEL_BIGCORE_SANDYBRIDGE;
+ case 0x3A:
+ case 0x3E:
+ return INTEL_BIGCORE_IVYBRIDGE;
+ case 0x3C:
+ case 0x3F:
+ case 0x45:
+ case 0x46:
+ return INTEL_BIGCORE_HASWELL;
+ case 0x3D:
+ case 0x47:
+ case 0x4F:
+ case 0x56:
+ return INTEL_BIGCORE_BROADWELL;
+ case 0x4E:
+ case 0x5E:
+ return INTEL_BIGCORE_SKYLAKE;
+ case 0x8E:
+ /*
+ Stepping = {9}
+ -> Amberlake
+ Stepping = {10}
+ -> Coffeelake
+ Stepping = {11, 12}
+ -> Whiskeylake
+ else
+ -> Kabylake
+
+ All of these are derivatives of Kabylake (Skylake client).
+ */
+ return INTEL_BIGCORE_KABYLAKE;
+ case 0x9E:
+ /*
+ Stepping = {10, 11, 12, 13}
+ -> Coffeelake
+ else
+ -> Kabylake
+
+ Coffeelake is a derivatives of Kabylake (Skylake client).
+ */
+ return INTEL_BIGCORE_KABYLAKE;
+ case 0xA5:
+ case 0xA6:
+ return INTEL_BIGCORE_COMETLAKE;
+ case 0x66:
+ return INTEL_BIGCORE_CANNONLAKE;
+ case 0x55:
+ /*
+ Stepping = {6, 7}
+ -> Cascadelake
+ Stepping = {11}
+ -> Cooperlake
+ else
+ -> Skylake-avx512
+
+ These are all microarchitecturally indentical, so use
+ Skylake-avx512 for all of them.
+ */
+ return INTEL_BIGCORE_SKYLAKE_AVX512;
+ case 0x6A:
+ case 0x6C:
+ case 0x7D:
+ case 0x7E:
+ case 0x9D:
+ return INTEL_BIGCORE_ICELAKE;
+ case 0x8C:
+ case 0x8D:
+ return INTEL_BIGCORE_TIGERLAKE;
+ case 0xA7:
+ return INTEL_BIGCORE_ROCKETLAKE;
+ case 0x8F:
+ return INTEL_BIGCORE_SAPPHIRERAPIDS;
+ case 0xB7:
+ case 0xBA:
+ case 0xBF:
+ return INTEL_BIGCORE_RAPTORLAKE;
+ case 0xCF:
+ return INTEL_BIGCORE_EMERALDRAPIDS;
+ case 0xAA:
+ case 0xAC:
+ return INTEL_BIGCORE_METEORLAKE;
+ case 0xbd:
+ return INTEL_BIGCORE_LUNARLAKE;
+ case 0xc6:
+ return INTEL_BIGCORE_ARROWLAKE;
+ case 0xAD:
+ case 0xAE:
+ return INTEL_BIGCORE_GRANITERAPIDS;
+ case 0x8A:
+ return INTEL_MIXED_LAKEFIELD;
+ case 0x97:
+ case 0x9A:
+ case 0xBE:
+ return INTEL_MIXED_ALDERLAKE;
+ case 0x85:
+ return INTEL_KNIGHTS_MILL;
+ case 0x57:
+ return INTEL_KNIGHTS_LANDING;
+ default:
+ return INTEL_UNKNOWN;
+ }
+}
+
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
@@ -453,129 +663,147 @@ init_cpu_features (struct cpu_features *cpu_features)
if (family == 0x06)
{
model += extended_model;
- switch (model)
+ unsigned int microarch
+ = intel_get_fam6_microarch (model, stepping);
+
+ switch (microarch)
{
- case 0x1c:
- case 0x26:
- /* BSF is slow on Atom. */
+ /* Atom / KNL tuning. */
+ case INTEL_ATOM_BONNELL:
+ /* BSF is slow on Bonnell. */
cpu_features->preferred[index_arch_Slow_BSF]
- |= bit_arch_Slow_BSF;
+ |= bit_arch_Slow_BSF;
break;
- case 0x57:
- /* Knights Landing. Enable Silvermont optimizations. */
-
- case 0x7a:
- /* Unaligned load versions are faster than SSSE3
- on Goldmont Plus. */
-
- case 0x5c:
- case 0x5f:
/* Unaligned load versions are faster than SSSE3
- on Goldmont. */
+ on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
+ case INTEL_ATOM_AIRMONT:
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_PLUS:
- case 0x4c:
- case 0x5a:
- case 0x75:
- /* Airmont is a die shrink of Silvermont. */
+ /* Knights Landing. Enable Silvermont optimizations. */
+ case INTEL_KNIGHTS_LANDING:
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5d:
- /* Unaligned load versions are faster than SSSE3
- on Silvermont. */
cpu_features->preferred[index_arch_Fast_Unaligned_Load]
- |= (bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop
- | bit_arch_Slow_SSE4_2);
+ |= (bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop
+ | bit_arch_Slow_SSE4_2);
break;
- case 0x86:
- case 0x96:
- case 0x9c:
+ case INTEL_ATOM_TREMONT:
/* Enable rep string instructions, unaligned load, unaligned
- copy, pminub and avoid SSE 4.2 on Tremont. */
+ copy, pminub and avoid SSE 4.2 on Tremont. */
cpu_features->preferred[index_arch_Fast_Rep_String]
- |= (bit_arch_Fast_Rep_String
- | bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop
- | bit_arch_Slow_SSE4_2);
+ |= (bit_arch_Fast_Rep_String
+ | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop
+ | bit_arch_Slow_SSE4_2);
break;
+ /*
+ Default tuned Knights microarch.
+ case INTEL_KNIGHTS_MILL:
+ */
+
+ /*
+ Default tuned atom microarch.
+ case INTEL_ATOM_SIERRAFOREST:
+ case INTEL_ATOM_GRANDRIDGE:
+ */
+
+ /* Bigcore/Default Tuning. */
default:
/* Unknown family 0x06 processors. Assuming this is one
of Core i3/i5/i7 processors if AVX is available. */
if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
break;
/* Fall through. */
-
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x25:
- case 0x2c:
- case 0x2e:
- case 0x2f:
+ case INTEL_BIGCORE_NEHALEM:
+ case INTEL_BIGCORE_WESTMERE:
/* Rep string instructions, unaligned load, unaligned copy,
and pminub are fast on Intel Core i3, i5 and i7. */
cpu_features->preferred[index_arch_Fast_Rep_String]
- |= (bit_arch_Fast_Rep_String
- | bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop);
+ |= (bit_arch_Fast_Rep_String
+ | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop);
break;
+
+ /*
+ Default tuned Bigcore microarch.
+ case INTEL_BIGCORE_SANDYBRIDGE:
+ case INTEL_BIGCORE_IVYBRIDGE:
+ case INTEL_BIGCORE_HASWELL:
+ case INTEL_BIGCORE_BROADWELL:
+ case INTEL_BIGCORE_SKYLAKE:
+ case INTEL_BIGCORE_KABYLAKE:
+ case INTEL_BIGCORE_COMETLAKE:
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
+ case INTEL_BIGCORE_CANNONLAKE:
+ case INTEL_BIGCORE_ICELAKE:
+ case INTEL_BIGCORE_TIGERLAKE:
+ case INTEL_BIGCORE_ROCKETLAKE:
+ case INTEL_BIGCORE_RAPTORLAKE:
+ case INTEL_BIGCORE_METEORLAKE:
+ case INTEL_BIGCORE_LUNARLAKE:
+ case INTEL_BIGCORE_ARROWLAKE:
+ case INTEL_BIGCORE_SAPPHIRERAPIDS:
+ case INTEL_BIGCORE_EMERALDRAPIDS:
+ case INTEL_BIGCORE_GRANITERAPIDS:
+ */
+
+ /*
+ Default tuned Mixed (bigcore + atom SOC).
+ case INTEL_MIXED_LAKEFIELD:
+ case INTEL_MIXED_ALDERLAKE:
+ */
}
- /* Disable TSX on some processors to avoid TSX on kernels that
- weren't updated with the latest microcode package (which
- disables broken feature by default). */
- switch (model)
+ /* Disable TSX on some processors to avoid TSX on kernels that
+ weren't updated with the latest microcode package (which
+ disables broken feature by default). */
+ switch (microarch)
{
- case 0x55:
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
+ /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */
if (stepping <= 5)
goto disable_tsx;
break;
- case 0x8e:
- /* NB: Although the errata documents that for model == 0x8e,
- only 0xb stepping or lower are impacted, the intention of
- the errata was to disable TSX on all client processors on
- all steppings. Include 0xc stepping which is an Intel
- Core i7-8665U, a client mobile processor. */
- case 0x9e:
+
+ case INTEL_BIGCORE_KABYLAKE:
+ /* NB: Although the errata documents that for model == 0x8e
+ (kabylake skylake client), only 0xb stepping or lower are
+ impacted, the intention of the errata was to disable TSX on
+ all client processors on all steppings. Include 0xc
+ stepping which is an Intel Core i7-8665U, a client mobile
+ processor. */
if (stepping > 0xc)
break;
/* Fall through. */
- case 0x4e:
- case 0x5e:
- {
+ case INTEL_BIGCORE_SKYLAKE:
/* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
processors listed in:
https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
*/
-disable_tsx:
+ disable_tsx:
CPU_FEATURE_UNSET (cpu_features, HLE);
CPU_FEATURE_UNSET (cpu_features, RTM);
CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
- }
- break;
- case 0x3f:
- /* Xeon E7 v3 with stepping >= 4 has working TSX. */
- if (stepping >= 4)
break;
- /* Fall through. */
- case 0x3c:
- case 0x45:
- case 0x46:
- /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
- with stepping >= 4) to avoid TSX on kernels that weren't
- updated with the latest microcode package (which disables
- broken feature by default). */
- CPU_FEATURE_UNSET (cpu_features, RTM);
- break;
+
+ case INTEL_BIGCORE_HASWELL:
+ /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
+ TSX. Haswell also include other model numbers that have
+ working TSX. */
+ if (model == 0x3f && stepping >= 4)
+ break;
+
+ CPU_FEATURE_UNSET (cpu_features, RTM);
+ break;
}
}
--
2.39.3

@ -0,0 +1,178 @@
From 180897c161a171d8ef0faee1c6c9fd6b57d8b13b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 7 Jun 2023 13:18:03 -0500
Subject: [PATCH] x86: Make the divisor in setting `non_temporal_threshold` cpu
specific
Content-type: text/plain; charset=UTF-8
Different systems prefer different divisors.
From benchmarks[1] so far the following divisors have been found:
ICX : 2
SKX : 2
BWD : 8
For Intel, we are generalizing that BWD and older prefers 8 as a
divisor, and SKL and newer prefers 2. This number can be further tuned
as benchmarks are run.
[1]: https://github.com/goldsteinn/memcpy-nt-benchmarks
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/cpu-features.c | 31 ++++++++++++++++++++---------
sysdeps/x86/dl-cacheinfo.h | 32 ++++++++++++++++++------------
sysdeps/x86/dl-diagnostics-cpu.c | 11 ++++++----
sysdeps/x86/include/cpu-features.h | 3 +++
4 files changed, 51 insertions(+), 26 deletions(-)
[DJ - edited for ABI compatibility]
diff -rup a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
--- a/sysdeps/x86/cpu-features.c 2023-07-26 17:56:19.679300711 -0400
+++ b/sysdeps/x86/cpu-features.c 2023-07-28 15:27:00.336324265 -0400
@@ -35,6 +35,9 @@ extern void TUNABLE_CALLBACK (set_x86_sh
# endif
#endif
+unsigned long int __rtld_global_ro_cachesize_non_temporal_divisor
+ attribute_hidden;
+
#if CET_ENABLED
# include <dl-cet.h>
#endif
@@ -614,6 +617,7 @@ init_cpu_features (struct cpu_features *
unsigned int stepping = 0;
enum cpu_features_kind kind;
+ __rtld_global_ro_cachesize_non_temporal_divisor = 4;
#if !HAS_CPUID
if (__get_cpuid_max (0, 0) == 0)
{
@@ -694,13 +698,13 @@ init_cpu_features (struct cpu_features *
/* Bigcore/Default Tuning. */
default:
+ default_tuning:
/* Unknown family 0x06 processors. Assuming this is one
of Core i3/i5/i7 processors if AVX is available. */
if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
break;
- /* Fall through. */
- case INTEL_BIGCORE_NEHALEM:
- case INTEL_BIGCORE_WESTMERE:
+
+ enable_modern_features:
/* Rep string instructions, unaligned load, unaligned copy,
and pminub are fast on Intel Core i3, i5 and i7. */
cpu_features->preferred[index_arch_Fast_Rep_String]
@@ -710,12 +714,23 @@ init_cpu_features (struct cpu_features *
| bit_arch_Prefer_PMINUB_for_stringop);
break;
- /*
- Default tuned Bigcore microarch.
+ case INTEL_BIGCORE_NEHALEM:
+ case INTEL_BIGCORE_WESTMERE:
+ /* Older CPUs prefer non-temporal stores at lower threshold. */
+ __rtld_global_ro_cachesize_non_temporal_divisor = 8;
+ goto enable_modern_features;
+
+ /* Older Bigcore microarch (smaller non-temporal store
+ threshold). */
case INTEL_BIGCORE_SANDYBRIDGE:
case INTEL_BIGCORE_IVYBRIDGE:
case INTEL_BIGCORE_HASWELL:
case INTEL_BIGCORE_BROADWELL:
+ __rtld_global_ro_cachesize_non_temporal_divisor = 8;
+ goto default_tuning;
+
+ /* Newer Bigcore microarch (larger non-temporal store
+ threshold). */
case INTEL_BIGCORE_SKYLAKE:
case INTEL_BIGCORE_KABYLAKE:
case INTEL_BIGCORE_COMETLAKE:
@@ -731,13 +746,14 @@ init_cpu_features (struct cpu_features *
case INTEL_BIGCORE_SAPPHIRERAPIDS:
case INTEL_BIGCORE_EMERALDRAPIDS:
case INTEL_BIGCORE_GRANITERAPIDS:
- */
+ __rtld_global_ro_cachesize_non_temporal_divisor = 2;
+ goto default_tuning;
- /*
- Default tuned Mixed (bigcore + atom SOC).
+ /* Default tuned Mixed (bigcore + atom SOC). */
case INTEL_MIXED_LAKEFIELD:
case INTEL_MIXED_ALDERLAKE:
- */
+ __rtld_global_ro_cachesize_non_temporal_divisor = 2;
+ goto default_tuning;
}
/* Disable TSX on some processors to avoid TSX on kernels that
diff -rup a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
--- a/sysdeps/x86/dl-cacheinfo.h 2023-07-26 17:56:18.662261475 -0400
+++ b/sysdeps/x86/dl-cacheinfo.h 2023-07-26 17:56:20.756342261 -0400
@@ -744,19 +744,25 @@ dl_init_cacheinfo (struct cpu_features *
cpu_features->level3_cache_linesize = level3_cache_linesize;
cpu_features->level4_cache_size = level4_cache_size;
- /* The default setting for the non_temporal threshold is 1/4 of size
- of the chip's cache. For most Intel and AMD processors with an
- initial release date between 2017 and 2023, a thread's typical
- share of the cache is from 18-64MB. Using the 1/4 L3 is meant to
- estimate the point where non-temporal stores begin out-competing
- REP MOVSB. As well the point where the fact that non-temporal
- stores are forced back to main memory would already occurred to the
- majority of the lines in the copy. Note, concerns about the
- entire L3 cache being evicted by the copy are mostly alleviated
- by the fact that modern HW detects streaming patterns and
- provides proper LRU hints so that the maximum thrashing
- capped at 1/associativity. */
- unsigned long int non_temporal_threshold = shared / 4;
+ unsigned long int cachesize_non_temporal_divisor
+ = __rtld_global_ro_cachesize_non_temporal_divisor;
+ if (cachesize_non_temporal_divisor <= 0)
+ cachesize_non_temporal_divisor = 4;
+
+ /* The default setting for the non_temporal threshold is [1/8, 1/2] of size
+ of the chip's cache (depending on `cachesize_non_temporal_divisor` which
+ is microarch specific. The defeault is 1/4). For most Intel and AMD
+ processors with an initial release date between 2017 and 2023, a thread's
+ typical share of the cache is from 18-64MB. Using a reasonable size
+ fraction of L3 is meant to estimate the point where non-temporal stores
+ begin out-competing REP MOVSB. As well the point where the fact that
+ non-temporal stores are forced back to main memory would already occurred
+ to the majority of the lines in the copy. Note, concerns about the entire
+ L3 cache being evicted by the copy are mostly alleviated by the fact that
+ modern HW detects streaming patterns and provides proper LRU hints so that
+ the maximum thrashing capped at 1/associativity. */
+ unsigned long int non_temporal_threshold
+ = shared / cachesize_non_temporal_divisor;
/* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
a higher risk of actually thrashing the cache as they don't have a HW LRU
hint. As well, their performance in highly parallel situations is
diff -rup a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c
--- a/sysdeps/x86/dl-diagnostics-cpu.c 2021-08-01 21:33:43.000000000 -0400
+++ b/sysdeps/x86/dl-diagnostics-cpu.c 2023-07-26 17:56:20.761342454 -0400
@@ -117,4 +117,6 @@ _dl_diagnostics_cpu (void)
+ sizeof (cpu_features->level4_cache_size)
== sizeof (*cpu_features),
"last cpu_features field has been printed");
+ print_cpu_features_value ("cachesize_non_temporal_divisor",
+ __rtld_global_ro_cachesize_non_temporal_divisor);
}
diff -rup a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
--- a/sysdeps/x86/include/cpu-features.h 2021-08-01 21:33:43.000000000 -0400
+++ b/sysdeps/x86/include/cpu-features.h 2023-07-27 13:51:52.081494751 -0400
@@ -919,6 +919,10 @@ struct cpu_features
unsigned long int level4_cache_size;
};
+/* When no user non_temporal_threshold is specified. We default to
+ cachesize / cachesize_non_temporal_divisor. */
+extern unsigned long int __rtld_global_ro_cachesize_non_temporal_divisor;
+
/* Get a pointer to the CPU features structure. */
extern const struct cpu_features *_dl_x86_get_cpu_features (void)
__attribute__ ((pure));

@ -0,0 +1,49 @@
From 47f747217811db35854ea06741be3685e8bbd44d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 17 Jul 2023 23:14:33 -0500
Subject: [PATCH] x86: Fix slight bug in `shared_per_thread` cache size
calculation.
Content-type: text/plain; charset=UTF-8
After:
```
commit af992e7abdc9049714da76cae1e5e18bc4838fb8
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed Jun 7 13:18:01 2023 -0500
x86: Increase `non_temporal_threshold` to roughly `sizeof_L3 / 4`
```
Split `shared` (cumulative cache size) from `shared_per_thread` (cache
size per socket), the `shared_per_thread` *can* be slightly off from
the previous calculation.
Previously we added `core` even if `threads_l2` was invalid, and only
used `threads_l2` to divide `core` if it was present. The changed
version only included `core` if `threads_l2` was valid.
This change restores the old behavior if `threads_l2` is invalid by
adding the entire value of `core`.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index c98fa57a7b..43be2c1229 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -614,8 +614,8 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u
/* Account for non-inclusive L2 and L3 caches. */
if (!inclusive_cache)
{
- if (threads_l2 > 0)
- shared_per_thread += core / threads_l2;
+ long int core_per_thread = threads_l2 > 0 ? (core / threads_l2) : core;
+ shared_per_thread += core_per_thread;
shared += core;
}
--
2.39.3

@ -0,0 +1,55 @@
From 8b9a0af8ca012217bf90d1dc0694f85b49ae09da Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Tue, 18 Jul 2023 10:27:59 -0500
Subject: [PATCH] [PATCH v1] x86: Use `3/4*sizeof(per-thread-L3)` as low bound
for NT threshold.
Content-type: text/plain; charset=UTF-8
On some machines we end up with incomplete cache information. This can
make the new calculation of `sizeof(total-L3)/custom-divisor` end up
lower than intended (and lower than the prior value). So reintroduce
the old bound as a lower bound to avoid potentially regressing code
where we don't have complete information to make the decision.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
[diff rebased by DJ]
diff -rup a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
--- a/sysdeps/x86/dl-cacheinfo.h 2023-07-25 00:38:43.343986368 -0400
+++ b/sysdeps/x86/dl-cacheinfo.h 2023-07-25 00:38:44.336025100 -0400
@@ -751,8 +751,8 @@ dl_init_cacheinfo (struct cpu_features *
/* The default setting for the non_temporal threshold is [1/8, 1/2] of size
of the chip's cache (depending on `cachesize_non_temporal_divisor` which
- is microarch specific. The defeault is 1/4). For most Intel and AMD
- processors with an initial release date between 2017 and 2023, a thread's
+ is microarch specific. The default is 1/4). For most Intel processors
+ with an initial release date between 2017 and 2023, a thread's
typical share of the cache is from 18-64MB. Using a reasonable size
fraction of L3 is meant to estimate the point where non-temporal stores
begin out-competing REP MOVSB. As well the point where the fact that
@@ -763,12 +763,21 @@ dl_init_cacheinfo (struct cpu_features *
the maximum thrashing capped at 1/associativity. */
unsigned long int non_temporal_threshold
= shared / cachesize_non_temporal_divisor;
+
+ /* If the computed non_temporal_threshold <= 3/4 * per-thread L3, we most
+ likely have incorrect/incomplete cache info in which case, default to
+ 3/4 * per-thread L3 to avoid regressions. */
+ unsigned long int non_temporal_threshold_lowbound
+ = shared_per_thread * 3 / 4;
+ if (non_temporal_threshold < non_temporal_threshold_lowbound)
+ non_temporal_threshold = non_temporal_threshold_lowbound;
+
/* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
a higher risk of actually thrashing the cache as they don't have a HW LRU
hint. As well, their performance in highly parallel situations is
noticeably worse. */
if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- non_temporal_threshold = shared_per_thread * 3 / 4;
+ non_temporal_threshold = non_temporal_threshold_lowbound;
/* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
if that operation cannot overflow. Minimum of 0x4040 (16448) because the

@ -0,0 +1,37 @@
commit 885a7f0feee951f514a121788f46f33b2867110f
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri Aug 11 12:29:11 2023 -0500
x86: Fix incorrect scope of setting `shared_per_thread` [BZ# 30745]
The:
```
if (shared_per_thread > 0 && threads > 0)
shared_per_thread /= threads;
```
Code was accidentally moved to inside the else scope. This doesn't
match how it was previously (before af992e7abd).
This patch fixes that by putting the division after the `else` block.
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index aed1a7be56610e99..f950e488cfbe42dd 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -770,11 +770,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u
level. */
threads = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16)
& 0xff);
-
- /* Get per-thread size of highest level cache. */
- if (shared_per_thread > 0 && threads > 0)
- shared_per_thread /= threads;
}
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
}
/* Account for non-inclusive L2 and L3 caches. */

@ -0,0 +1,27 @@
commit abcf8db7fa46b73fd5b8193ce11f9312301b84c7
Author: Andreas Schwab <schwab@suse.de>
Date: Wed Jun 7 11:21:48 2023 +0200
resolv_conf: release lock on allocation failure (bug 30527)
When the initial allocation of global fails, the local lock is left
locked.
Reported by Steffen Lammel of SAP HANA development.
diff --git a/resolv/resolv_conf.c b/resolv/resolv_conf.c
index 9010bf0ea4d84557..aab79e9f4e323df9 100644
--- a/resolv/resolv_conf.c
+++ b/resolv/resolv_conf.c
@@ -93,7 +93,10 @@ get_locked_global (void)
{
global_copy = calloc (1, sizeof (*global));
if (global_copy == NULL)
- return NULL;
+ {
+ __libc_lock_unlock (lock);
+ return NULL;
+ }
atomic_store_relaxed (&global, global_copy);
resolv_conf_array_init (&global_copy->array);
}

@ -0,0 +1,137 @@
commit 1d44530a5be2442e064baa48139adc9fdfb1fc6b
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Jun 15 12:08:22 2023 +0200
string: strerror must not return NULL (bug 30555)
For strerror, this fixes commit 28aff047818eb1726394296d27b ("string:
Implement strerror in terms of strerror_l"). This commit avoids
returning NULL for strerror_l as well, although POSIX allows this
behavior for strerror_l.
Reviewed-by: Arjun Shankar <arjun@redhat.com>
Conflicts:
string/Makefile
(usual test differences)
diff --git a/string/Makefile b/string/Makefile
index f0fce2a0b8dea752..a385c8fdfed330b2 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -63,7 +63,7 @@ tests := tester inl-tester noinl-tester testcopy test-ffs \
tst-strtok_r bug-strcoll2 tst-cmp tst-xbzero-opt \
test-endian-types test-endian-file-scope \
test-endian-sign-conversion tst-memmove-overflow \
- test-sig_np
+ test-sig_np tst-strerror-fail
# Both tests require the .mo translation files generated by msgfmt.
tests-translation := tst-strsignal \
diff --git a/string/strerror_l.c b/string/strerror_l.c
index a381c79c5a0ba2a1..20aa3efe4714faee 100644
--- a/string/strerror_l.c
+++ b/string/strerror_l.c
@@ -43,10 +43,15 @@ __strerror_l (int errnum, locale_t loc)
struct tls_internal_t *tls_internal = __glibc_tls_internal ();
free (tls_internal->strerror_l_buf);
if (__asprintf (&tls_internal->strerror_l_buf, "%s%d",
- translate ("Unknown error ", loc), errnum) == -1)
- tls_internal->strerror_l_buf = NULL;
-
- err = tls_internal->strerror_l_buf;
+ translate ("Unknown error ", loc), errnum) > 0)
+ err = tls_internal->strerror_l_buf;
+ else
+ {
+ /* The memory was freed above. */
+ tls_internal->strerror_l_buf = NULL;
+ /* Provide a fallback translation. */
+ err = (char *) translate ("Unknown error", loc);
+ }
}
else
err = (char *) translate (err, loc);
diff --git a/string/tst-strerror-fail.c b/string/tst-strerror-fail.c
new file mode 100644
index 0000000000000000..e0fa45ab2b12f6b3
--- /dev/null
+++ b/string/tst-strerror-fail.c
@@ -0,0 +1,77 @@
+/* Check that strerror, strerror_l do not return NULL on failure (bug 30555).
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/namespace.h>
+#include <support/xdlfcn.h>
+
+/* Interposed malloc that can be used to inject allocation failures. */
+
+static volatile bool fail_malloc;
+
+void *
+malloc (size_t size)
+{
+ if (fail_malloc)
+ return NULL;
+
+ static void *(*original_malloc) (size_t);
+ if (original_malloc == NULL)
+ original_malloc = xdlsym (RTLD_NEXT, "malloc");
+ return original_malloc (size);
+}
+
+/* Callbacks for the actual tests. Use fork to run both tests with a
+ clean state. */
+
+static void
+test_strerror (void *closure)
+{
+ fail_malloc = true;
+ const char *s = strerror (999);
+ fail_malloc = false;
+ TEST_COMPARE_STRING (s, "Unknown error");
+}
+
+static void
+test_strerror_l (void *closure)
+{
+ locale_t loc = newlocale (LC_ALL, "C", (locale_t) 0);
+ TEST_VERIFY (loc != (locale_t) 0);
+ fail_malloc = true;
+ const char *s = strerror_l (999, loc);
+ fail_malloc = false;
+ TEST_COMPARE_STRING (s, "Unknown error");
+ freelocale (loc);
+}
+
+static int
+do_test (void)
+{
+ support_isolate_in_subprocess (test_strerror, NULL);
+ support_isolate_in_subprocess (test_strerror_l, NULL);
+
+ return 0;
+}
+
+#include <support/test-driver.c>

@ -0,0 +1,147 @@
commit d653fd2d9ebe23c2b16b76edf717c5dbd5ce9b77
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Mar 10 08:50:51 2022 +0100
malloc: Exit early on test failure in tst-realloc
This addresses more (correct) use-after-free warnings reported by
GCC 12 on some targets.
Fixes commit c094c232eb3246154265bb035182f92fe1b17ab8 ("Avoid
-Wuse-after-free in tests [BZ #26779].").
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/malloc/tst-realloc.c b/malloc/tst-realloc.c
index 80711beab1257ed5..e985b9d565086257 100644
--- a/malloc/tst-realloc.c
+++ b/malloc/tst-realloc.c
@@ -20,15 +20,7 @@
#include <stdio.h>
#include <string.h>
#include <libc-diag.h>
-
-static int errors = 0;
-
-static void
-merror (const char *msg)
-{
- ++errors;
- printf ("Error: %s\n", msg);
-}
+#include <support/check.h>
static int
do_test (void)
@@ -51,11 +43,11 @@ do_test (void)
save = errno;
if (p != NULL)
- merror ("realloc (NULL, -1) succeeded.");
+ FAIL_EXIT1 ("realloc (NULL, -1) succeeded.");
/* errno should be set to ENOMEM on failure (POSIX). */
if (p == NULL && save != ENOMEM)
- merror ("errno is not set correctly");
+ FAIL_EXIT1 ("errno is not set correctly");
errno = 0;
@@ -64,18 +56,18 @@ do_test (void)
save = errno;
if (p == NULL)
- merror ("realloc (NULL, 10) failed.");
+ FAIL_EXIT1 ("realloc (NULL, 10) failed.");
free (p);
p = calloc (20, 1);
if (p == NULL)
- merror ("calloc (20, 1) failed.");
+ FAIL_EXIT1 ("calloc (20, 1) failed.");
/* Check increasing size preserves contents (C89). */
p = realloc (p, 200);
if (p == NULL)
- merror ("realloc (p, 200) failed.");
+ FAIL_EXIT1 ("realloc (p, 200) failed.");
c = p;
ok = 1;
@@ -87,20 +79,20 @@ do_test (void)
}
if (ok == 0)
- merror ("first 20 bytes were not cleared");
+ FAIL_EXIT1 ("first 20 bytes were not cleared");
free (p);
p = realloc (NULL, 100);
if (p == NULL)
- merror ("realloc (NULL, 100) failed.");
+ FAIL_EXIT1 ("realloc (NULL, 100) failed.");
memset (p, 0xff, 100);
/* Check decreasing size preserves contents (C89). */
p = realloc (p, 16);
if (p == NULL)
- merror ("realloc (p, 16) failed.");
+ FAIL_EXIT1 ("realloc (p, 16) failed.");
c = p;
ok = 1;
@@ -112,7 +104,7 @@ do_test (void)
}
if (ok == 0)
- merror ("first 16 bytes were not correct");
+ FAIL_EXIT1 ("first 16 bytes were not correct");
/* Check failed realloc leaves original untouched (C89). */
DIAG_PUSH_NEEDS_COMMENT;
@@ -124,7 +116,7 @@ do_test (void)
c = realloc (p, -1);
DIAG_POP_NEEDS_COMMENT;
if (c != NULL)
- merror ("realloc (p, -1) succeeded.");
+ FAIL_EXIT1 ("realloc (p, -1) succeeded.");
c = p;
ok = 1;
@@ -136,29 +128,21 @@ do_test (void)
}
if (ok == 0)
- merror ("first 16 bytes were not correct after failed realloc");
+ FAIL_EXIT1 ("first 16 bytes were not correct after failed realloc");
-#if __GNUC_PREREQ (12, 0)
- /* Ignore a valid warning about using a pointer made indeterminate
- by a prior call to realloc(). */
- DIAG_IGNORE_NEEDS_COMMENT (12, "-Wuse-after-free");
-#endif
/* realloc (p, 0) frees p (C89) and returns NULL (glibc). */
p = realloc (p, 0);
-#if __GNUC_PREREQ (12, 0)
- DIAG_POP_NEEDS_COMMENT;
-#endif
if (p != NULL)
- merror ("realloc (p, 0) returned non-NULL.");
+ FAIL_EXIT1 ("realloc (p, 0) returned non-NULL.");
/* realloc (NULL, 0) acts like malloc (0) (glibc). */
p = realloc (NULL, 0);
if (p == NULL)
- merror ("realloc (NULL, 0) returned NULL.");
+ FAIL_EXIT1 ("realloc (NULL, 0) returned NULL.");
free (p);
- return errors != 0;
+ return 0;
}
#define TEST_FUNCTION do_test ()

@ -0,0 +1,382 @@
commit 3e5760fcb48528d48deeb60cb885a97bb731160c
Author: Joseph Myers <joseph@codesourcery.com>
Date: Wed Sep 28 20:09:34 2022 +0000
Update _FloatN header support for C++ in GCC 13
GCC 13 adds support for _FloatN and _FloatNx types in C++, so breaking
the installed glibc headers that assume such support is not present.
GCC mostly works around this with fixincludes, but that doesn't help
for building glibc and its tests (glibc doesn't itself contain C++
code, but there's C++ code built for tests). Update glibc's
bits/floatn-common.h and bits/floatn.h headers to handle the GCC 13
support directly.
In general the changes match those made by fixincludes, though I think
the ones in sysdeps/powerpc/bits/floatn.h, where the header tests
__LDBL_MANT_DIG__ == 113 or uses #elif, wouldn't match the existing
fixincludes patterns.
Some places involving special C++ handling in relation to _FloatN
support are not changed. There's no need to change the
__HAVE_FLOATN_NOT_TYPEDEF definition (also in a form that wouldn't be
matched by the fixincludes fixes) because it's only used in relation
to macro definitions using features not supported for C++
(__builtin_types_compatible_p and _Generic). And there's no need to
change the inline function overloads for issignaling, iszero and
iscanonical in C++ because cases where types have the same format but
are no longer compatible types are handled automatically by the C++
overload resolution rules.
This patch also does not change the overload handling for iseqsig, and
there I think changes *are* needed, beyond those in this patch or made
by fixincludes. The way that overload is defined, via a template
parameter to a structure type, requires overloads whenever the types
are incompatible, even if they have the same format. So I think we
need to add overloads with GCC 13 for every supported _FloatN and
_FloatNx type, rather than just having one for _Float128 when it has a
different ABI to long double as at present (but for older GCC, such
overloads must not be defined for types that end up defined as
typedefs for another type).
Tested with build-many-glibcs.py: compilers build for
aarch64-linux-gnu ia64-linux-gnu mips64-linux-gnu powerpc-linux-gnu
powerpc64le-linux-gnu x86_64-linux-gnu; glibcs build for
aarch64-linux-gnu ia64-linux-gnu i686-linux-gnu mips-linux-gnu
mips64-linux-gnu-n32 powerpc-linux-gnu powerpc64le-linux-gnu
x86_64-linux-gnu.
diff --git a/bits/floatn-common.h b/bits/floatn-common.h
index b43c9532d8b01cca..45d4555f48483fff 100644
--- a/bits/floatn-common.h
+++ b/bits/floatn-common.h
@@ -78,7 +78,7 @@
or _FloatNx types, if __HAVE_<type> is 1. The corresponding
literal suffixes exist since GCC 7, for C only. */
# if __HAVE_FLOAT16
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* No corresponding suffix available for this type. */
# define __f16(x) ((_Float16) x##f)
# else
@@ -87,7 +87,7 @@
# endif
# if __HAVE_FLOAT32
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# define __f32(x) x##f
# else
# define __f32(x) x##f32
@@ -95,7 +95,7 @@
# endif
# if __HAVE_FLOAT64
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# ifdef __NO_LONG_DOUBLE_MATH
# define __f64(x) x##l
# else
@@ -107,7 +107,7 @@
# endif
# if __HAVE_FLOAT32X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# define __f32x(x) x
# else
# define __f32x(x) x##f32x
@@ -115,7 +115,7 @@
# endif
# if __HAVE_FLOAT64X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# if __HAVE_FLOAT64X_LONG_DOUBLE
# define __f64x(x) x##l
# else
@@ -127,7 +127,7 @@
# endif
# if __HAVE_FLOAT128X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# error "_Float128X supported but no constant suffix"
# else
# define __f128x(x) x##f128x
@@ -136,7 +136,7 @@
/* Defined to a complex type if __HAVE_<type> is 1. */
# if __HAVE_FLOAT16
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
# define __CFLOAT16 __cfloat16
# else
@@ -145,7 +145,7 @@ typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
# endif
# if __HAVE_FLOAT32
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# define __CFLOAT32 _Complex float
# else
# define __CFLOAT32 _Complex _Float32
@@ -153,7 +153,7 @@ typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
# endif
# if __HAVE_FLOAT64
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# ifdef __NO_LONG_DOUBLE_MATH
# define __CFLOAT64 _Complex long double
# else
@@ -165,7 +165,7 @@ typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
# endif
# if __HAVE_FLOAT32X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# define __CFLOAT32X _Complex double
# else
# define __CFLOAT32X _Complex _Float32x
@@ -173,7 +173,7 @@ typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
# endif
# if __HAVE_FLOAT64X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# if __HAVE_FLOAT64X_LONG_DOUBLE
# define __CFLOAT64X _Complex long double
# else
@@ -185,7 +185,7 @@ typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
# endif
# if __HAVE_FLOAT128X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# error "_Float128X supported but no complex type"
# else
# define __CFLOAT128X _Complex _Float128x
@@ -195,7 +195,7 @@ typedef _Complex float __cfloat16 __attribute__ ((__mode__ (__HC__)));
/* The remaining of this file provides support for older compilers. */
# if __HAVE_FLOAT16
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef float _Float16 __attribute__ ((__mode__ (__HF__)));
# endif
@@ -210,7 +210,7 @@ typedef float _Float16 __attribute__ ((__mode__ (__HF__)));
# if __HAVE_FLOAT32
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef float _Float32;
# endif
@@ -234,7 +234,7 @@ typedef float _Float32;
# ifdef __NO_LONG_DOUBLE_MATH
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef long double _Float64;
# endif
@@ -247,7 +247,7 @@ typedef long double _Float64;
# else
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef double _Float64;
# endif
@@ -264,7 +264,7 @@ typedef double _Float64;
# if __HAVE_FLOAT32X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef double _Float32x;
# endif
@@ -281,7 +281,7 @@ typedef double _Float32x;
# if __HAVE_FLOAT64X_LONG_DOUBLE
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef long double _Float64x;
# endif
@@ -294,7 +294,7 @@ typedef long double _Float64x;
# else
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef _Float128 _Float64x;
# endif
@@ -311,7 +311,7 @@ typedef _Float128 _Float64x;
# if __HAVE_FLOAT128X
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# error "_Float128x supported but no type"
# endif
diff --git a/sysdeps/ia64/bits/floatn.h b/sysdeps/ia64/bits/floatn.h
index 60c5a130e12d88a1..3d493909aeebf81e 100644
--- a/sysdeps/ia64/bits/floatn.h
+++ b/sysdeps/ia64/bits/floatn.h
@@ -56,7 +56,7 @@
/* Defined to concatenate the literal suffix to be used with _Float128
types, if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* The literal suffix f128 exists only since GCC 7.0. */
# define __f128(x) x##q
# else
@@ -66,7 +66,7 @@
/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* Add a typedef for older GCC compilers which don't natively support
_Complex _Float128. */
typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
@@ -80,7 +80,7 @@ typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
# if __HAVE_FLOAT128
/* The type _Float128 exists only since GCC 7.0. */
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef __float128 _Float128;
# endif
diff --git a/sysdeps/ieee754/ldbl-128/bits/floatn.h b/sysdeps/ieee754/ldbl-128/bits/floatn.h
index da50ae796f681c60..d75a3d12e890c0be 100644
--- a/sysdeps/ieee754/ldbl-128/bits/floatn.h
+++ b/sysdeps/ieee754/ldbl-128/bits/floatn.h
@@ -55,7 +55,7 @@
/* Defined to concatenate the literal suffix to be used with _Float128
types, if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* The literal suffix f128 exists only since GCC 7.0. */
# define __f128(x) x##l
# else
@@ -65,7 +65,7 @@
/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# define __CFLOAT128 _Complex long double
# else
# define __CFLOAT128 _Complex _Float128
@@ -76,7 +76,7 @@
# if __HAVE_FLOAT128
/* The type _Float128 exists only since GCC 7.0. */
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef long double _Float128;
# endif
diff --git a/sysdeps/mips/ieee754/bits/floatn.h b/sysdeps/mips/ieee754/bits/floatn.h
index b7720a2889e59e8f..fe7be983592e1e0e 100644
--- a/sysdeps/mips/ieee754/bits/floatn.h
+++ b/sysdeps/mips/ieee754/bits/floatn.h
@@ -55,7 +55,7 @@
/* Defined to concatenate the literal suffix to be used with _Float128
types, if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* The literal suffix f128 exists only since GCC 7.0. */
# define __f128(x) x##l
# else
@@ -65,7 +65,7 @@
/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
# define __CFLOAT128 _Complex long double
# else
# define __CFLOAT128 _Complex _Float128
@@ -76,7 +76,7 @@
# if __HAVE_FLOAT128
/* The type _Float128 exists only since GCC 7.0. */
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef long double _Float128;
# endif
diff --git a/sysdeps/powerpc/bits/floatn.h b/sysdeps/powerpc/bits/floatn.h
index fab164e0a2907668..a5a572f646dac2bf 100644
--- a/sysdeps/powerpc/bits/floatn.h
+++ b/sysdeps/powerpc/bits/floatn.h
@@ -57,7 +57,7 @@
/* Defined to concatenate the literal suffix to be used with _Float128
types, if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* The literal suffix (f128) exist for powerpc only since GCC 7.0. */
# if __LDBL_MANT_DIG__ == 113
# define __f128(x) x##l
@@ -71,10 +71,10 @@
/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if __LDBL_MANT_DIG__ == 113 && defined __cplusplus
+# if __LDBL_MANT_DIG__ == 113 && defined __cplusplus && !__GNUC_PREREQ (13, 0)
typedef long double _Float128;
# define __CFLOAT128 _Complex long double
-# elif !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# elif !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* The type _Float128 exist for powerpc only since GCC 7.0. */
typedef __float128 _Float128;
/* Add a typedef for older GCC and C++ compilers which don't natively support
diff --git a/sysdeps/x86/bits/floatn.h b/sysdeps/x86/bits/floatn.h
index f0c51716a99c1886..8674273f46b87069 100644
--- a/sysdeps/x86/bits/floatn.h
+++ b/sysdeps/x86/bits/floatn.h
@@ -58,7 +58,7 @@
/* Defined to concatenate the literal suffix to be used with _Float128
types, if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* The literal suffix f128 exists only since GCC 7.0. */
# define __f128(x) x##q
# else
@@ -68,7 +68,7 @@
/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */
# if __HAVE_FLOAT128
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
/* Add a typedef for older GCC compilers which don't natively support
_Complex _Float128. */
typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
@@ -82,7 +82,7 @@ typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
# if __HAVE_FLOAT128
/* The type _Float128 exists only since GCC 7.0. */
-# if !__GNUC_PREREQ (7, 0) || defined __cplusplus
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
typedef __float128 _Float128;
# endif

@ -0,0 +1,897 @@
commit f66780ba46805760a328f01967836416b06c93ca
Author: Joseph Myers <joseph@codesourcery.com>
Date: Mon Oct 31 23:20:08 2022 +0000
Fix build with GCC 13 _FloatN, _FloatNx built-in functions
GCC 13 has added more _FloatN and _FloatNx versions of existing
<math.h> and <complex.h> built-in functions, for use in libstdc++-v3.
This breaks the glibc build because of how those functions are defined
as aliases to functions with the same ABI but different types. Add
appropriate -fno-builtin-* options for compiling relevant files, as
already done for the case of long double functions aliasing double
ones and based on the list of files used there.
I fixed some mistakes in that list of double files that I noticed
while implementing this fix, but there may well be more such
(harmless) cases, in this list or the new one (files that don't
actually exist or don't define the named functions as aliases so don't
need the options). I did try to exclude cases where glibc doesn't
define certain functions for _FloatN or _FloatNx types at all from the
new uses of -fno-builtin-* options. As with the options for double
files (see the commit message for commit
49348beafe9ba150c9bd48595b3f372299bddbb0, "Fix build with GCC 10 when
long double = double."), it's deliberate that the options are used
even if GCC currently doesn't have a built-in version of a given
function, so providing some level of future-proofing against more
such built-in functions being added in future.
Tested with build-many-glibcs.py for aarch64-linux-gnu
powerpc-linux-gnu powerpc64le-linux-gnu x86_64-linux-gnu (compilers
and glibcs builds) with GCC mainline.
Conflicts:
math/Makefile
(missing narrowing fma, sqrt downstream)
diff --git a/math/Makefile b/math/Makefile
index ceb1eb2085c8bfd4..2edb044d9d590de1 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -661,16 +661,18 @@ CFLAGS-s_csinh.c += -fno-builtin-csinhl
CFLAGS-s_csqrt.c += -fno-builtin-csqrtl
CFLAGS-s_ctan.c += -fno-builtin-ctanl
CFLAGS-s_ctanh.c += -fno-builtin-ctanhl
-CFLAGS-s_dadd.c += -fno-builtin-daddl
-CFLAGS-s_ddiv.c += -fno-builtin-ddivl
-CFLAGS-s_dmul.c += -fno-builtin-dmull
-CFLAGS-s_dsub.c += -fno-builtin-dsubl
CFLAGS-s_erf.c += -fno-builtin-erfl
CFLAGS-s_erfc.c += -fno-builtin-erfcl
CFLAGS-e_exp.c += -fno-builtin-expl
CFLAGS-w_exp10.c += -fno-builtin-exp10l
CFLAGS-e_exp2.c += -fno-builtin-exp2l
CFLAGS-s_expm1.c += -fno-builtin-expm1l
+CFLAGS-s_f32xaddf64.c += -fno-builtin-daddl
+CFLAGS-s_f32xdivf64.c += -fno-builtin-ddivl
+CFLAGS-s_f32xfmaf64.c += -fno-builtin-dfmal
+CFLAGS-s_f32xmulf64.c += -fno-builtin-dmull
+CFLAGS-s_f32xsqrtf64.c += -fno-builtin-dsqrtl
+CFLAGS-s_f32xsubf64.c += -fno-builtin-dsubl
CFLAGS-s_fabs.c += -fno-builtin-fabsl
CFLAGS-s_fadd.c += -fno-builtin-faddl
CFLAGS-s_fdim.c += -fno-builtin-fdiml
@@ -688,7 +690,6 @@ CFLAGS-s_frexp.c += -fno-builtin-frexpl
CFLAGS-s_fromfp.c += -fno-builtin-fromfpl
CFLAGS-s_fromfpx.c += -fno-builtin-fromfpxl
CFLAGS-s_fsub.c += -fno-builtin-fsubl
-CFLAGS-s_gamma.c += -fno-builtin-gammal
CFLAGS-s_getpayload.c += -fno-builtin-getpayloadl
CFLAGS-w_hypot.c += -fno-builtin-hypotl
CFLAGS-w_ilogb.c += -fno-builtin-ilogbl
@@ -747,6 +748,240 @@ CFLAGS-s_y1.c += -fno-builtin-y1l
CFLAGS-s_yn.c += -fno-builtin-ynl
endif
+# Likewise, for _Float32x and _Float64 aliases.
+CFLAGS-w_acos.c += -fno-builtin-acosf32x -fno-builtin-acosf64
+CFLAGS-w_acosh.c += -fno-builtin-acoshf32x -fno-builtin-acoshf64
+CFLAGS-w_asin.c += -fno-builtin-asinf32x -fno-builtin-asinf64
+CFLAGS-s_asinh.c += -fno-builtin-asinhf32x -fno-builtin-asinhf64
+CFLAGS-s_atan.c += -fno-builtin-atanf32x -fno-builtin-atanf64
+CFLAGS-w_atan2.c += -fno-builtin-atan2f32x -fno-builtin-atan2f64
+CFLAGS-w_atanh.c += -fno-builtin-atanhf32x -fno-builtin-atanhf64
+CFLAGS-s_cabs.c += -fno-builtin-cabsf32x -fno-builtin-cabsf64
+CFLAGS-s_cacos.c += -fno-builtin-cacosf32x -fno-builtin-cacosf64
+CFLAGS-s_cacosh.c += -fno-builtin-cacoshf32x -fno-builtin-cacoshf64
+CFLAGS-s_canonicalize.c += -fno-builtin-canonicalizef32x -fno-builtin-canonicalizef64
+CFLAGS-s_carg.c += -fno-builtin-cargf32x -fno-builtin-cargf64
+CFLAGS-s_casin.c += -fno-builtin-casinf32x -fno-builtin-casinf64
+CFLAGS-s_casinh.c += -fno-builtin-casinhf32x -fno-builtin-casinhf64
+CFLAGS-s_catan.c += -fno-builtin-catanf32x -fno-builtin-catanf64
+CFLAGS-s_catanh.c += -fno-builtin-catanhf32x -fno-builtin-catanhf64
+CFLAGS-s_cbrt.c += -fno-builtin-cbrtf32x -fno-builtin-cbrtf64
+CFLAGS-s_ccos.c += -fno-builtin-ccosf32x -fno-builtin-ccosf64
+CFLAGS-s_ccosh.c += -fno-builtin-ccoshf32x -fno-builtin-ccoshf64
+CFLAGS-s_ceil.c += -fno-builtin-ceilf32x -fno-builtin-ceilf64
+CFLAGS-s_cexp.c += -fno-builtin-cexpf32x -fno-builtin-cexpf64
+CFLAGS-s_cimag.c += -fno-builtin-cimagf32x -fno-builtin-cimagf64
+CFLAGS-s_clog.c += -fno-builtin-clogf32x -fno-builtin-clogf64
+CFLAGS-s_clog10.c += -fno-builtin-clog10f32x -fno-builtin-clog10f64
+CFLAGS-s_conj.c += -fno-builtin-conjf32x -fno-builtin-conjf64
+CFLAGS-s_copysign.c += -fno-builtin-copysignf32x -fno-builtin-copysignf64
+CFLAGS-s_cos.c += -fno-builtin-cosf32x -fno-builtin-cosf64
+CFLAGS-w_cosh.c += -fno-builtin-coshf32x -fno-builtin-coshf64
+CFLAGS-s_cpow.c += -fno-builtin-cpowf32x -fno-builtin-cpowf64
+CFLAGS-s_cproj.c += -fno-builtin-cprojf32x -fno-builtin-cprojf64
+CFLAGS-s_creal.c += -fno-builtin-crealf32x -fno-builtin-crealf64
+CFLAGS-s_csin.c += -fno-builtin-csinf32x -fno-builtin-csinf64
+CFLAGS-s_csinh.c += -fno-builtin-csinhf32x -fno-builtin-csinhf64
+CFLAGS-s_csqrt.c += -fno-builtin-csqrtf32x -fno-builtin-csqrtf64
+CFLAGS-s_ctan.c += -fno-builtin-ctanf32x -fno-builtin-ctanf64
+CFLAGS-s_ctanh.c += -fno-builtin-ctanhf32x -fno-builtin-ctanhf64
+CFLAGS-s_erf.c += -fno-builtin-erff32x -fno-builtin-erff64
+CFLAGS-s_erfc.c += -fno-builtin-erfcf32x -fno-builtin-erfcf64
+CFLAGS-e_exp.c += -fno-builtin-expf32x -fno-builtin-expf64
+CFLAGS-w_exp10.c += -fno-builtin-exp10f32x -fno-builtin-exp10f64
+CFLAGS-e_exp2.c += -fno-builtin-exp2f32x -fno-builtin-exp2f64
+CFLAGS-s_expm1.c += -fno-builtin-expm1f32x -fno-builtin-expm1f64
+CFLAGS-s_fabs.c += -fno-builtin-fabsf32x -fno-builtin-fabsf64
+CFLAGS-s_fadd.c += -fno-builtin-f32addf32x -fno-builtin-f32addf64
+CFLAGS-s_fdim.c += -fno-builtin-fdimf32x -fno-builtin-fdimf64
+CFLAGS-s_fdiv.c += -fno-builtin-f32divf32x -fno-builtin-f32divf64
+CFLAGS-s_ffma.c += -fno-builtin-f32fmaf32x -fno-builtin-f32fmaf64
+CFLAGS-s_floor.c += -fno-builtin-floorf32x -fno-builtin-floorf64
+CFLAGS-s_fma.c += -fno-builtin-fmaf32x -fno-builtin-fmaf64
+CFLAGS-s_fmax.c += -fno-builtin-fmaxf32x -fno-builtin-fmaxf64
+CFLAGS-s_fmaximum.c += -fno-builtin-fmaximumf32x -fno-builtin-fmaximumf64
+CFLAGS-s_fmaximum_mag.c += -fno-builtin-fmaximum_magf32x -fno-builtin-fmaximum_magf64
+CFLAGS-s_fmaximum_mag_num.c += -fno-builtin-fmaximum_mag_numf32x -fno-builtin-fmaximum_mag_numf64
+CFLAGS-s_fmaximum_num.c += -fno-builtin-fmaximum_numf32x -fno-builtin-fmaximum_numf64
+CFLAGS-s_fmaxmag.c += -fno-builtin-fmaxmagf32x -fno-builtin-fmaxmagf64
+CFLAGS-s_fmin.c += -fno-builtin-fminf32x -fno-builtin-fminf64
+CFLAGS-s_fminimum.c += -fno-builtin-fminimumf32x -fno-builtin-fminimumf64
+CFLAGS-s_fminimum_mag.c += -fno-builtin-fminimum_magf32x -fno-builtin-fminimum_magf64
+CFLAGS-s_fminimum_mag_num.c += -fno-builtin-fminimum_mag_numf32x -fno-builtin-fminimum_mag_numf64
+CFLAGS-s_fminimum_num.c += -fno-builtin-fminimum_numf32x -fno-builtin-fminimum_numf64
+CFLAGS-s_fminmag.c += -fno-builtin-fminmagf32x -fno-builtin-fminmagf64
+CFLAGS-w_fmod.c += -fno-builtin-fmodf32x -fno-builtin-fmodf64
+CFLAGS-s_fmul.c += -fno-builtin-f32mulf32x -fno-builtin-f32mulf64
+CFLAGS-s_frexp.c += -fno-builtin-frexpf32x -fno-builtin-frexpf64
+CFLAGS-s_fromfp.c += -fno-builtin-fromfpf32x -fno-builtin-fromfpf64
+CFLAGS-s_fromfpx.c += -fno-builtin-fromfpxf32x -fno-builtin-fromfpxf64
+CFLAGS-s_fsqrt.c += -fno-builtin-f32sqrtf32x -fno-builtin-f32sqrtf64
+CFLAGS-s_fsub.c += -fno-builtin-f32subf32x -fno-builtin-f32subf64
+CFLAGS-s_getpayload.c += -fno-builtin-getpayloadf32x -fno-builtin-getpayloadf64
+CFLAGS-w_hypot.c += -fno-builtin-hypotf32x -fno-builtin-hypotf64
+CFLAGS-w_ilogb.c += -fno-builtin-ilogbf32x -fno-builtin-ilogbf64
+CFLAGS-w_j0.c += -fno-builtin-j0f32x -fno-builtin-j0f64
+CFLAGS-w_j1.c += -fno-builtin-j1f32x -fno-builtin-j1f64
+CFLAGS-w_jn.c += -fno-builtin-jnf32x -fno-builtin-jnf64
+CFLAGS-s_ldexp.c += -fno-builtin-ldexpf32x -fno-builtin-ldexpf64
+CFLAGS-w_lgamma.c += -fno-builtin-lgammaf32x -fno-builtin-lgammaf64
+CFLAGS-w_lgamma_r.c += -fno-builtin-lgammaf32x_r -fno-builtin-lgammaf64_r
+CFLAGS-w_llogb.c += -fno-builtin-llogbf32x -fno-builtin-llogbf64
+CFLAGS-s_llrint.c += -fno-builtin-llrintf32x -fno-builtin-llrintf64
+CFLAGS-s_llround.c += -fno-builtin-llroundf32x -fno-builtin-llroundf64
+CFLAGS-e_log.c += -fno-builtin-logf32x -fno-builtin-logf64
+CFLAGS-w_log10.c += -fno-builtin-log10f32x -fno-builtin-log10f64
+CFLAGS-w_log1p.c += -fno-builtin-log1pf32x -fno-builtin-log1pf64
+CFLAGS-e_log2.c += -fno-builtin-log2f32x -fno-builtin-log2f64
+CFLAGS-s_logb.c += -fno-builtin-logbf32x -fno-builtin-logbf64
+CFLAGS-s_lrint.c += -fno-builtin-lrintf32x -fno-builtin-lrintf64
+CFLAGS-s_lround.c += -fno-builtin-lroundf32x -fno-builtin-lroundf64
+CFLAGS-s_modf.c += -fno-builtin-modff32x -fno-builtin-modff64
+CFLAGS-s_nan.c += -fno-builtin-nanf32x -fno-builtin-nanf64
+CFLAGS-s_nearbyint.c += -fno-builtin-nearbyintf32x -fno-builtin-nearbyintf64
+CFLAGS-s_nextafter.c += -fno-builtin-nextafterf32x -fno-builtin-nextafterf64
+CFLAGS-s_nextdown.c += -fno-builtin-nextdownf32x -fno-builtin-nextdownf64
+CFLAGS-s_nextup.c += -fno-builtin-nextupf32x -fno-builtin-nextupf64
+CFLAGS-e_pow.c += -fno-builtin-powf32x -fno-builtin-powf64
+CFLAGS-w_remainder.c += -fno-builtin-remainderf32x -fno-builtin-remainderf64
+CFLAGS-s_remquo.c += -fno-builtin-remquof32x -fno-builtin-remquof64
+CFLAGS-s_rint.c += -fno-builtin-rintf32x -fno-builtin-rintf64
+CFLAGS-s_round.c += -fno-builtin-roundf32x -fno-builtin-roundf64
+CFLAGS-s_roundeven.c += -fno-builtin-roundevenf32x -fno-builtin-roundevenf64
+CFLAGS-w_scalbln.c += -fno-builtin-scalblnf32x -fno-builtin-scalblnf64
+CFLAGS-s_scalbn.c += -fno-builtin-scalbnf32x -fno-builtin-scalbnf64
+CFLAGS-s_setpayload.c += -fno-builtin-setpayloadf32x -fno-builtin-setpayloadf64
+CFLAGS-s_setpayloadsig.c += -fno-builtin-setpayloadsigf32x -fno-builtin-setpayloadsigf64
+CFLAGS-s_sin.c += -fno-builtin-sinf32x -fno-builtin-sinf64
+CFLAGS-s_sincos.c += -fno-builtin-sincosf32x -fno-builtin-sincosf64
+CFLAGS-w_sinh.c += -fno-builtin-sinhf32x -fno-builtin-sinhf64
+CFLAGS-w_sqrt.c += -fno-builtin-sqrtf32x -fno-builtin-sqrtf64
+CFLAGS-s_tan.c += -fno-builtin-tanf32x -fno-builtin-tanf64
+CFLAGS-s_tanh.c += -fno-builtin-tanhf32x -fno-builtin-tanhf64
+CFLAGS-w_tgamma.c += -fno-builtin-tgammaf32x -fno-builtin-tgammaf64
+CFLAGS-s_totalorder.c += -fno-builtin-totalorderf32x -fno-builtin-totalorderf64
+CFLAGS-s_totalordermag.c += -fno-builtin-totalordermagf32x -fno-builtin-totalordermagf64
+CFLAGS-s_trunc.c += -fno-builtin-truncf32x -fno-builtin-truncf64
+CFLAGS-s_ufromfp.c += -fno-builtin-ufromfpf32x -fno-builtin-ufromfpf64
+CFLAGS-s_ufromfpx.c += -fno-builtin-ufromfpxf32x -fno-builtin-ufromfpxf64
+CFLAGS-s_y0.c += -fno-builtin-y0f32x -fno-builtin-y0f64
+CFLAGS-s_y1.c += -fno-builtin-y1f32x -fno-builtin-y1f64
+CFLAGS-s_yn.c += -fno-builtin-ynf32x -fno-builtin-ynf64
+
+# Likewise, for _Float32 aliases.
+CFLAGS-w_acosf.c += -fno-builtin-acosf32
+CFLAGS-w_acoshf.c += -fno-builtin-acoshf32
+CFLAGS-w_asinf.c += -fno-builtin-asinf32
+CFLAGS-s_asinhf.c += -fno-builtin-asinhf32
+CFLAGS-s_atanf.c += -fno-builtin-atanf32
+CFLAGS-w_atan2f.c += -fno-builtin-atan2f32
+CFLAGS-w_atanhf.c += -fno-builtin-atanhf32
+CFLAGS-s_cabsf.c += -fno-builtin-cabsf32
+CFLAGS-s_cacosf.c += -fno-builtin-cacosf32
+CFLAGS-s_cacoshf.c += -fno-builtin-cacoshf32
+CFLAGS-s_canonicalizef.c += -fno-builtin-canonicalizef32
+CFLAGS-s_cargf.c += -fno-builtin-cargf32
+CFLAGS-s_casinf.c += -fno-builtin-casinf32
+CFLAGS-s_casinhf.c += -fno-builtin-casinhf32
+CFLAGS-s_catanf.c += -fno-builtin-catanf32
+CFLAGS-s_catanhf.c += -fno-builtin-catanhf32
+CFLAGS-s_cbrtf.c += -fno-builtin-cbrtf32
+CFLAGS-s_ccosf.c += -fno-builtin-ccosf32
+CFLAGS-s_ccoshf.c += -fno-builtin-ccoshf32
+CFLAGS-s_ceilf.c += -fno-builtin-ceilf32
+CFLAGS-s_cexpf.c += -fno-builtin-cexpf32
+CFLAGS-s_cimagf.c += -fno-builtin-cimagf32
+CFLAGS-s_clogf.c += -fno-builtin-clogf32
+CFLAGS-s_clog10f.c += -fno-builtin-clog10f32
+CFLAGS-s_conjf.c += -fno-builtin-conjf32
+CFLAGS-s_copysignf.c += -fno-builtin-copysignf32
+CFLAGS-s_cosf.c += -fno-builtin-cosf32
+CFLAGS-w_coshf.c += -fno-builtin-coshf32
+CFLAGS-s_cpowf.c += -fno-builtin-cpowf32
+CFLAGS-s_cprojf.c += -fno-builtin-cprojf32
+CFLAGS-s_crealf.c += -fno-builtin-crealf32
+CFLAGS-s_csinf.c += -fno-builtin-csinf32
+CFLAGS-s_csinhf.c += -fno-builtin-csinhf32
+CFLAGS-s_csqrtf.c += -fno-builtin-csqrtf32
+CFLAGS-s_ctanf.c += -fno-builtin-ctanf32
+CFLAGS-s_ctanhf.c += -fno-builtin-ctanhf32
+CFLAGS-s_erff.c += -fno-builtin-erff32
+CFLAGS-s_erfcf.c += -fno-builtin-erfcf32
+CFLAGS-e_expf.c += -fno-builtin-expf32
+CFLAGS-w_exp10f.c += -fno-builtin-exp10f32
+CFLAGS-e_exp2f.c += -fno-builtin-exp2f32
+CFLAGS-s_expm1f.c += -fno-builtin-expm1f32
+CFLAGS-s_fabsf.c += -fno-builtin-fabsf32
+CFLAGS-s_fdimf.c += -fno-builtin-fdimf32
+CFLAGS-s_floorf.c += -fno-builtin-floorf32
+CFLAGS-s_fmaf.c += -fno-builtin-fmaf32
+CFLAGS-s_fmaxf.c += -fno-builtin-fmaxf32
+CFLAGS-s_fmaximumf.c += -fno-builtin-fmaximumf32
+CFLAGS-s_fmaximum_magf.c += -fno-builtin-fmaximum_magf32
+CFLAGS-s_fmaximum_mag_numf.c += -fno-builtin-fmaximum_mag_numf32
+CFLAGS-s_fmaximum_numf.c += -fno-builtin-fmaximum_numf32
+CFLAGS-s_fmaxmagf.c += -fno-builtin-fmaxmagf32
+CFLAGS-s_fminf.c += -fno-builtin-fminf32
+CFLAGS-s_fminimumf.c += -fno-builtin-fminimumf32
+CFLAGS-s_fminimum_magf.c += -fno-builtin-fminimum_magf32
+CFLAGS-s_fminimum_mag_numf.c += -fno-builtin-fminimum_mag_numf32
+CFLAGS-s_fminimum_numf.c += -fno-builtin-fminimum_numf32
+CFLAGS-s_fminmagf.c += -fno-builtin-fminmagf32
+CFLAGS-w_fmodf.c += -fno-builtin-fmodf32
+CFLAGS-s_frexpf.c += -fno-builtin-frexpf32
+CFLAGS-s_fromfpf.c += -fno-builtin-fromfpf32
+CFLAGS-s_fromfpxf.c += -fno-builtin-fromfpxf32
+CFLAGS-s_getpayloadf.c += -fno-builtin-getpayloadf32
+CFLAGS-w_hypotf.c += -fno-builtin-hypotf32
+CFLAGS-w_ilogbf.c += -fno-builtin-ilogbf32
+CFLAGS-w_j0f.c += -fno-builtin-j0f32
+CFLAGS-w_j1f.c += -fno-builtin-j1f32
+CFLAGS-w_jnf.c += -fno-builtin-jnf32
+CFLAGS-s_ldexpf.c += -fno-builtin-ldexpf32
+CFLAGS-w_lgammaf.c += -fno-builtin-lgammaf32
+CFLAGS-w_lgammaf_r.c += -fno-builtin-lgammaf32_r
+CFLAGS-w_llogbf.c += -fno-builtin-llogbf32
+CFLAGS-s_llrintf.c += -fno-builtin-llrintf32
+CFLAGS-s_llroundf.c += -fno-builtin-llroundf32
+CFLAGS-e_logf.c += -fno-builtin-logf32
+CFLAGS-w_log10f.c += -fno-builtin-log10f32
+CFLAGS-w_log1pf.c += -fno-builtin-log1pf32
+CFLAGS-e_log2f.c += -fno-builtin-log2f32
+CFLAGS-s_logbf.c += -fno-builtin-logbf32
+CFLAGS-s_lrintf.c += -fno-builtin-lrintf32
+CFLAGS-s_lroundf.c += -fno-builtin-lroundf32
+CFLAGS-s_modff.c += -fno-builtin-modff32
+CFLAGS-s_nanf.c += -fno-builtin-nanf32
+CFLAGS-s_nearbyintf.c += -fno-builtin-nearbyintf32
+CFLAGS-s_nextafterf.c += -fno-builtin-nextafterf32
+CFLAGS-s_nextdownf.c += -fno-builtin-nextdownf32
+CFLAGS-s_nextupf.c += -fno-builtin-nextupf32
+CFLAGS-e_powf.c += -fno-builtin-powf32
+CFLAGS-w_remainderf.c += -fno-builtin-remainderf32
+CFLAGS-s_remquof.c += -fno-builtin-remquof32
+CFLAGS-s_rintf.c += -fno-builtin-rintf32
+CFLAGS-s_roundf.c += -fno-builtin-roundf32
+CFLAGS-s_roundevenf.c += -fno-builtin-roundevenf32
+CFLAGS-w_scalblnf.c += -fno-builtin-scalblnf32
+CFLAGS-s_scalbnf.c += -fno-builtin-scalbnf32
+CFLAGS-s_setpayloadf.c += -fno-builtin-setpayloadf32
+CFLAGS-s_setpayloadsigf.c += -fno-builtin-setpayloadsigf32
+CFLAGS-s_sinf.c += -fno-builtin-sinf32
+CFLAGS-s_sincosf.c += -fno-builtin-sincosf32
+CFLAGS-w_sinhf.c += -fno-builtin-sinhf32
+CFLAGS-w_sqrtf.c += -fno-builtin-sqrtf32
+CFLAGS-s_tanf.c += -fno-builtin-tanf32
+CFLAGS-s_tanhf.c += -fno-builtin-tanhf32
+CFLAGS-w_tgammaf.c += -fno-builtin-tgammaf32
+CFLAGS-s_totalorderf.c += -fno-builtin-totalorderf32
+CFLAGS-s_totalordermagf.c += -fno-builtin-totalordermagf32
+CFLAGS-s_truncf.c += -fno-builtin-truncf32
+CFLAGS-s_ufromfpf.c += -fno-builtin-ufromfpf32
+CFLAGS-s_ufromfpxf.c += -fno-builtin-ufromfpxf32
+CFLAGS-s_y0f.c += -fno-builtin-y0f32
+CFLAGS-s_y1f.c += -fno-builtin-y1f32
+CFLAGS-s_ynf.c += -fno-builtin-ynf32
+
# These files quiet sNaNs in a way that is optimized away without
# -fsignaling-nans.
CFLAGS-s_modf.c += -fsignaling-nans
diff --git a/sysdeps/ieee754/float128/Makefile b/sysdeps/ieee754/float128/Makefile
index 571a841809234edd..f869e80f268ba446 100644
--- a/sysdeps/ieee754/float128/Makefile
+++ b/sysdeps/ieee754/float128/Makefile
@@ -10,3 +10,130 @@ endif
ifeq ($(subdir),wcsmbs)
routines += wcstof128_l wcstof128 wcstof128_nan
endif
+
+ifeq ($(subdir),math)
+CFLAGS-w_acosf128.c += -fno-builtin-acosf64x
+CFLAGS-w_acoshf128.c += -fno-builtin-acoshf64x
+CFLAGS-w_asinf128.c += -fno-builtin-asinf64x
+CFLAGS-s_asinhf128.c += -fno-builtin-asinhf64x
+CFLAGS-s_atanf128.c += -fno-builtin-atanf64x
+CFLAGS-w_atan2f128.c += -fno-builtin-atan2f64x
+CFLAGS-w_atanhf128.c += -fno-builtin-atanhf64x
+CFLAGS-s_cabsf128.c += -fno-builtin-cabsf64x
+CFLAGS-s_cacosf128.c += -fno-builtin-cacosf64x
+CFLAGS-s_cacoshf128.c += -fno-builtin-cacoshf64x
+CFLAGS-s_canonicalizef128.c += -fno-builtin-canonicalizef64x
+CFLAGS-s_cargf128.c += -fno-builtin-cargf64x
+CFLAGS-s_casinf128.c += -fno-builtin-casinf64x
+CFLAGS-s_casinhf128.c += -fno-builtin-casinhf64x
+CFLAGS-s_catanf128.c += -fno-builtin-catanf64x
+CFLAGS-s_catanhf128.c += -fno-builtin-catanhf64x
+CFLAGS-s_cbrtf128.c += -fno-builtin-cbrtf64x
+CFLAGS-s_ccosf128.c += -fno-builtin-ccosf64x
+CFLAGS-s_ccoshf128.c += -fno-builtin-ccoshf64x
+CFLAGS-s_ceilf128.c += -fno-builtin-ceilf64x
+CFLAGS-s_cexpf128.c += -fno-builtin-cexpf64x
+CFLAGS-s_cimagf128.c += -fno-builtin-cimagf64x
+CFLAGS-s_clogf128.c += -fno-builtin-clogf64x
+CFLAGS-s_clog10f128.c += -fno-builtin-clog10f64x
+CFLAGS-s_conjf128.c += -fno-builtin-conjf64x
+CFLAGS-s_copysignf128.c += -fno-builtin-copysignf64x
+CFLAGS-s_cosf128.c += -fno-builtin-cosf64x
+CFLAGS-w_coshf128.c += -fno-builtin-coshf64x
+CFLAGS-s_cpowf128.c += -fno-builtin-cpowf64x
+CFLAGS-s_cprojf128.c += -fno-builtin-cprojf64x
+CFLAGS-s_crealf128.c += -fno-builtin-crealf64x
+CFLAGS-s_csinf128.c += -fno-builtin-csinf64x
+CFLAGS-s_csinhf128.c += -fno-builtin-csinhf64x
+CFLAGS-s_csqrtf128.c += -fno-builtin-csqrtf64x
+CFLAGS-s_ctanf128.c += -fno-builtin-ctanf64x
+CFLAGS-s_ctanhf128.c += -fno-builtin-ctanhf64x
+CFLAGS-s_daddf128.c += -fno-builtin-f64addf64x
+CFLAGS-s_ddivf128.c += -fno-builtin-f64divf64x
+CFLAGS-s_dfmaf128.c += -fno-builtin-f64fmaf64x
+CFLAGS-s_dmulf128.c += -fno-builtin-f64mulf64x
+CFLAGS-s_dsqrtf128.c += -fno-builtin-f64sqrtf64x
+CFLAGS-s_dsubf128.c += -fno-builtin-f64subf64x
+CFLAGS-s_erff128.c += -fno-builtin-erff64x
+CFLAGS-s_erfcf128.c += -fno-builtin-erfcf64x
+CFLAGS-e_expf128.c += -fno-builtin-expf64x
+CFLAGS-w_exp10f128.c += -fno-builtin-exp10f64x
+CFLAGS-e_exp2f128.c += -fno-builtin-exp2f64x
+CFLAGS-s_expm1f128.c += -fno-builtin-expm1f64x
+CFLAGS-s_fabsf128.c += -fno-builtin-fabsf64x
+CFLAGS-s_faddf128.c += -fno-builtin-f32addf64x
+CFLAGS-s_fdimf128.c += -fno-builtin-fdimf64x
+CFLAGS-s_fdivf128.c += -fno-builtin-f32divf64x
+CFLAGS-s_ffmaf128.c += -fno-builtin-f32fmaf64x
+CFLAGS-s_floorf128.c += -fno-builtin-floorf64x
+CFLAGS-s_fmaf128.c += -fno-builtin-fmaf64x
+CFLAGS-s_fmaxf128.c += -fno-builtin-fmaxf64x
+CFLAGS-s_fmaximumf128.c += -fno-builtin-fmaximumf64x
+CFLAGS-s_fmaximum_magf128.c += -fno-builtin-fmaximum_magf64x
+CFLAGS-s_fmaximum_mag_numf128.c += -fno-builtin-fmaximum_mag_numf64x
+CFLAGS-s_fmaximum_numf128.c += -fno-builtin-fmaximum_numf64x
+CFLAGS-s_fmaxmagf128.c += -fno-builtin-fmaxmagf64x
+CFLAGS-s_fminf128.c += -fno-builtin-fminf64x
+CFLAGS-s_fminimumf128.c += -fno-builtin-fminimumf64x
+CFLAGS-s_fminimum_magf128.c += -fno-builtin-fminimum_magf64x
+CFLAGS-s_fminimum_mag_numf128.c += -fno-builtin-fminimum_mag_numf64x
+CFLAGS-s_fminimum_numf128.c += -fno-builtin-fminimum_numf64x
+CFLAGS-s_fminmagf128.c += -fno-builtin-fminmagf64x
+CFLAGS-w_fmodf128.c += -fno-builtin-fmodf64x
+CFLAGS-s_fmulf128.c += -fno-builtin-f32mulf64x
+CFLAGS-s_frexpf128.c += -fno-builtin-frexpf64x
+CFLAGS-s_fromfpf128.c += -fno-builtin-fromfpf64x
+CFLAGS-s_fromfpxf128.c += -fno-builtin-fromfpxf64x
+CFLAGS-s_fsqrtf128.c += -fno-builtin-f32sqrtf64x
+CFLAGS-s_fsubf128.c += -fno-builtin-f32subf64x
+CFLAGS-s_getpayloadf128.c += -fno-builtin-getpayloadf64x
+CFLAGS-w_hypotf128.c += -fno-builtin-hypotf64x
+CFLAGS-w_ilogbf128.c += -fno-builtin-ilogbf64x
+CFLAGS-w_j0f128.c += -fno-builtin-j0f64x
+CFLAGS-w_j1f128.c += -fno-builtin-j1f64x
+CFLAGS-w_jnf128.c += -fno-builtin-jnf64x
+CFLAGS-s_ldexpf128.c += -fno-builtin-ldexpf64x
+CFLAGS-w_lgammaf128.c += -fno-builtin-lgammaf64x
+CFLAGS-w_lgammaf128_r.c += -fno-builtin-lgammaf64x_r
+CFLAGS-w_llogbf128.c += -fno-builtin-llogbf64x
+CFLAGS-s_llrintf128.c += -fno-builtin-llrintf64x
+CFLAGS-s_llroundf128.c += -fno-builtin-llroundf64x
+CFLAGS-e_logf128.c += -fno-builtin-logf64x
+CFLAGS-w_log10f128.c += -fno-builtin-log10f64x
+CFLAGS-w_log1pf128.c += -fno-builtin-log1pf64x
+CFLAGS-e_log2f128.c += -fno-builtin-log2f64x
+CFLAGS-s_logbf128.c += -fno-builtin-logbf64x
+CFLAGS-s_lrintf128.c += -fno-builtin-lrintf64x
+CFLAGS-s_lroundf128.c += -fno-builtin-lroundf64x
+CFLAGS-s_modff128.c += -fno-builtin-modff64x
+CFLAGS-s_nanf128.c += -fno-builtin-nanf64x
+CFLAGS-s_nearbyintf128.c += -fno-builtin-nearbyintf64x
+CFLAGS-s_nextafterf128.c += -fno-builtin-nextafterf64x
+CFLAGS-s_nextdownf128.c += -fno-builtin-nextdownf64x
+CFLAGS-s_nextupf128.c += -fno-builtin-nextupf64x
+CFLAGS-e_powf128.c += -fno-builtin-powf64x
+CFLAGS-w_remainderf128.c += -fno-builtin-remainderf64x
+CFLAGS-s_remquof128.c += -fno-builtin-remquof64x
+CFLAGS-s_rintf128.c += -fno-builtin-rintf64x
+CFLAGS-s_roundf128.c += -fno-builtin-roundf64x
+CFLAGS-s_roundevenf128.c += -fno-builtin-roundevenf64x
+CFLAGS-w_scalblnf128.c += -fno-builtin-scalblnf64x
+CFLAGS-s_scalbnf128.c += -fno-builtin-scalbnf64x
+CFLAGS-s_setpayloadf128.c += -fno-builtin-setpayloadf64x
+CFLAGS-s_setpayloadsigf128.c += -fno-builtin-setpayloadsigf64x
+CFLAGS-s_sinf128.c += -fno-builtin-sinf64x
+CFLAGS-s_sincosf128.c += -fno-builtin-sincosf64x
+CFLAGS-w_sinhf128.c += -fno-builtin-sinhf64x
+CFLAGS-w_sqrtf128.c += -fno-builtin-sqrtf64x
+CFLAGS-s_tanf128.c += -fno-builtin-tanf64x
+CFLAGS-s_tanhf128.c += -fno-builtin-tanhf64x
+CFLAGS-w_tgammaf128.c += -fno-builtin-tgammaf64x
+CFLAGS-s_totalorderf128.c += -fno-builtin-totalorderf64x
+CFLAGS-s_totalordermagf128.c += -fno-builtin-totalordermagf64x
+CFLAGS-s_truncf128.c += -fno-builtin-truncf64x
+CFLAGS-s_ufromfpf128.c += -fno-builtin-ufromfpf64x
+CFLAGS-s_ufromfpxf128.c += -fno-builtin-ufromfpxf64x
+CFLAGS-s_y0f128.c += -fno-builtin-y0f64x
+CFLAGS-s_y1f128.c += -fno-builtin-y1f64x
+CFLAGS-s_ynf128.c += -fno-builtin-ynf64x
+endif
diff --git a/sysdeps/ieee754/ldbl-128/Makefile b/sysdeps/ieee754/ldbl-128/Makefile
index 8fd6dad343bde2c9..9cbfc7ff6e8cd6f7 100644
--- a/sysdeps/ieee754/ldbl-128/Makefile
+++ b/sysdeps/ieee754/ldbl-128/Makefile
@@ -1 +1,128 @@
long-double-fcts = yes
+
+ifeq ($(subdir),math)
+CFLAGS-w_acosl.c += -fno-builtin-acosf64x -fno-builtin-acosf128
+CFLAGS-w_acoshl.c += -fno-builtin-acoshf64x -fno-builtin-acoshf128
+CFLAGS-w_asinl.c += -fno-builtin-asinf64x -fno-builtin-asinf128
+CFLAGS-s_asinhl.c += -fno-builtin-asinhf64x -fno-builtin-asinhf128
+CFLAGS-s_atanl.c += -fno-builtin-atanf64x -fno-builtin-atanf128
+CFLAGS-w_atan2l.c += -fno-builtin-atan2f64x -fno-builtin-atan2f128
+CFLAGS-w_atanhl.c += -fno-builtin-atanhf64x -fno-builtin-atanhf128
+CFLAGS-s_cabsl.c += -fno-builtin-cabsf64x -fno-builtin-cabsf128
+CFLAGS-s_cacosl.c += -fno-builtin-cacosf64x -fno-builtin-cacosf128
+CFLAGS-s_cacoshl.c += -fno-builtin-cacoshf64x -fno-builtin-cacoshf128
+CFLAGS-s_canonicalizel.c += -fno-builtin-canonicalizef64x -fno-builtin-canonicalizef128
+CFLAGS-s_cargl.c += -fno-builtin-cargf64x -fno-builtin-cargf128
+CFLAGS-s_casinl.c += -fno-builtin-casinf64x -fno-builtin-casinf128
+CFLAGS-s_casinhl.c += -fno-builtin-casinhf64x -fno-builtin-casinhf128
+CFLAGS-s_catanl.c += -fno-builtin-catanf64x -fno-builtin-catanf128
+CFLAGS-s_catanhl.c += -fno-builtin-catanhf64x -fno-builtin-catanhf128
+CFLAGS-s_cbrtl.c += -fno-builtin-cbrtf64x -fno-builtin-cbrtf128
+CFLAGS-s_ccosl.c += -fno-builtin-ccosf64x -fno-builtin-ccosf128
+CFLAGS-s_ccoshl.c += -fno-builtin-ccoshf64x -fno-builtin-ccoshf128
+CFLAGS-s_ceill.c += -fno-builtin-ceilf64x -fno-builtin-ceilf128
+CFLAGS-s_cexpl.c += -fno-builtin-cexpf64x -fno-builtin-cexpf128
+CFLAGS-s_cimagl.c += -fno-builtin-cimagf64x -fno-builtin-cimagf128
+CFLAGS-s_clogl.c += -fno-builtin-clogf64x -fno-builtin-clogf128
+CFLAGS-s_clog10l.c += -fno-builtin-clog10f64x -fno-builtin-clog10f128
+CFLAGS-s_conjl.c += -fno-builtin-conjf64x -fno-builtin-conjf128
+CFLAGS-s_copysignl.c += -fno-builtin-copysignf64x -fno-builtin-copysignf128
+CFLAGS-s_cosl.c += -fno-builtin-cosf64x -fno-builtin-cosf128
+CFLAGS-w_coshl.c += -fno-builtin-coshf64x -fno-builtin-coshf128
+CFLAGS-s_cpowl.c += -fno-builtin-cpowf64x -fno-builtin-cpowf128
+CFLAGS-s_cprojl.c += -fno-builtin-cprojf64x -fno-builtin-cprojf128
+CFLAGS-s_creall.c += -fno-builtin-crealf64x -fno-builtin-crealf128
+CFLAGS-s_csinl.c += -fno-builtin-csinf64x -fno-builtin-csinf128
+CFLAGS-s_csinhl.c += -fno-builtin-csinhf64x -fno-builtin-csinhf128
+CFLAGS-s_csqrtl.c += -fno-builtin-csqrtf64x -fno-builtin-csqrtf128
+CFLAGS-s_ctanl.c += -fno-builtin-ctanf64x -fno-builtin-ctanf128
+CFLAGS-s_ctanhl.c += -fno-builtin-ctanhf64x -fno-builtin-ctanhf128
+CFLAGS-s_daddl.c += -fno-builtin-f64addf64x -fno-builtin-f64addf128
+CFLAGS-s_ddivl.c += -fno-builtin-f64divf64x -fno-builtin-f64divf128
+CFLAGS-s_dfmal.c += -fno-builtin-f64fmaf64x -fno-builtin-f64fmaf128
+CFLAGS-s_dmull.c += -fno-builtin-f64mulf64x -fno-builtin-f64mulf128
+CFLAGS-s_dsqrtl.c += -fno-builtin-f64sqrtf64x -fno-builtin-f64sqrtf128
+CFLAGS-s_dsubl.c += -fno-builtin-f64subf64x -fno-builtin-f64subf128
+CFLAGS-s_erfl.c += -fno-builtin-erff64x -fno-builtin-erff128
+CFLAGS-s_erfcl.c += -fno-builtin-erfcf64x -fno-builtin-erfcf128
+CFLAGS-e_expl.c += -fno-builtin-expf64x -fno-builtin-expf128
+CFLAGS-w_exp10l.c += -fno-builtin-exp10f64x -fno-builtin-exp10f128
+CFLAGS-e_exp2l.c += -fno-builtin-exp2f64x -fno-builtin-exp2f128
+CFLAGS-s_expm1l.c += -fno-builtin-expm1f64x -fno-builtin-expm1f128
+CFLAGS-s_fabsl.c += -fno-builtin-fabsf64x -fno-builtin-fabsf128
+CFLAGS-s_faddl.c += -fno-builtin-f32addf64x -fno-builtin-f32addf128
+CFLAGS-s_fdiml.c += -fno-builtin-fdimf64x -fno-builtin-fdimf128
+CFLAGS-s_fdivl.c += -fno-builtin-f32divf64x -fno-builtin-f32divf128
+CFLAGS-s_ffmal.c += -fno-builtin-f32fmaf64x -fno-builtin-f32fmaf128
+CFLAGS-s_floorl.c += -fno-builtin-floorf64x -fno-builtin-floorf128
+CFLAGS-s_fmal.c += -fno-builtin-fmaf64x -fno-builtin-fmaf128
+CFLAGS-s_fmaxl.c += -fno-builtin-fmaxf64x -fno-builtin-fmaxf128
+CFLAGS-s_fmaximuml.c += -fno-builtin-fmaximumf64x -fno-builtin-fmaximumf128
+CFLAGS-s_fmaximum_magl.c += -fno-builtin-fmaximum_magf64x -fno-builtin-fmaximum_magf128
+CFLAGS-s_fmaximum_mag_numl.c += -fno-builtin-fmaximum_mag_numf64x -fno-builtin-fmaximum_mag_numf128
+CFLAGS-s_fmaximum_numl.c += -fno-builtin-fmaximum_numf64x -fno-builtin-fmaximum_numf128
+CFLAGS-s_fmaxmagl.c += -fno-builtin-fmaxmagf64x -fno-builtin-fmaxmagf128
+CFLAGS-s_fminl.c += -fno-builtin-fminf64x -fno-builtin-fminf128
+CFLAGS-s_fminimuml.c += -fno-builtin-fminimumf64x -fno-builtin-fminimumf128
+CFLAGS-s_fminimum_magl.c += -fno-builtin-fminimum_magf64x -fno-builtin-fminimum_magf128
+CFLAGS-s_fminimum_mag_numl.c += -fno-builtin-fminimum_mag_numf64x -fno-builtin-fminimum_mag_numf128
+CFLAGS-s_fminimum_numl.c += -fno-builtin-fminimum_numf64x -fno-builtin-fminimum_numf128
+CFLAGS-s_fminmagl.c += -fno-builtin-fminmagf64x -fno-builtin-fminmagf128
+CFLAGS-w_fmodl.c += -fno-builtin-fmodf64x -fno-builtin-fmodf128
+CFLAGS-s_fmull.c += -fno-builtin-f32mulf64x -fno-builtin-f32mulf128
+CFLAGS-s_frexpl.c += -fno-builtin-frexpf64x -fno-builtin-frexpf128
+CFLAGS-s_fromfpl.c += -fno-builtin-fromfpf64x -fno-builtin-fromfpf128
+CFLAGS-s_fromfpxl.c += -fno-builtin-fromfpxf64x -fno-builtin-fromfpxf128
+CFLAGS-s_fsqrtl.c += -fno-builtin-f32sqrtf64x -fno-builtin-f32sqrtf128
+CFLAGS-s_fsubl.c += -fno-builtin-f32subf64x -fno-builtin-f32subf128
+CFLAGS-s_getpayloadl.c += -fno-builtin-getpayloadf64x -fno-builtin-getpayloadf128
+CFLAGS-w_hypotl.c += -fno-builtin-hypotf64x -fno-builtin-hypotf128
+CFLAGS-w_ilogbl.c += -fno-builtin-ilogbf64x -fno-builtin-ilogbf128
+CFLAGS-w_j0l.c += -fno-builtin-j0f64x -fno-builtin-j0f128
+CFLAGS-w_j1l.c += -fno-builtin-j1f64x -fno-builtin-j1f128
+CFLAGS-w_jnl.c += -fno-builtin-jnf64x -fno-builtin-jnf128
+CFLAGS-s_ldexpl.c += -fno-builtin-ldexpf64x -fno-builtin-ldexpf128
+CFLAGS-w_lgammal.c += -fno-builtin-lgammaf64x -fno-builtin-lgammaf128
+CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r
+CFLAGS-w_llogbl.c += -fno-builtin-llogbf64x -fno-builtin-llogbf128
+CFLAGS-s_llrintl.c += -fno-builtin-llrintf64x -fno-builtin-llrintf128
+CFLAGS-s_llroundl.c += -fno-builtin-llroundf64x -fno-builtin-llroundf128
+CFLAGS-e_logl.c += -fno-builtin-logf64x -fno-builtin-logf128
+CFLAGS-w_log10l.c += -fno-builtin-log10f64x -fno-builtin-log10f128
+CFLAGS-w_log1pl.c += -fno-builtin-log1pf64x -fno-builtin-log1pf128
+CFLAGS-e_log2l.c += -fno-builtin-log2f64x -fno-builtin-log2f128
+CFLAGS-s_logbl.c += -fno-builtin-logbf64x -fno-builtin-logbf128
+CFLAGS-s_lrintl.c += -fno-builtin-lrintf64x -fno-builtin-lrintf128
+CFLAGS-s_lroundl.c += -fno-builtin-lroundf64x -fno-builtin-lroundf128
+CFLAGS-s_modfl.c += -fno-builtin-modff64x -fno-builtin-modff128
+CFLAGS-s_nanl.c += -fno-builtin-nanf64x -fno-builtin-nanf128
+CFLAGS-s_nearbyintl.c += -fno-builtin-nearbyintf64x -fno-builtin-nearbyintf128
+CFLAGS-s_nextafterl.c += -fno-builtin-nextafterf64x -fno-builtin-nextafterf128
+CFLAGS-s_nextdownl.c += -fno-builtin-nextdownf64x -fno-builtin-nextdownf128
+CFLAGS-s_nextupl.c += -fno-builtin-nextupf64x -fno-builtin-nextupf128
+CFLAGS-e_powl.c += -fno-builtin-powf64x -fno-builtin-powf128
+CFLAGS-w_remainderl.c += -fno-builtin-remainderf64x -fno-builtin-remainderf128
+CFLAGS-s_remquol.c += -fno-builtin-remquof64x -fno-builtin-remquof128
+CFLAGS-s_rintl.c += -fno-builtin-rintf64x -fno-builtin-rintf128
+CFLAGS-s_roundl.c += -fno-builtin-roundf64x -fno-builtin-roundf128
+CFLAGS-s_roundevenl.c += -fno-builtin-roundevenf64x -fno-builtin-roundevenf128
+CFLAGS-w_scalblnl.c += -fno-builtin-scalblnf64x -fno-builtin-scalblnf128
+CFLAGS-s_scalbnl.c += -fno-builtin-scalbnf64x -fno-builtin-scalbnf128
+CFLAGS-s_setpayloadl.c += -fno-builtin-setpayloadf64x -fno-builtin-setpayloadf128
+CFLAGS-s_setpayloadsigl.c += -fno-builtin-setpayloadsigf64x -fno-builtin-setpayloadsigf128
+CFLAGS-s_sinl.c += -fno-builtin-sinf64x -fno-builtin-sinf128
+CFLAGS-s_sincosl.c += -fno-builtin-sincosf64x -fno-builtin-sincosf128
+CFLAGS-w_sinhl.c += -fno-builtin-sinhf64x -fno-builtin-sinhf128
+CFLAGS-w_sqrtl.c += -fno-builtin-sqrtf64x -fno-builtin-sqrtf128
+CFLAGS-s_tanl.c += -fno-builtin-tanf64x -fno-builtin-tanf128
+CFLAGS-s_tanhl.c += -fno-builtin-tanhf64x -fno-builtin-tanhf128
+CFLAGS-w_tgammal.c += -fno-builtin-tgammaf64x -fno-builtin-tgammaf128
+CFLAGS-s_totalorderl.c += -fno-builtin-totalorderf64x -fno-builtin-totalorderf128
+CFLAGS-s_totalordermagl.c += -fno-builtin-totalordermagf64x -fno-builtin-totalordermagf128
+CFLAGS-s_truncl.c += -fno-builtin-truncf64x -fno-builtin-truncf128
+CFLAGS-s_ufromfpl.c += -fno-builtin-ufromfpf64x -fno-builtin-ufromfpf128
+CFLAGS-s_ufromfpxl.c += -fno-builtin-ufromfpxf64x -fno-builtin-ufromfpxf128
+CFLAGS-s_y0l.c += -fno-builtin-y0f64x -fno-builtin-y0f128
+CFLAGS-s_y1l.c += -fno-builtin-y1f64x -fno-builtin-y1f128
+CFLAGS-s_ynl.c += -fno-builtin-ynf64x -fno-builtin-ynf128
+endif
diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile
index 75aed7ae6ca217ed..f28f91fa6a1e61a7 100644
--- a/sysdeps/ieee754/ldbl-96/Makefile
+++ b/sysdeps/ieee754/ldbl-96/Makefile
@@ -21,4 +21,130 @@ tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo
ifeq ($(have-ssp),yes)
CFLAGS-test-sinl-pseudo.c += -fstack-protector-all
endif
+
+CFLAGS-w_acosl.c += -fno-builtin-acosf64x
+CFLAGS-w_acoshl.c += -fno-builtin-acoshf64x
+CFLAGS-w_asinl.c += -fno-builtin-asinf64x
+CFLAGS-s_asinhl.c += -fno-builtin-asinhf64x
+CFLAGS-s_atanl.c += -fno-builtin-atanf64x
+CFLAGS-w_atan2l.c += -fno-builtin-atan2f64x
+CFLAGS-w_atanhl.c += -fno-builtin-atanhf64x
+CFLAGS-s_cabsl.c += -fno-builtin-cabsf64x
+CFLAGS-s_cacosl.c += -fno-builtin-cacosf64x
+CFLAGS-s_cacoshl.c += -fno-builtin-cacoshf64x
+CFLAGS-s_canonicalizel.c += -fno-builtin-canonicalizef64x
+CFLAGS-s_cargl.c += -fno-builtin-cargf64x
+CFLAGS-s_casinl.c += -fno-builtin-casinf64x
+CFLAGS-s_casinhl.c += -fno-builtin-casinhf64x
+CFLAGS-s_catanl.c += -fno-builtin-catanf64x
+CFLAGS-s_catanhl.c += -fno-builtin-catanhf64x
+CFLAGS-s_cbrtl.c += -fno-builtin-cbrtf64x
+CFLAGS-s_ccosl.c += -fno-builtin-ccosf64x
+CFLAGS-s_ccoshl.c += -fno-builtin-ccoshf64x
+CFLAGS-s_ceill.c += -fno-builtin-ceilf64x
+CFLAGS-s_cexpl.c += -fno-builtin-cexpf64x
+CFLAGS-s_cimagl.c += -fno-builtin-cimagf64x
+CFLAGS-s_clogl.c += -fno-builtin-clogf64x
+CFLAGS-s_clog10l.c += -fno-builtin-clog10f64x
+CFLAGS-s_conjl.c += -fno-builtin-conjf64x
+CFLAGS-s_copysignl.c += -fno-builtin-copysignf64x
+CFLAGS-s_cosl.c += -fno-builtin-cosf64x
+CFLAGS-w_coshl.c += -fno-builtin-coshf64x
+CFLAGS-s_cpowl.c += -fno-builtin-cpowf64x
+CFLAGS-s_cprojl.c += -fno-builtin-cprojf64x
+CFLAGS-s_creall.c += -fno-builtin-crealf64x
+CFLAGS-s_csinl.c += -fno-builtin-csinf64x
+CFLAGS-s_csinhl.c += -fno-builtin-csinhf64x
+CFLAGS-s_csqrtl.c += -fno-builtin-csqrtf64x
+CFLAGS-s_ctanl.c += -fno-builtin-ctanf64x
+CFLAGS-s_ctanhl.c += -fno-builtin-ctanhf64x
+CFLAGS-s_daddl.c += -fno-builtin-f64addf64x
+CFLAGS-s_ddivl.c += -fno-builtin-f64divf64x
+CFLAGS-s_dfmal.c += -fno-builtin-f64fmaf64x
+CFLAGS-s_dmull.c += -fno-builtin-f64mulf64x
+CFLAGS-s_dsqrtl.c += -fno-builtin-f64sqrtf64x
+CFLAGS-s_dsubl.c += -fno-builtin-f64subf64x
+CFLAGS-s_erfl.c += -fno-builtin-erff64x
+CFLAGS-s_erfcl.c += -fno-builtin-erfcf64x
+CFLAGS-e_expl.c += -fno-builtin-expf64x
+CFLAGS-w_exp10l.c += -fno-builtin-exp10f64x
+CFLAGS-e_exp2l.c += -fno-builtin-exp2f64x
+CFLAGS-s_expm1l.c += -fno-builtin-expm1f64x
+CFLAGS-s_fabsl.c += -fno-builtin-fabsf64x
+CFLAGS-s_faddl.c += -fno-builtin-f32addf64x
+CFLAGS-s_fdiml.c += -fno-builtin-fdimf64x
+CFLAGS-s_fdivl.c += -fno-builtin-f32divf64x
+CFLAGS-s_ffmal.c += -fno-builtin-f32fmaf64x
+CFLAGS-s_floorl.c += -fno-builtin-floorf64x
+CFLAGS-s_fmal.c += -fno-builtin-fmaf64x
+CFLAGS-s_fmaxl.c += -fno-builtin-fmaxf64x
+CFLAGS-s_fmaximuml.c += -fno-builtin-fmaximumf64x
+CFLAGS-s_fmaximum_magl.c += -fno-builtin-fmaximum_magf64x
+CFLAGS-s_fmaximum_mag_numl.c += -fno-builtin-fmaximum_mag_numf64x
+CFLAGS-s_fmaximum_numl.c += -fno-builtin-fmaximum_numf64x
+CFLAGS-s_fmaxmagl.c += -fno-builtin-fmaxmagf64x
+CFLAGS-s_fminl.c += -fno-builtin-fminf64x
+CFLAGS-s_fminimuml.c += -fno-builtin-fminimumf64x
+CFLAGS-s_fminimum_magl.c += -fno-builtin-fminimum_magf64x
+CFLAGS-s_fminimum_mag_numl.c += -fno-builtin-fminimum_mag_numf64x
+CFLAGS-s_fminimum_numl.c += -fno-builtin-fminimum_numf64x
+CFLAGS-s_fminmagl.c += -fno-builtin-fminmagf64x
+CFLAGS-w_fmodl.c += -fno-builtin-fmodf64x
+CFLAGS-s_fmull.c += -fno-builtin-f32mulf64x
+CFLAGS-s_frexpl.c += -fno-builtin-frexpf64x
+CFLAGS-s_fromfpl.c += -fno-builtin-fromfpf64x
+CFLAGS-s_fromfpxl.c += -fno-builtin-fromfpxf64x
+CFLAGS-s_fsqrtl.c += -fno-builtin-f32sqrtf64x
+CFLAGS-s_fsubl.c += -fno-builtin-f32subf64x
+CFLAGS-s_getpayloadl.c += -fno-builtin-getpayloadf64x
+CFLAGS-w_hypotl.c += -fno-builtin-hypotf64x
+CFLAGS-w_ilogbl.c += -fno-builtin-ilogbf64x
+CFLAGS-w_j0l.c += -fno-builtin-j0f64x
+CFLAGS-w_j1l.c += -fno-builtin-j1f64x
+CFLAGS-w_jnl.c += -fno-builtin-jnf64x
+CFLAGS-s_ldexpl.c += -fno-builtin-ldexpf64x
+CFLAGS-w_lgammal.c += -fno-builtin-lgammaf64x
+CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r
+CFLAGS-w_llogbl.c += -fno-builtin-llogbf64x
+CFLAGS-s_llrintl.c += -fno-builtin-llrintf64x
+CFLAGS-s_llroundl.c += -fno-builtin-llroundf64x
+CFLAGS-e_logl.c += -fno-builtin-logf64x
+CFLAGS-w_log10l.c += -fno-builtin-log10f64x
+CFLAGS-w_log1pl.c += -fno-builtin-log1pf64x
+CFLAGS-e_log2l.c += -fno-builtin-log2f64x
+CFLAGS-s_logbl.c += -fno-builtin-logbf64x
+CFLAGS-s_lrintl.c += -fno-builtin-lrintf64x
+CFLAGS-s_lroundl.c += -fno-builtin-lroundf64x
+CFLAGS-s_modfl.c += -fno-builtin-modff64x
+CFLAGS-s_nanl.c += -fno-builtin-nanf64x
+CFLAGS-s_nearbyintl.c += -fno-builtin-nearbyintf64x
+CFLAGS-s_nextafterl.c += -fno-builtin-nextafterf64x
+CFLAGS-s_nextdownl.c += -fno-builtin-nextdownf64x
+CFLAGS-s_nextupl.c += -fno-builtin-nextupf64x
+CFLAGS-e_powl.c += -fno-builtin-powf64x
+CFLAGS-w_remainderl.c += -fno-builtin-remainderf64x
+CFLAGS-s_remquol.c += -fno-builtin-remquof64x
+CFLAGS-s_rintl.c += -fno-builtin-rintf64x
+CFLAGS-s_roundl.c += -fno-builtin-roundf64x
+CFLAGS-s_roundevenl.c += -fno-builtin-roundevenf64x
+CFLAGS-w_scalblnl.c += -fno-builtin-scalblnf64x
+CFLAGS-s_scalbnl.c += -fno-builtin-scalbnf64x
+CFLAGS-s_setpayloadl.c += -fno-builtin-setpayloadf64x
+CFLAGS-s_setpayloadsigl.c += -fno-builtin-setpayloadsigf64x
+CFLAGS-s_sinl.c += -fno-builtin-sinf64x
+CFLAGS-s_sincosl.c += -fno-builtin-sincosf64x
+CFLAGS-w_sinhl.c += -fno-builtin-sinhf64x
+CFLAGS-w_sqrtl.c += -fno-builtin-sqrtf64x
+CFLAGS-s_tanl.c += -fno-builtin-tanf64x
+CFLAGS-s_tanhl.c += -fno-builtin-tanhf64x
+CFLAGS-w_tgammal.c += -fno-builtin-tgammaf64x
+CFLAGS-s_totalorderl.c += -fno-builtin-totalorderf64x
+CFLAGS-s_totalordermagl.c += -fno-builtin-totalordermagf64x
+CFLAGS-s_truncl.c += -fno-builtin-truncf64x
+CFLAGS-s_ufromfpl.c += -fno-builtin-ufromfpf64x
+CFLAGS-s_ufromfpxl.c += -fno-builtin-ufromfpxf64x
+CFLAGS-s_y0l.c += -fno-builtin-y0f64x
+CFLAGS-s_y1l.c += -fno-builtin-y1f64x
+CFLAGS-s_ynl.c += -fno-builtin-ynf64x
+
endif # $(subdir) == math
diff --git a/sysdeps/powerpc/powerpc32/fpu/Makefile b/sysdeps/powerpc/powerpc32/fpu/Makefile
index b8b6bb0fa2efcf8c..4c0c65c18a5daea8 100644
--- a/sysdeps/powerpc/powerpc32/fpu/Makefile
+++ b/sysdeps/powerpc/powerpc32/fpu/Makefile
@@ -1,8 +1,8 @@
ifeq ($(subdir),math)
# lrint is aliased to lrintf, so suppress compiler builtins to
# avoid mismatched signatures.
-CFLAGS-s_lrint.c += -fno-builtin-lrintf
-CFLAGS-s_lround.c += -fno-builtin-lroundf
+CFLAGS-s_lrint.c += -fno-builtin-lrintf -fno-builtin-lrintf32
+CFLAGS-s_lround.c += -fno-builtin-lroundf -fno-builtin-lroundf32
endif
ifeq ($(subdir),misc)
diff --git a/sysdeps/powerpc/powerpc64/fpu/Makefile b/sysdeps/powerpc/powerpc64/fpu/Makefile
index 05075c2a75c294c3..9359049b555d4457 100644
--- a/sysdeps/powerpc/powerpc64/fpu/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/Makefile
@@ -1,7 +1,9 @@
ifeq ($(subdir),math)
# lrintf and llrintf are aliased to llrint, so suppress compiler builtins to
# avoid mismatched signatures.
-CFLAGS-s_llrint.c += -fno-builtin-lrintf -fno-builtin-llrintf
+CFLAGS-s_llrint.c += -fno-builtin-lrintf -fno-builtin-llrintf \
+ -fno-builtin-lrintf32 -fno-builtin-llrintf32
# Same as before but for lroundf and llroundf
-CFLAGS-s_llround.c += -fno-builtin-lroundf -fno-builtin-llroundf
+CFLAGS-s_llround.c += -fno-builtin-lroundf -fno-builtin-llroundf \
+ -fno-builtin-lroundf32 -fno-builtin-llroundf32
endif
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
index cc073b53d3292ff8..858061484e1ab419 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
@@ -207,6 +207,131 @@ endef
object-suffixes-left := $(all-object-suffixes)
include $(o-iterator)
+CFLAGS-w_acosf128-ifunc.c += -fno-builtin-acosf64x
+CFLAGS-w_acoshf128-ifunc.c += -fno-builtin-acoshf64x
+CFLAGS-w_asinf128-ifunc.c += -fno-builtin-asinf64x
+CFLAGS-s_asinhf128-ifunc.c += -fno-builtin-asinhf64x
+CFLAGS-s_atanf128-ifunc.c += -fno-builtin-atanf64x
+CFLAGS-w_atan2f128-ifunc.c += -fno-builtin-atan2f64x
+CFLAGS-w_atanhf128-ifunc.c += -fno-builtin-atanhf64x
+CFLAGS-s_cabsf128-ifunc.c += -fno-builtin-cabsf64x
+CFLAGS-s_cacosf128-ifunc.c += -fno-builtin-cacosf64x
+CFLAGS-s_cacoshf128-ifunc.c += -fno-builtin-cacoshf64x
+CFLAGS-s_canonicalizef128-ifunc.c += -fno-builtin-canonicalizef64x
+CFLAGS-s_cargf128-ifunc.c += -fno-builtin-cargf64x
+CFLAGS-s_casinf128-ifunc.c += -fno-builtin-casinf64x
+CFLAGS-s_casinhf128-ifunc.c += -fno-builtin-casinhf64x
+CFLAGS-s_catanf128-ifunc.c += -fno-builtin-catanf64x
+CFLAGS-s_catanhf128-ifunc.c += -fno-builtin-catanhf64x
+CFLAGS-s_cbrtf128-ifunc.c += -fno-builtin-cbrtf64x
+CFLAGS-s_ccosf128-ifunc.c += -fno-builtin-ccosf64x
+CFLAGS-s_ccoshf128-ifunc.c += -fno-builtin-ccoshf64x
+CFLAGS-s_ceilf128-ifunc.c += -fno-builtin-ceilf64x
+CFLAGS-s_cexpf128-ifunc.c += -fno-builtin-cexpf64x
+CFLAGS-s_cimagf128-ifunc.c += -fno-builtin-cimagf64x
+CFLAGS-s_clogf128-ifunc.c += -fno-builtin-clogf64x
+CFLAGS-s_clog10f128-ifunc.c += -fno-builtin-clog10f64x
+CFLAGS-s_conjf128-ifunc.c += -fno-builtin-conjf64x
+CFLAGS-s_copysignf128-ifunc.c += -fno-builtin-copysignf64x
+CFLAGS-s_cosf128-ifunc.c += -fno-builtin-cosf64x
+CFLAGS-w_coshf128-ifunc.c += -fno-builtin-coshf64x
+CFLAGS-s_cpowf128-ifunc.c += -fno-builtin-cpowf64x
+CFLAGS-s_cprojf128-ifunc.c += -fno-builtin-cprojf64x
+CFLAGS-s_crealf128-ifunc.c += -fno-builtin-crealf64x
+CFLAGS-s_csinf128-ifunc.c += -fno-builtin-csinf64x
+CFLAGS-s_csinhf128-ifunc.c += -fno-builtin-csinhf64x
+CFLAGS-s_csqrtf128-ifunc.c += -fno-builtin-csqrtf64x
+CFLAGS-s_ctanf128-ifunc.c += -fno-builtin-ctanf64x
+CFLAGS-s_ctanhf128-ifunc.c += -fno-builtin-ctanhf64x
+CFLAGS-s_daddf128-ifunc.c += -fno-builtin-f64addf64x
+CFLAGS-s_ddivf128-ifunc.c += -fno-builtin-f64divf64x
+CFLAGS-s_dfmaf128-ifunc.c += -fno-builtin-f64fmaf64x
+CFLAGS-s_dmulf128-ifunc.c += -fno-builtin-f64mulf64x
+CFLAGS-s_dsqrtf128-ifunc.c += -fno-builtin-f64sqrtf64x
+CFLAGS-s_dsubf128-ifunc.c += -fno-builtin-f64subf64x
+CFLAGS-s_erff128-ifunc.c += -fno-builtin-erff64x
+CFLAGS-s_erfcf128-ifunc.c += -fno-builtin-erfcf64x
+CFLAGS-e_expf128-ifunc.c += -fno-builtin-expf64x
+CFLAGS-w_exp10f128-ifunc.c += -fno-builtin-exp10f64x
+CFLAGS-e_exp2f128-ifunc.c += -fno-builtin-exp2f64x
+CFLAGS-s_expm1f128-ifunc.c += -fno-builtin-expm1f64x
+CFLAGS-s_fabsf128-ifunc.c += -fno-builtin-fabsf64x
+CFLAGS-s_faddf128-ifunc.c += -fno-builtin-f32addf64x
+CFLAGS-s_fdimf128-ifunc.c += -fno-builtin-fdimf64x
+CFLAGS-s_fdivf128-ifunc.c += -fno-builtin-f32divf64x
+CFLAGS-s_ffmaf128-ifunc.c += -fno-builtin-f32fmaf64x
+CFLAGS-s_floorf128-ifunc.c += -fno-builtin-floorf64x
+CFLAGS-s_fmaf128-ifunc.c += -fno-builtin-fmaf64x
+CFLAGS-s_fmaxf128-ifunc.c += -fno-builtin-fmaxf64x
+CFLAGS-s_fmaximumf128-ifunc.c += -fno-builtin-fmaximumf64x
+CFLAGS-s_fmaximum_magf128-ifunc.c += -fno-builtin-fmaximum_magf64x
+CFLAGS-s_fmaximum_mag_numf128-ifunc.c += -fno-builtin-fmaximum_mag_numf64x
+CFLAGS-s_fmaximum_numf128-ifunc.c += -fno-builtin-fmaximum_numf64x
+CFLAGS-s_fmaxmagf128-ifunc.c += -fno-builtin-fmaxmagf64x
+CFLAGS-s_fminf128-ifunc.c += -fno-builtin-fminf64x
+CFLAGS-s_fminimumf128-ifunc.c += -fno-builtin-fminimumf64x
+CFLAGS-s_fminimum_magf128-ifunc.c += -fno-builtin-fminimum_magf64x
+CFLAGS-s_fminimum_mag_numf128-ifunc.c += -fno-builtin-fminimum_mag_numf64x
+CFLAGS-s_fminimum_numf128-ifunc.c += -fno-builtin-fminimum_numf64x
+CFLAGS-s_fminmagf128-ifunc.c += -fno-builtin-fminmagf64x
+CFLAGS-w_fmodf128-ifunc.c += -fno-builtin-fmodf64x
+CFLAGS-s_fmulf128-ifunc.c += -fno-builtin-f32mulf64x
+CFLAGS-s_frexpf128-ifunc.c += -fno-builtin-frexpf64x
+CFLAGS-s_fromfpf128-ifunc.c += -fno-builtin-fromfpf64x
+CFLAGS-s_fromfpxf128-ifunc.c += -fno-builtin-fromfpxf64x
+CFLAGS-s_fsqrtf128-ifunc.c += -fno-builtin-f32sqrtf64x
+CFLAGS-s_fsubf128-ifunc.c += -fno-builtin-f32subf64x
+CFLAGS-s_getpayloadf128-ifunc.c += -fno-builtin-getpayloadf64x
+CFLAGS-w_hypotf128-ifunc.c += -fno-builtin-hypotf64x
+CFLAGS-w_ilogbf128-ifunc.c += -fno-builtin-ilogbf64x
+CFLAGS-w_j0f128-ifunc.c += -fno-builtin-j0f64x
+CFLAGS-w_j1f128-ifunc.c += -fno-builtin-j1f64x
+CFLAGS-w_jnf128-ifunc.c += -fno-builtin-jnf64x
+CFLAGS-s_ldexpf128-ifunc.c += -fno-builtin-ldexpf64x
+CFLAGS-w_lgammaf128-ifunc.c += -fno-builtin-lgammaf64x
+CFLAGS-w_lgammaf128_r-ifunc.c += -fno-builtin-lgammaf64x_r
+CFLAGS-w_llogbf128-ifunc.c += -fno-builtin-llogbf64x
+CFLAGS-s_llrintf128-ifunc.c += -fno-builtin-llrintf64x
+CFLAGS-s_llroundf128-ifunc.c += -fno-builtin-llroundf64x
+CFLAGS-e_logf128-ifunc.c += -fno-builtin-logf64x
+CFLAGS-w_log10f128-ifunc.c += -fno-builtin-log10f64x
+CFLAGS-w_log1pf128-ifunc.c += -fno-builtin-log1pf64x
+CFLAGS-e_log2f128-ifunc.c += -fno-builtin-log2f64x
+CFLAGS-s_logbf128-ifunc.c += -fno-builtin-logbf64x
+CFLAGS-s_lrintf128-ifunc.c += -fno-builtin-lrintf64x
+CFLAGS-s_lroundf128-ifunc.c += -fno-builtin-lroundf64x
+CFLAGS-s_modff128-ifunc.c += -fno-builtin-modff64x
+CFLAGS-s_nanf128-ifunc.c += -fno-builtin-nanf64x
+CFLAGS-s_nearbyintf128-ifunc.c += -fno-builtin-nearbyintf64x
+CFLAGS-s_nextafterf128-ifunc.c += -fno-builtin-nextafterf64x
+CFLAGS-s_nextdownf128-ifunc.c += -fno-builtin-nextdownf64x
+CFLAGS-s_nextupf128-ifunc.c += -fno-builtin-nextupf64x
+CFLAGS-e_powf128-ifunc.c += -fno-builtin-powf64x
+CFLAGS-w_remainderf128-ifunc.c += -fno-builtin-remainderf64x
+CFLAGS-s_remquof128-ifunc.c += -fno-builtin-remquof64x
+CFLAGS-s_rintf128-ifunc.c += -fno-builtin-rintf64x
+CFLAGS-s_roundf128-ifunc.c += -fno-builtin-roundf64x
+CFLAGS-s_roundevenf128-ifunc.c += -fno-builtin-roundevenf64x
+CFLAGS-w_scalblnf128-ifunc.c += -fno-builtin-scalblnf64x
+CFLAGS-s_scalbnf128-ifunc.c += -fno-builtin-scalbnf64x
+CFLAGS-s_setpayloadf128-ifunc.c += -fno-builtin-setpayloadf64x
+CFLAGS-s_setpayloadsigf128-ifunc.c += -fno-builtin-setpayloadsigf64x
+CFLAGS-s_sinf128-ifunc.c += -fno-builtin-sinf64x
+CFLAGS-s_sincosf128-ifunc.c += -fno-builtin-sincosf64x
+CFLAGS-w_sinhf128-ifunc.c += -fno-builtin-sinhf64x
+CFLAGS-w_sqrtf128-ifunc.c += -fno-builtin-sqrtf64x
+CFLAGS-s_tanf128-ifunc.c += -fno-builtin-tanf64x
+CFLAGS-s_tanhf128-ifunc.c += -fno-builtin-tanhf64x
+CFLAGS-w_tgammaf128-ifunc.c += -fno-builtin-tgammaf64x
+CFLAGS-s_totalorderf128-ifunc.c += -fno-builtin-totalorderf64x
+CFLAGS-s_totalordermagf128-ifunc.c += -fno-builtin-totalordermagf64x
+CFLAGS-s_truncf128-ifunc.c += -fno-builtin-truncf64x
+CFLAGS-s_ufromfpf128-ifunc.c += -fno-builtin-ufromfpf64x
+CFLAGS-s_ufromfpxf128-ifunc.c += -fno-builtin-ufromfpxf64x
+CFLAGS-s_y0f128-ifunc.c += -fno-builtin-y0f64x
+CFLAGS-s_y1f128-ifunc.c += -fno-builtin-y1f64x
+CFLAGS-s_ynf128-ifunc.c += -fno-builtin-ynf64x
+
endif # do_f128_multiarch
libm-sysdep_routines += e_log-ppc64
diff --git a/sysdeps/x86_64/x32/Makefile b/sysdeps/x86_64/x32/Makefile
index 8748956563babf8f..31732aa248fe62cf 100644
--- a/sysdeps/x86_64/x32/Makefile
+++ b/sysdeps/x86_64/x32/Makefile
@@ -2,7 +2,8 @@ ifeq ($(subdir),math)
# Since x32 returns 32-bit long int and 64-bit long long int in the
# same 64-bit register, we make the 32b-bit lround an alias of the
# 64-bit llround. Add -fno-builtin-lround to silence the compiler.
-CFLAGS-s_llround.c += -fno-builtin-lround
+CFLAGS-s_llround.c += -fno-builtin-lround -fno-builtin-lroundf32x \
+ -fno-builtin-lroundf64
endif
ifeq ($(subdir),string)

@ -0,0 +1,89 @@
commit 9cc9d61ee12f2f8620d8e0ea3c42af02bf07fe1e
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Nov 4 18:37:16 2022 +0100
elf: Disable some subtests of ifuncmain1, ifuncmain5 for !PIE
diff --git a/elf/ifuncmain1.c b/elf/ifuncmain1.c
index 747fc02648a5493e..6effce3d77b1f706 100644
--- a/elf/ifuncmain1.c
+++ b/elf/ifuncmain1.c
@@ -19,7 +19,14 @@ typedef int (*foo_p) (void);
#endif
foo_p foo_ptr = foo;
+
+/* Address-significant access to protected symbols is not supported in
+ position-dependent mode on several architectures because GCC
+ generates relocations that assume that the address is local to the
+ main program. */
+#ifdef __PIE__
foo_p foo_procted_ptr = foo_protected;
+#endif
extern foo_p get_foo_p (void);
extern foo_p get_foo_hidden_p (void);
@@ -37,12 +44,16 @@ main (void)
if ((*foo_ptr) () != -1)
abort ();
+#ifdef __PIE__
if (foo_procted_ptr != foo_protected)
abort ();
+#endif
if (foo_protected () != 0)
abort ();
+#ifdef __PIE__
if ((*foo_procted_ptr) () != 0)
abort ();
+#endif
p = get_foo_p ();
if (p != foo)
@@ -55,8 +66,10 @@ main (void)
abort ();
p = get_foo_protected_p ();
+#ifdef __PIE__
if (p != foo_protected)
abort ();
+#endif
if (ret_foo_protected != 0 || (*p) () != ret_foo_protected)
abort ();
diff --git a/elf/ifuncmain5.c b/elf/ifuncmain5.c
index f398085cb46719d1..6fda768fb6908aed 100644
--- a/elf/ifuncmain5.c
+++ b/elf/ifuncmain5.c
@@ -14,12 +14,19 @@ get_foo (void)
return foo;
}
+
+/* Address-significant access to protected symbols is not supported in
+ position-dependent mode on several architectures because GCC
+ generates relocations that assume that the address is local to the
+ main program. */
+#ifdef __PIE__
foo_p
__attribute__ ((noinline))
get_foo_protected (void)
{
return foo_protected;
}
+#endif
int
main (void)
@@ -30,9 +37,11 @@ main (void)
if ((*p) () != -1)
abort ();
+#ifdef __PIE__
p = get_foo_protected ();
if ((*p) () != 0)
abort ();
+#endif
return 0;
}

@ -0,0 +1,938 @@
commit 8a78f833d670f86302f2d0c32eb1e4357d9166ff
Author: Joseph Myers <joseph@codesourcery.com>
Date: Fri Jan 6 19:33:29 2023 +0000
C2x semantics for <tgmath.h>
<tgmath.h> implements semantics for integer generic arguments that
handle cases involving _FloatN / _FloatNx types as specified in TS
18661-3 plus some defect fixes.
C2x has further changes to the semantics for <tgmath.h> macros with
such types, which should also be considered defect fixes (although
handled through the integration of TS 18661-3 in C2x rather than
through an issue tracking process). Specifically, the rules were
changed because of problems raised with using the macros with the
evaluation format types such as float_t and _Float32_t: the older
version of the rules didn't allow passing _FloatN / _FloatNx types to
the narrowing macros returning float or double, or passing float /
double / long double to the narrowing macros returning _FloatN /
_FloatNx, which was a problem with the evaluation format types which
could be either kind of type depending on the value of
FLT_EVAL_METHOD.
Thus the new rules allow cases of mixing types which were not allowed
before, and, as part of the changes, the handling of integer arguments
was also changed: if there is any _FloatNx generic argument, integer
generic arguments are treated as _Float32x (not double), while the
rule about treating integer arguments to narrowing macros returning
_FloatN or _FloatNx as _Float64 not double was removed (no longer
needed now double is a valid argument to such macros).
I've implemented the changes in GCC's __builtin_tgmath, which thus
requires updates to glibc's test expectations so that the tests
continue to build with GCC 13 (the test is also updated to test the
argument types that weren't allowed before but are now valid under C2x
rules).
Given those test changes, it's then also necessary to fix the
implementations in <tgmath.h> to have appropriate semantics with older
GCC so that the tests pass with GCC versions before GCC 13 as well.
For some cases (non-narrowing macros with two or three generic
arguments; narrowing macros returning _Float32x), the older version of
__builtin_tgmath doesn't correspond sufficiently well to C2x
semantics, so in those cases <tgmath.h> is adjusted to use the older
macro implementation instead of __builtin_tgmath. The older macro
implementation is itself adjusted to give the desired semantics, with
GCC 7 and later. (It's not possible to get the right semantics in all
cases for the narrowing macros with GCC 6 and before when the _FloatN
/ _FloatNx names are typedefs rather than distinct types.)
Tested as follows: with the full glibc testsuite for x86_64, GCC 6, 7,
11, 13; with execution of the math/tests for aarch64, arm, powerpc and
powerpc64le, GCC 6, 7, 12 and 13 (powerpc64le only with GCC 12 and
13); with build-many-glibcs.py with GCC 6, 7, 12 and 13.
Conflicts:
math/tgmath.h
(missing support for narrowing fma/sqrt downstream
means that the definitions for __TGMATH_1_NARROW_*
and __TGMATH_3_NARROW_* are not needed)
diff --git a/math/gen-tgmath-tests.py b/math/gen-tgmath-tests.py
index 364963da6525e08d..be5e8cd9a07ef071 100755
--- a/math/gen-tgmath-tests.py
+++ b/math/gen-tgmath-tests.py
@@ -19,14 +19,13 @@
# As glibc does not support decimal floating point, the types to
# consider for generic parameters are standard and binary
-# floating-point types, and integer types which are treated as double.
-# The corresponding complex types may also be used (including complex
-# integer types, which are a GNU extension, but are currently disabled
-# here because they do not work properly with tgmath.h).
-
-# The proposed resolution to TS 18661-1 DR#9
-# <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2149.htm#dr_9>
-# makes the <tgmath.h> rules for selecting a function to call
+# floating-point types, and integer types which are treated as
+# _Float32x if any argument has a _FloatNx type and otherwise as
+# double. The corresponding complex types may also be used (including
+# complex integer types, which are a GNU extension, but are currently
+# disabled here because they do not work properly with tgmath.h).
+
+# C2x makes the <tgmath.h> rules for selecting a function to call
# correspond to the usual arithmetic conversions (applied successively
# to the arguments for generic parameters in order), which choose the
# type whose set of values contains that of the other type (undefined
@@ -69,10 +68,6 @@ class Type(object):
# Real argument types that correspond to a standard floating type
# (float, double or long double; not _FloatN or _FloatNx).
standard_real_argument_types_list = []
- # Real argument types other than float, double and long double
- # (i.e., those that are valid as arguments to narrowing macros
- # returning _FloatN or _FloatNx).
- non_standard_real_argument_types_list = []
# The real floating types by their order properties (which are
# tuples giving the positions in both the possible orders above).
real_types_order = {}
@@ -86,13 +81,16 @@ class Type(object):
float64_type = None
# The type _Complex _Float64.
complex_float64_type = None
+ # The type _Float32x.
+ float32x_type = None
+ # The type _Complex _Float32x.
+ complex_float32x_type = None
# The type _Float64x.
float64x_type = None
- # The type _Float64x if available, otherwise _Float64.
- float32x_ext_type = None
def __init__(self, name, suffix=None, mant_dig=None, condition='1',
- order=None, integer=False, complex=False, real_type=None):
+ order=None, integer=False, complex=False, real_type=None,
+ floatnx=False):
"""Initialize a Type object, creating any corresponding complex type
in the process."""
self.name = name
@@ -102,6 +100,7 @@ class Type(object):
self.order = order
self.integer = integer
self.complex = complex
+ self.floatnx = floatnx
if complex:
self.complex_type = self
self.real_type = real_type
@@ -119,8 +118,6 @@ class Type(object):
Type.real_argument_types_list.append(self)
if not self.name.startswith('_Float'):
Type.standard_real_argument_types_list.append(self)
- if self.name not in ('float', 'double', 'long double'):
- Type.non_standard_real_argument_types_list.append(self)
if self.order is not None:
Type.real_types_order[self.order] = self
if self.name == 'double':
@@ -133,26 +130,28 @@ class Type(object):
Type.float64_type = self
if self.name == '_Complex _Float64':
Type.complex_float64_type = self
+ if self.name == '_Float32x':
+ Type.float32x_type = self
+ if self.name == '_Complex _Float32x':
+ Type.complex_float32x_type = self
if self.name == '_Float64x':
Type.float64x_type = self
- if self.name == 'Float32x_ext':
- Type.float32x_ext_type = self
@staticmethod
def create_type(name, suffix=None, mant_dig=None, condition='1', order=None,
integer=False, complex_name=None, complex_ok=True,
- internal=False):
+ floatnx=False, internal=False):
"""Create and register a Type object for a real type, creating any
corresponding complex type in the process."""
real_type = Type(name, suffix=suffix, mant_dig=mant_dig,
condition=condition, order=order, integer=integer,
- complex=False)
+ complex=False, floatnx=floatnx)
if complex_ok:
if complex_name is None:
complex_name = '_Complex %s' % name
complex_type = Type(complex_name, condition=condition,
integer=integer, complex=True,
- real_type=real_type)
+ real_type=real_type, floatnx=floatnx)
else:
complex_type = None
real_type.complex_type = complex_type
@@ -160,13 +159,13 @@ class Type(object):
if complex_type is not None:
complex_type.register_type(internal)
- def floating_type(self, floatn):
+ def floating_type(self, integer_float32x):
"""Return the corresponding floating type."""
if self.integer:
- if floatn:
- return (Type.complex_float64_type
+ if integer_float32x:
+ return (Type.complex_float32x_type
if self.complex
- else Type.float64_type)
+ else Type.float32x_type)
else:
return (Type.complex_double_type
if self.complex
@@ -174,9 +173,9 @@ class Type(object):
else:
return self
- def real_floating_type(self, floatn):
+ def real_floating_type(self, integer_float32x):
"""Return the corresponding real floating type."""
- return self.real_type.floating_type(floatn)
+ return self.real_type.floating_type(integer_float32x)
def __str__(self):
"""Return string representation of a type."""
@@ -194,7 +193,8 @@ class Type(object):
condition='defined HUGE_VAL_F32', order=(2, 2))
Type.create_type('_Float32x', 'f32x', 'FLT32X_MANT_DIG',
complex_name='__CFLOAT32X',
- condition='defined HUGE_VAL_F32X', order=(3, 3))
+ condition='defined HUGE_VAL_F32X', order=(3, 3),
+ floatnx=True)
Type.create_type('double', '', 'DBL_MANT_DIG', order=(4, 4))
Type.create_type('long double', 'l', 'LDBL_MANT_DIG', order=(5, 7))
Type.create_type('_Float64', 'f64', 'FLT64_MANT_DIG',
@@ -202,7 +202,8 @@ class Type(object):
condition='defined HUGE_VAL_F64', order=(6, 5))
Type.create_type('_Float64x', 'f64x', 'FLT64X_MANT_DIG',
complex_name='__CFLOAT64X',
- condition='defined HUGE_VAL_F64X', order=(7, 6))
+ condition='defined HUGE_VAL_F64X', order=(7, 6),
+ floatnx=True)
Type.create_type('_Float128', 'f128', 'FLT128_MANT_DIG',
complex_name='__CFLOAT128',
condition='defined HUGE_VAL_F128', order=(8, 8))
@@ -235,21 +236,16 @@ class Type(object):
complex_name='complex_long_double_Float64x',
condition='defined HUGE_VAL_F64X', order=(7, 7),
internal=True)
- # An internal type for the argument type used by f32x*
- # narrowing macros (_Float64x if available, otherwise
- # _Float64).
- Type.create_type('Float32x_ext', None, 'FLT32X_EXT_MANT_DIG',
- complex_name='complex_Float32x_ext',
- condition='1', internal=True)
@staticmethod
- def can_combine_types(types, floatn):
+ def can_combine_types(types):
"""Return a C preprocessor conditional for whether the given list of
types can be used together as type-generic macro arguments."""
have_long_double = False
have_float128 = False
+ integer_float32x = any(t.floatnx for t in types)
for t in types:
- t = t.real_floating_type(floatn)
+ t = t.real_floating_type(integer_float32x)
if t.name == 'long double':
have_long_double = True
if t.name == '_Float128' or t.name == '_Float64x':
@@ -262,14 +258,15 @@ class Type(object):
return '1'
@staticmethod
- def combine_types(types, floatn):
+ def combine_types(types):
"""Return the result of combining a set of types."""
have_complex = False
combined = None
+ integer_float32x = any(t.floatnx for t in types)
for t in types:
if t.complex:
have_complex = True
- t = t.real_floating_type(floatn)
+ t = t.real_floating_type(integer_float32x)
if combined is None:
combined = t
else:
@@ -375,18 +372,8 @@ class Tests(object):
'# endif\n')
float64x_text = if_cond_text([Type.float64x_type.condition],
float64x_text)
- float32x_ext_text = ('#ifdef HUGE_VAL_F64X\n'
- 'typedef _Float64x Float32x_ext;\n'
- 'typedef __CFLOAT64X complex_Float32x_ext;\n'
- '# define FLT32X_EXT_MANT_DIG FLT64X_MANT_DIG\n'
- '#else\n'
- 'typedef _Float64 Float32x_ext;\n'
- 'typedef __CFLOAT64 complex_Float32x_ext;\n'
- '# define FLT32X_EXT_MANT_DIG FLT64_MANT_DIG\n'
- '#endif\n')
self.header_list.append(float64_text)
self.header_list.append(float64x_text)
- self.header_list.append(float32x_ext_text)
self.types_seen = set()
for t in Type.all_types_list:
self.add_type_var(t.name, t.condition)
@@ -439,39 +426,33 @@ class Tests(object):
narrowing_std = True
narrow_cond = '1'
narrow_args = [Type.double_type, Type.long_double_type]
- narrow_fallback = Type.double_type
elif ret == 'double':
narrowing = True
narrowing_std = True
narrow_cond = '1'
narrow_args = [Type.long_double_type]
- narrow_fallback = Type.long_double_type
elif ret.startswith('_Float'):
narrowing = True
- narrow_args = []
+ narrow_args_1 = []
+ narrow_args_2 = []
nret_type = None
- narrow_fallback = None
for order, real_type in sorted(Type.real_types_order.items()):
if real_type.name == ret:
nret_type = real_type
elif nret_type and real_type.name.startswith('_Float'):
- narrow_args.append(real_type)
- if (narrow_fallback is None
- and ret.endswith('x') == real_type.name.endswith('x')):
- narrow_fallback = real_type
+ if ret.endswith('x') == real_type.name.endswith('x'):
+ narrow_args_1.append(real_type)
+ else:
+ narrow_args_2.append(real_type)
+ narrow_args = narrow_args_1 + narrow_args_2
if narrow_args:
narrow_cond = ('(%s && (%s))'
% (nret_type.condition,
' || '.join(t.condition
for t in narrow_args)))
- if narrow_fallback is None:
- narrow_fallback = narrow_args[0]
- if ret == '_Float32x':
- narrow_fallback = Type.float32x_ext_type
else:
# No possible argument types, even conditionally.
narrow_cond = '0'
- narrowing_nonstd = narrowing and not narrowing_std
types = [ret] + args
for t in types:
if t != 'c' and t != 'g' and t != 'r' and t != 's':
@@ -530,19 +511,13 @@ class Tests(object):
if t == 'g' or t == 'c':
arg_types.append(Type.argument_types_list)
elif t == 'r':
- if narrowing_std:
- arg_types.append(Type.standard_real_argument_types_list)
- elif narrowing:
- arg_types.append(
- Type.non_standard_real_argument_types_list)
- else:
- arg_types.append(Type.real_argument_types_list)
+ arg_types.append(Type.real_argument_types_list)
elif t == 's':
arg_types.append(Type.standard_real_argument_types_list)
arg_types_product = list_product(arg_types)
test_num = 0
for this_args in arg_types_product:
- comb_type = Type.combine_types(this_args, narrowing_nonstd)
+ comb_type = Type.combine_types(this_args)
if narrowing:
# As long as there are no integer arguments, and as
# long as the chosen argument type is as wide as all
@@ -550,22 +525,22 @@ class Tests(object):
# of the macro call do not depend on the exact
# function chosen. In particular, for f32x functions
# when _Float64x exists, the chosen type should differ
- # for _Float32x and _Float64 arguments, but it is not
- # always possible to distinguish those types before
- # GCC 7 and the implementation does not attempt to do
- # so before GCC 8.
+ # for double / _Float32x and _Float64 arguments, but
+ # it is not always possible to distinguish those types
+ # before GCC 7 (resulting in some cases - only real
+ # arguments - where a wider argument type is used,
+ # which is semantically OK, and others - integer
+ # arguments present - where it may not be OK, but is
+ # unavoidable).
narrow_mant_dig = comb_type.real_type.mant_dig
for arg_type in this_args:
if arg_type.integer:
narrow_mant_dig = 0
else:
narrow_mant_dig = 0
- if (narrowing
- and comb_type not in narrow_args
- and narrow_fallback is not None):
- comb_type = narrow_fallback
- can_comb = Type.can_combine_types(this_args, narrowing_nonstd)
+ can_comb = Type.can_combine_types(this_args)
all_conds = [t.condition for t in this_args]
+ narrow_args_cond = '(%s)' % ' && '.join(sorted(set(all_conds)))
all_conds.append(can_comb)
if narrowing:
all_conds.append(narrow_cond)
@@ -579,10 +554,69 @@ class Tests(object):
test_func_name = 'test_%s_%d' % (macro, test_num)
test_num += 1
mant_dig = comb_type.real_type.mant_dig
+ test_mant_dig_comp = ''
+ if (narrowing
+ and comb_type not in narrow_args):
+ # The expected argument type is the first in
+ # narrow_args that can represent all the values of
+ # comb_type (which, for the supported cases, means the
+ # first with mant_dig at least as large as that for
+ # comb_type, provided this isn't the case of an IBM
+ # long double argument with binary128 type from
+ # narrow_args).
+ narrow_extra_conds = []
+ test_mant_dig_list = ['#undef NARROW_MANT_DIG\n#if 0\n']
+ for t in narrow_args:
+ t_cond = '(%s && %s && %s <= %s && %s)' % (
+ narrow_args_cond, t.condition, mant_dig, t.mant_dig,
+ Type.can_combine_types(this_args + [t]))
+ narrow_extra_conds.append(t_cond)
+ test_mant_dig_list.append('#elif %s\n'
+ '#define NARROW_MANT_DIG %s\n'
+ % (t_cond, t.mant_dig))
+ test_mant_dig_list.append('#endif\n')
+ test_mant_dig_comp = ''.join(test_mant_dig_list)
+ all_conds.append('(%s)' % ' || '.join(narrow_extra_conds))
+ # A special case where this logic isn't correct is
+ # where comb_type is the internal long_double_Float64
+ # or long_double_Float64x, which will be detected as
+ # not in narrow_args even if the actual type chosen in
+ # a particular configuration would have been in
+ # narrow_args, so check for that case and handle it
+ # appropriately. In particular, if long double has
+ # the same format as double and there are long double
+ # and _Float64 arguments, and the macro returns
+ # _Float32x, the function called should be one for
+ # _Float64 arguments, not one for _Float64x arguments
+ # that would arise from this logic.
+ if comb_type.real_type.name == 'long_double_Float64':
+ comb_type_1 = Type.long_double_type
+ comb_type_2 = Type.float64_type
+ comb_type_is_2_cond = 'LDBL_MANT_DIG <= FLT64_MANT_DIG'
+ elif comb_type.real_type.name == 'long_double_Float64x':
+ comb_type_1 = Type.long_double_type
+ comb_type_2 = Type.float64x_type
+ comb_type_is_2_cond = 'LDBL_MANT_DIG < FLT64X_MANT_DIG'
+ else:
+ comb_type_1 = None
+ comb_type_2 = None
+ if comb_type_1 is None:
+ mant_dig = 'NARROW_MANT_DIG'
+ else:
+ mant_dig = ''
+ if comb_type_1 in narrow_args:
+ mant_dig += '!(%s) ? %s : ' % (comb_type_is_2_cond,
+ comb_type_1.mant_dig)
+ if comb_type_2 in narrow_args:
+ mant_dig += '%s ? %s : ' % (comb_type_is_2_cond,
+ comb_type_2.mant_dig)
+ mant_dig += 'NARROW_MANT_DIG'
+ if narrow_mant_dig != 0:
+ narrow_mant_dig = mant_dig
test_text = '%s, "%s", "%s", %s, %s' % (test_func_name, func_name,
test_name, mant_dig,
narrow_mant_dig)
- test_text = ' { %s },\n' % test_text
+ test_text = '%s { %s },\n' % (test_mant_dig_comp, test_text)
test_text = if_cond_text(all_conds, test_text)
self.test_array_list.append(test_text)
call_args = []
@@ -730,7 +764,7 @@ class Tests(object):
' && strcmp (called_func_name,\n'
' tests[i].func_name) == 0)\n'
' num_pass++;\n'
- '#if !__GNUC_PREREQ (8, 0)\n'
+ '#if !__GNUC_PREREQ (7, 0)\n'
' else if (tests[i].narrow_mant_dig > 0\n'
' && (called_mant_dig\n'
' >= tests[i].narrow_mant_dig)\n'
@@ -747,6 +781,21 @@ class Tests(object):
' tests[i].mant_dig,\n'
' called_func_name, called_mant_dig);\n'
' }\n'
+ ' else if (tests[i].narrow_mant_dig == 0\n'
+ ' && strcmp (called_func_name,\n'
+ ' tests[i].func_name) == 0)\n'
+ ' {\n'
+ ' num_pass++;\n'
+ ' printf ("Test %zu (%s):\\n"\n'
+ ' " Expected: %s precision %d\\n"\n'
+ ' " Actual: %s precision %d\\n"\n'
+ ' " (unavoidable with old GCC)'
+ '\\n\\n",\n'
+ ' i, tests[i].test_name,\n'
+ ' tests[i].func_name,\n'
+ ' tests[i].mant_dig,\n'
+ ' called_func_name, called_mant_dig);\n'
+ ' }\n'
'#endif\n'
' else\n'
' {\n'
diff --git a/math/tgmath.h b/math/tgmath.h
index b55cb39c93575ddc..dbd165dd1882dcc4 100644
--- a/math/tgmath.h
+++ b/math/tgmath.h
@@ -37,9 +37,17 @@
for older GCC, using other compiler extensions but with macros
expanding their arguments many times (so resulting in exponential
blowup of the size of expansions when calls to such macros are
- nested inside arguments to such macros). */
+ nested inside arguments to such macros). Because of a long series
+ of defect fixes made after the initial release of TS 18661-1, GCC
+ versions before GCC 13 have __builtin_tgmath semantics that, when
+ integer arguments are passed to narrowing macros returning
+ _Float32x, or non-narrowing macros with at least two generic
+ arguments, do not always correspond to the C2X semantics, so more
+ complicated macro definitions are also used in some cases for
+ versions from GCC 8 to GCC 12. */
#define __HAVE_BUILTIN_TGMATH __GNUC_PREREQ (8, 0)
+#define __HAVE_BUILTIN_TGMATH_C2X __GNUC_PREREQ (13, 0)
#if __GNUC_PREREQ (2, 7)
@@ -135,13 +143,14 @@
__builtin_tgmath (__TGMATH_NARROW_FUNCS_F32 (F) (X), (Y))
# define __TGMATH_2_NARROW_F64(F, X, Y) \
__builtin_tgmath (__TGMATH_NARROW_FUNCS_F64 (F) (X), (Y))
-# if __HAVE_FLOAT128
+# if __HAVE_FLOAT128 && __HAVE_BUILTIN_TGMATH_C2X
# define __TGMATH_2_NARROW_F32X(F, X, Y) \
__builtin_tgmath (__TGMATH_NARROW_FUNCS_F32X (F) (X), (Y))
# endif
-# else /* !__HAVE_BUILTIN_TGMATH. */
+# endif
+# if !__HAVE_BUILTIN_TGMATH_C2X
# ifdef __NO_LONG_DOUBLE_MATH
# define __tgml(fct) fct
# else
@@ -181,13 +190,17 @@
/* Whether an expression (of arithmetic type) has a real type. */
# define __expr_is_real(E) (__builtin_classify_type (E) != 9)
+/* Type T1 if E is 1, type T2 if E is 0. */
+# define __tgmath_type_if(T1, T2, E) \
+ __typeof__ (*(0 ? (__typeof__ (0 ? (T2 *) 0 : (void *) (E))) 0 \
+ : (__typeof__ (0 ? (T1 *) 0 : (void *) (!(E)))) 0))
+
/* The tgmath real type for T, where E is 0 if T is an integer type
and 1 for a floating type. If T has a complex type, it is
unspecified whether the return type is real or complex (but it has
the correct corresponding real type). */
# define __tgmath_real_type_sub(T, E) \
- __typeof__ (*(0 ? (__typeof__ (0 ? (double *) 0 : (void *) (E))) 0 \
- : (__typeof__ (0 ? (T *) 0 : (void *) (!(E)))) 0))
+ __tgmath_type_if (T, double, E)
/* The tgmath real type of EXPR. */
# define __tgmath_real_type(expr) \
@@ -215,6 +228,56 @@
__real_integer_type (__typeof__ (+(expr))), \
__complex_integer_type (__typeof__ (+(expr))))
+/* The tgmath real type of EXPR1 combined with EXPR2, without handling
+ the C2X rule of interpreting integer arguments as _Float32x if any
+ argument is _FloatNx. */
+# define __tgmath_real_type2_base(expr1, expr2) \
+ __typeof ((__tgmath_real_type (expr1)) 0 + (__tgmath_real_type (expr2)) 0)
+
+/* The tgmath complex type of EXPR1 combined with EXPR2, without
+ handling the C2X rule of interpreting integer arguments as
+ _Float32x if any argument is _FloatNx. */
+# define __tgmath_complex_type2_base(expr1, expr2) \
+ __typeof ((__tgmath_complex_type (expr1)) 0 \
+ + (__tgmath_complex_type (expr2)) 0)
+
+/* The tgmath real type of EXPR1 combined with EXPR2 and EXPR3,
+ without handling the C2X rule of interpreting integer arguments as
+ _Float32x if any argument is _FloatNx. */
+# define __tgmath_real_type3_base(expr1, expr2, expr3) \
+ __typeof ((__tgmath_real_type (expr1)) 0 \
+ + (__tgmath_real_type (expr2)) 0 \
+ + (__tgmath_real_type (expr3)) 0)
+
+/* The tgmath real or complex type of EXPR1 combined with EXPR2 (and
+ EXPR3 if applicable). */
+# if __HAVE_FLOATN_NOT_TYPEDEF
+# define __tgmath_real_type2(expr1, expr2) \
+ __tgmath_type_if (_Float32x, __tgmath_real_type2_base (expr1, expr2), \
+ _Generic ((expr1) + (expr2), _Float32x: 1, default: 0))
+# define __tgmath_complex_type2(expr1, expr2) \
+ __tgmath_type_if (_Float32x, \
+ __tgmath_type_if (_Complex _Float32x, \
+ __tgmath_complex_type2_base (expr1, \
+ expr2), \
+ _Generic ((expr1) + (expr2), \
+ _Complex _Float32x: 1, \
+ default: 0)), \
+ _Generic ((expr1) + (expr2), _Float32x: 1, default: 0))
+# define __tgmath_real_type3(expr1, expr2, expr3) \
+ __tgmath_type_if (_Float32x, \
+ __tgmath_real_type3_base (expr1, expr2, expr3), \
+ _Generic ((expr1) + (expr2) + (expr3), \
+ _Float32x: 1, default: 0))
+# else
+# define __tgmath_real_type2(expr1, expr2) \
+ __tgmath_real_type2_base (expr1, expr2)
+# define __tgmath_complex_type2(expr1, expr2) \
+ __tgmath_complex_type2_base (expr1, expr2)
+# define __tgmath_real_type3(expr1, expr2, expr3) \
+ __tgmath_real_type3_base (expr1, expr2, expr3)
+# endif
+
# if (__HAVE_DISTINCT_FLOAT16 \
|| __HAVE_DISTINCT_FLOAT32 \
|| __HAVE_DISTINCT_FLOAT64 \
@@ -226,7 +289,10 @@
/* Expand to text that checks if ARG_COMB has type _Float128, and if
so calls the appropriately suffixed FCT (which may include a cast),
- or FCT and CFCT for complex functions, with arguments ARG_CALL. */
+ or FCT and CFCT for complex functions, with arguments ARG_CALL.
+ __TGMATH_F128LD (only used in the __HAVE_FLOAT64X_LONG_DOUBLE case,
+ for narrowing macros) handles long double the same as
+ _Float128. */
# if __HAVE_DISTINCT_FLOAT128 && __GLIBC_USE (IEC_60559_TYPES_EXT)
# if (!__HAVE_FLOAT64X \
|| __HAVE_FLOAT64X_LONG_DOUBLE \
@@ -234,6 +300,10 @@
# define __TGMATH_F128(arg_comb, fct, arg_call) \
__builtin_types_compatible_p (__typeof (+(arg_comb)), _Float128) \
? fct ## f128 arg_call :
+# define __TGMATH_F128LD(arg_comb, fct, arg_call) \
+ (__builtin_types_compatible_p (__typeof (+(arg_comb)), _Float128) \
+ || __builtin_types_compatible_p (__typeof (+(arg_comb)), long double)) \
+ ? fct ## f128 arg_call :
# define __TGMATH_CF128(arg_comb, fct, cfct, arg_call) \
__builtin_types_compatible_p (__typeof (+__real__ (arg_comb)), _Float128) \
? (__expr_is_real (arg_comb) \
@@ -259,7 +329,7 @@
# define __TGMATH_CF128(arg_comb, fct, cfct, arg_call) /* Nothing. */
# endif
-# endif /* !__HAVE_BUILTIN_TGMATH. */
+# endif /* !__HAVE_BUILTIN_TGMATH_C2X. */
/* We have two kinds of generic macros: to support functions which are
only defined on real valued parameters and those which are defined
@@ -272,14 +342,18 @@
__TGMATH_2 (Fct, (Val1), (Val2))
# define __TGMATH_BINARY_FIRST_REAL_STD_ONLY(Val1, Val2, Fct) \
__TGMATH_2STD (Fct, (Val1), (Val2))
-# define __TGMATH_BINARY_REAL_ONLY(Val1, Val2, Fct) \
+# if __HAVE_BUILTIN_TGMATH_C2X
+# define __TGMATH_BINARY_REAL_ONLY(Val1, Val2, Fct) \
__TGMATH_2 (Fct, (Val1), (Val2))
+# endif
# define __TGMATH_BINARY_REAL_STD_ONLY(Val1, Val2, Fct) \
__TGMATH_2STD (Fct, (Val1), (Val2))
-# define __TGMATH_TERNARY_FIRST_SECOND_REAL_ONLY(Val1, Val2, Val3, Fct) \
+# if __HAVE_BUILTIN_TGMATH_C2X
+# define __TGMATH_TERNARY_FIRST_SECOND_REAL_ONLY(Val1, Val2, Val3, Fct) \
__TGMATH_3 (Fct, (Val1), (Val2), (Val3))
-# define __TGMATH_TERNARY_REAL_ONLY(Val1, Val2, Val3, Fct) \
+# define __TGMATH_TERNARY_REAL_ONLY(Val1, Val2, Val3, Fct) \
__TGMATH_3 (Fct, (Val1), (Val2), (Val3))
+# endif
# define __TGMATH_TERNARY_FIRST_REAL_RET_ONLY(Val1, Val2, Val3, Fct) \
__TGMATH_3 (Fct, (Val1), (Val2), (Val3))
# define __TGMATH_UNARY_REAL_IMAG(Val, Fct, Cfct) \
@@ -289,11 +363,14 @@
__TGMATH_1C (Fct, Cfct, (Val))
# define __TGMATH_UNARY_REAL_IMAG_RET_REAL_SAME(Val, Cfct) \
__TGMATH_1 (Cfct, (Val))
-# define __TGMATH_BINARY_REAL_IMAG(Val1, Val2, Fct, Cfct) \
+# if __HAVE_BUILTIN_TGMATH_C2X
+# define __TGMATH_BINARY_REAL_IMAG(Val1, Val2, Fct, Cfct) \
__TGMATH_2C (Fct, Cfct, (Val1), (Val2))
+# endif
-# else /* !__HAVE_BUILTIN_TGMATH. */
+# endif
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_UNARY_REAL_ONLY(Val, Fct) \
(__extension__ ((sizeof (+(Val)) == sizeof (double) \
|| __builtin_classify_type (Val) != 8) \
@@ -330,29 +407,28 @@
: (sizeof (+(Val1)) == sizeof (float)) \
? (__tgmath_real_type (Val1)) Fct##f (Val1, Val2) \
: (__tgmath_real_type (Val1)) __tgml(Fct) (Val1, Val2)))
+# endif
+# if !__HAVE_BUILTIN_TGMATH_C2X
# define __TGMATH_BINARY_REAL_ONLY(Val1, Val2, Fct) \
(__extension__ ((sizeof ((Val1) + (Val2)) > sizeof (double) \
&& __builtin_classify_type ((Val1) + (Val2)) == 8) \
? __TGMATH_F128 ((Val1) + (Val2), \
- (__typeof \
- ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) Fct, \
+ (__tgmath_real_type2 (Val1, Val2)) Fct, \
(Val1, Val2)) \
- (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) \
+ (__tgmath_real_type2 (Val1, Val2)) \
__tgml(Fct) (Val1, Val2) \
: (sizeof (+(Val1)) == sizeof (double) \
|| sizeof (+(Val2)) == sizeof (double) \
|| __builtin_classify_type (Val1) != 8 \
|| __builtin_classify_type (Val2) != 8) \
- ? (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) \
+ ? (__tgmath_real_type2 (Val1, Val2)) \
Fct (Val1, Val2) \
- : (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) \
+ : (__tgmath_real_type2 (Val1, Val2)) \
Fct##f (Val1, Val2)))
+# endif
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_BINARY_REAL_STD_ONLY(Val1, Val2, Fct) \
(__extension__ ((sizeof ((Val1) + (Val2)) > sizeof (double) \
&& __builtin_classify_type ((Val1) + (Val2)) == 8) \
@@ -369,27 +445,24 @@
: (__typeof ((__tgmath_real_type (Val1)) 0 \
+ (__tgmath_real_type (Val2)) 0)) \
Fct##f (Val1, Val2)))
+# endif
+# if !__HAVE_BUILTIN_TGMATH_C2X
# define __TGMATH_TERNARY_FIRST_SECOND_REAL_ONLY(Val1, Val2, Val3, Fct) \
(__extension__ ((sizeof ((Val1) + (Val2)) > sizeof (double) \
&& __builtin_classify_type ((Val1) + (Val2)) == 8) \
? __TGMATH_F128 ((Val1) + (Val2), \
- (__typeof \
- ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) Fct, \
+ (__tgmath_real_type2 (Val1, Val2)) Fct, \
(Val1, Val2, Val3)) \
- (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) \
+ (__tgmath_real_type2 (Val1, Val2)) \
__tgml(Fct) (Val1, Val2, Val3) \
: (sizeof (+(Val1)) == sizeof (double) \
|| sizeof (+(Val2)) == sizeof (double) \
|| __builtin_classify_type (Val1) != 8 \
|| __builtin_classify_type (Val2) != 8) \
- ? (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) \
+ ? (__tgmath_real_type2 (Val1, Val2)) \
Fct (Val1, Val2, Val3) \
- : (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0)) \
+ : (__tgmath_real_type2 (Val1, Val2)) \
Fct##f (Val1, Val2, Val3)))
# define __TGMATH_TERNARY_REAL_ONLY(Val1, Val2, Val3, Fct) \
@@ -397,14 +470,10 @@
&& __builtin_classify_type ((Val1) + (Val2) + (Val3)) \
== 8) \
? __TGMATH_F128 ((Val1) + (Val2) + (Val3), \
- (__typeof \
- ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0 \
- + (__tgmath_real_type (Val3)) 0)) Fct, \
+ (__tgmath_real_type3 (Val1, Val2, \
+ Val3)) Fct, \
(Val1, Val2, Val3)) \
- (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0 \
- + (__tgmath_real_type (Val3)) 0)) \
+ (__tgmath_real_type3 (Val1, Val2, Val3)) \
__tgml(Fct) (Val1, Val2, Val3) \
: (sizeof (+(Val1)) == sizeof (double) \
|| sizeof (+(Val2)) == sizeof (double) \
@@ -412,15 +481,13 @@
|| __builtin_classify_type (Val1) != 8 \
|| __builtin_classify_type (Val2) != 8 \
|| __builtin_classify_type (Val3) != 8) \
- ? (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0 \
- + (__tgmath_real_type (Val3)) 0)) \
+ ? (__tgmath_real_type3 (Val1, Val2, Val3)) \
Fct (Val1, Val2, Val3) \
- : (__typeof ((__tgmath_real_type (Val1)) 0 \
- + (__tgmath_real_type (Val2)) 0 \
- + (__tgmath_real_type (Val3)) 0)) \
+ : (__tgmath_real_type3 (Val1, Val2, Val3)) \
Fct##f (Val1, Val2, Val3)))
+# endif
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_TERNARY_FIRST_REAL_RET_ONLY(Val1, Val2, Val3, Fct) \
(__extension__ ((sizeof (+(Val1)) == sizeof (double) \
|| __builtin_classify_type (Val1) != 8) \
@@ -496,7 +563,9 @@
__tgml(Cfct) (Val))))
# define __TGMATH_UNARY_REAL_IMAG_RET_REAL_SAME(Val, Cfct) \
__TGMATH_UNARY_REAL_IMAG_RET_REAL ((Val), Cfct, Cfct)
+# endif
+# if !__HAVE_BUILTIN_TGMATH_C2X
/* XXX This definition has to be changed as soon as the compiler understands
the imaginary keyword. */
# define __TGMATH_BINARY_REAL_IMAG(Val1, Val2, Fct, Cfct) \
@@ -505,46 +574,39 @@
&& __builtin_classify_type (__real__ (Val1) \
+ __real__ (Val2)) == 8) \
? __TGMATH_CF128 ((Val1) + (Val2), \
- (__typeof \
- ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ (__tgmath_complex_type2 (Val1, Val2)) \
Fct, \
- (__typeof \
- ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ (__tgmath_complex_type2 (Val1, Val2)) \
Cfct, \
(Val1, Val2)) \
(__expr_is_real ((Val1) + (Val2)) \
- ? (__typeof ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ ? (__tgmath_complex_type2 (Val1, Val2)) \
__tgml(Fct) (Val1, Val2) \
- : (__typeof ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ : (__tgmath_complex_type2 (Val1, Val2)) \
__tgml(Cfct) (Val1, Val2)) \
: (sizeof (+__real__ (Val1)) == sizeof (double) \
|| sizeof (+__real__ (Val2)) == sizeof (double) \
|| __builtin_classify_type (__real__ (Val1)) != 8 \
|| __builtin_classify_type (__real__ (Val2)) != 8) \
? (__expr_is_real ((Val1) + (Val2)) \
- ? (__typeof ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ ? (__tgmath_complex_type2 (Val1, Val2)) \
Fct (Val1, Val2) \
- : (__typeof ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ : (__tgmath_complex_type2 (Val1, Val2)) \
Cfct (Val1, Val2)) \
: (__expr_is_real ((Val1) + (Val2)) \
- ? (__typeof ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ ? (__tgmath_complex_type2 (Val1, Val2)) \
Fct##f (Val1, Val2) \
- : (__typeof ((__tgmath_complex_type (Val1)) 0 \
- + (__tgmath_complex_type (Val2)) 0)) \
+ : (__tgmath_complex_type2 (Val1, Val2)) \
Cfct##f (Val1, Val2))))
+# endif
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_2_NARROW_F(F, X, Y) \
(__extension__ (sizeof ((__tgmath_real_type (X)) 0 \
+ (__tgmath_real_type (Y)) 0) > sizeof (double) \
? F ## l (X, Y) \
: F (X, Y)))
+# endif
/* In most cases, these narrowing macro definitions based on sizeof
ensure that the function called has the right argument format, as
for other <tgmath.h> macros for compilers before GCC 8, but may not
@@ -553,35 +615,50 @@
In the case of macros for _Float32x return type, when _Float64x
exists, _Float64 arguments should result in the *f64 function being
- called while _Float32x arguments should result in the *f64x
- function being called. These cases cannot be distinguished using
- sizeof (or at all if the types are typedefs rather than different
- types). However, for these functions it is OK (does not affect the
- final result) to call a function with any argument format at least
- as wide as all the floating-point arguments, unless that affects
- rounding of integer arguments. Integer arguments are considered to
- have type _Float64, so the *f64 functions are preferred for f32x*
- macros when no argument has a wider floating-point type. */
-# if __HAVE_FLOAT64X_LONG_DOUBLE && __HAVE_DISTINCT_FLOAT128
+ called while _Float32x, float and double arguments should result in
+ the *f64x function being called (and integer arguments are
+ considered to have type _Float32x if any argument has type
+ _FloatNx, or double otherwise). These cases cannot be
+ distinguished using sizeof (or at all if the types are typedefs
+ rather than different types, in which case we err on the side of
+ using the wider type if unsure). */
+# if !__HAVE_BUILTIN_TGMATH_C2X
+# if __HAVE_FLOATN_NOT_TYPEDEF
+# define __TGMATH_NARROW_F32X_USE_F64X(X) \
+ !__builtin_types_compatible_p (__typeof (+(X)), _Float64)
+# else
+# define __TGMATH_NARROW_F32X_USE_F64X(X) \
+ (__builtin_types_compatible_p (__typeof (+(X)), double) \
+ || __builtin_types_compatible_p (__typeof (+(X)), float) \
+ || !__floating_type (__typeof (+(X))))
+# endif
+# endif
+# if __HAVE_FLOAT64X_LONG_DOUBLE && __HAVE_DISTINCT_FLOAT128
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_2_NARROW_F32(F, X, Y) \
(__extension__ (sizeof ((__tgmath_real_type (X)) 0 \
+ (__tgmath_real_type (Y)) 0) > sizeof (_Float64) \
- ? __TGMATH_F128 ((X) + (Y), F, (X, Y)) \
+ ? __TGMATH_F128LD ((X) + (Y), F, (X, Y)) \
F ## f64x (X, Y) \
: F ## f64 (X, Y)))
# define __TGMATH_2_NARROW_F64(F, X, Y) \
(__extension__ (sizeof ((__tgmath_real_type (X)) 0 \
+ (__tgmath_real_type (Y)) 0) > sizeof (_Float64) \
- ? __TGMATH_F128 ((X) + (Y), F, (X, Y)) \
+ ? __TGMATH_F128LD ((X) + (Y), F, (X, Y)) \
F ## f64x (X, Y) \
: F ## f128 (X, Y)))
+# endif
+# if !__HAVE_BUILTIN_TGMATH_C2X
# define __TGMATH_2_NARROW_F32X(F, X, Y) \
(__extension__ (sizeof ((__tgmath_real_type (X)) 0 \
+ (__tgmath_real_type (Y)) 0) > sizeof (_Float64) \
+ || __TGMATH_NARROW_F32X_USE_F64X ((X) + (Y)) \
? __TGMATH_F128 ((X) + (Y), F, (X, Y)) \
F ## f64x (X, Y) \
: F ## f64 (X, Y)))
-# elif __HAVE_FLOAT128
+# endif
+# elif __HAVE_FLOAT128
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_2_NARROW_F32(F, X, Y) \
(__extension__ (sizeof ((__tgmath_real_type (X)) 0 \
+ (__tgmath_real_type (Y)) 0) > sizeof (_Float64) \
@@ -589,16 +666,21 @@
: F ## f64 (X, Y)))
# define __TGMATH_2_NARROW_F64(F, X, Y) \
(F ## f128 (X, Y))
+# endif
+# if !__HAVE_BUILTIN_TGMATH_C2X
# define __TGMATH_2_NARROW_F32X(F, X, Y) \
(__extension__ (sizeof ((__tgmath_real_type (X)) 0 \
+ (__tgmath_real_type (Y)) 0) > sizeof (_Float32x) \
+ || __TGMATH_NARROW_F32X_USE_F64X ((X) + (Y)) \
? F ## f64x (X, Y) \
: F ## f64 (X, Y)))
-# else
+# endif
+# else
+# if !__HAVE_BUILTIN_TGMATH
# define __TGMATH_2_NARROW_F32(F, X, Y) \
(F ## f64 (X, Y))
# endif
-# endif /* !__HAVE_BUILTIN_TGMATH. */
+# endif
#else
# error "Unsupported compiler; you cannot use <tgmath.h>"
#endif

@ -0,0 +1,36 @@
commit c8126360dfa98024cc40bce915e126309993cdf9
Author: Joseph Myers <joseph@codesourcery.com>
Date: Mon Aug 23 16:18:42 2021 +0000
Fix iconv build with GCC mainline
Current GCC mainline produces -Wstringop-overflow errors building some
iconv converters, as discussed at
<https://gcc.gnu.org/pipermail/gcc/2021-July/236943.html>. Add an
__builtin_unreachable call as suggested so that GCC can see the case
that would involve a buffer overflow is unreachable; because the
unreachability depends on valid conversion state being passed into the
function from previous conversion steps, it's not something the
compiler can reasonably deduce on its own.
Tested with build-many-glibcs.py that, together with
<https://sourceware.org/pipermail/libc-alpha/2021-August/130244.html>,
it restores the glibc build for powerpc-linux-gnu.
diff --git a/iconv/loop.c b/iconv/loop.c
index 062cc1b868c1efa5..560a5f6394e8faf6 100644
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -436,6 +436,12 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
return __GCONV_FULL_OUTPUT;
/* Now add characters from the normal input buffer. */
+ if (inlen >= MAX_NEEDED_INPUT)
+ /* Avoid a -Wstringop-overflow= warning when this loop is
+ unrolled. The compiler cannot otherwise see that this is
+ unreachable because it depends on (state->__count & 7) not
+ being too large after a previous conversion step. */
+ __builtin_unreachable ();
do
bytebuf[inlen++] = *inptr++;
while (inlen < MAX_NEEDED_INPUT && inptr < inend);

@ -0,0 +1,32 @@
commit 3edc4ff2ceff4a59587ebecb94148d3bcfa1df62
Author: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed Jul 19 14:09:26 2023 -0700
make struct pthread a complete type
* nptl/descr.h (struct pthread): Remove end_padding member, which
made this type incomplete.
(PTHREAD_STRUCT_END_PADDING): Stop using end_padding.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/nptl/descr.h b/nptl/descr.h
index dfef9c4bda075d13..c487846c171f9434 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -412,11 +412,11 @@ struct pthread
/* rseq area registered with the kernel. */
struct rseq rseq_area;
- /* This member must be last. */
- char end_padding[];
-
+ /* Amount of end padding, if any, in this structure.
+ This definition relies on rseq_area being last. */
#define PTHREAD_STRUCT_END_PADDING \
- (sizeof (struct pthread) - offsetof (struct pthread, end_padding))
+ (sizeof (struct pthread) - offsetof (struct pthread, rseq_area) \
+ + sizeof (struct rseq))
} __attribute ((aligned (TCB_ALIGNMENT)));
static inline bool

@ -0,0 +1,56 @@
commit 2c6b4b272e6b4d07303af25709051c3e96288f2d
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Jul 21 16:18:18 2023 +0200
nptl: Unconditionally use a 32-byte rseq area
If the kernel headers provide a larger struct rseq, we used that
size as the argument to the rseq system call. As a result,
rseq registration would fail on older kernels which only accept
size 32.
Conflicts:
nptl/descr.h
(upstream has an additional #include <internal-sigset.h>)
diff --git a/nptl/descr.h b/nptl/descr.h
index c487846c171f9434..eded9eca38e7275e 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -35,7 +35,6 @@
#include <bits/types/res_state.h>
#include <kernel-features.h>
#include <tls-internal-struct.h>
-#include <sys/rseq.h>
#ifndef TCB_ALIGNMENT
# define TCB_ALIGNMENT 32
@@ -409,14 +408,25 @@ struct pthread
/* Used on strsignal. */
struct tls_internal_t tls_state;
- /* rseq area registered with the kernel. */
- struct rseq rseq_area;
+ /* rseq area registered with the kernel. Use a custom definition
+ here to isolate from kernel struct rseq changes. The
+ implementation of sched_getcpu needs access to the cpu_id field;
+ the other fields are unused and not included here. */
+ union
+ {
+ struct
+ {
+ uint32_t cpu_id_start;
+ uint32_t cpu_id;
+ };
+ char pad[32]; /* Original rseq area size. */
+ } rseq_area __attribute__ ((aligned (32)));
/* Amount of end padding, if any, in this structure.
This definition relies on rseq_area being last. */
#define PTHREAD_STRUCT_END_PADDING \
(sizeof (struct pthread) - offsetof (struct pthread, rseq_area) \
- + sizeof (struct rseq))
+ + sizeof ((struct pthread) {}.rseq_area))
} __attribute ((aligned (TCB_ALIGNMENT)));
static inline bool

@ -0,0 +1,27 @@
commit 637aac2ae3980de31a6baab236a9255fe853cc76
Author: Stefan Liebler <stli@linux.ibm.com>
Date: Tue Jul 25 11:34:30 2023 +0200
Include sys/rseq.h in tst-rseq-disable.c
Starting with commit 2c6b4b272e6b4d07303af25709051c3e96288f2d
"nptl: Unconditionally use a 32-byte rseq area", the testcase
misc/tst-rseq-disable is UNSUPPORTED as RSEQ_SIG is not defined.
The mentioned commit removes inclusion of sys/rseq.h in nptl/descr.h.
Thus just include sys/rseq.h in the tst-rseq-disable.c as also done
in tst-rseq.c and tst-rseq-nptl.c.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
index 6d73f77e9621da42..964dc728ac5e7b95 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq-disable.c
+++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
@@ -22,6 +22,7 @@
#include <support/xthread.h>
#include <sysdep.h>
#include <thread_pointer.h>
+#include <sys/rseq.h>
#include <unistd.h>
#ifdef RSEQ_SIG

@ -0,0 +1,26 @@
commit 23ee92deea4c99d0e6a5f48fa7b942909b123ec5
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Jul 20 18:31:48 2023 +0200
debug: Mark libSegFault.so as NODELETE
The signal handler installed in the ELF constructor cannot easily
be removed again (because the program may have changed handlers
in the meantime). Mark the object as NODELETE so that the registered
handler function is never unloaded.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/debug/Makefile b/debug/Makefile
index 71248e0d457a5b12..9fbc40dc69b477ca 100644
--- a/debug/Makefile
+++ b/debug/Makefile
@@ -213,6 +213,8 @@ extra-libs-others = $(extra-libs)
libSegFault-routines = segfault
libSegFault-inhibit-o = $(filter-out .os,$(object-suffixes))
+# libSegFault.so installs a signal handler in its ELF constructor.
+LDFLAGS-SegFault.so = -Wl,--enable-new-dtags,-z,nodelete
libpcprofile-routines = pcprofile
libpcprofile-inhibit-o = $(filter-out .os,$(object-suffixes))

@ -0,0 +1,58 @@
commit e2f68b54e8052da14680074fc5df03153216f218
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Thu Mar 17 11:16:57 2022 +0530
nss: Sort tests and tests-container and put one test per line
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/nss/Makefile b/nss/Makefile
index 716bc8f6ef5276b0..aa6d350f2f859d12 100644
--- a/nss/Makefile
+++ b/nss/Makefile
@@ -56,21 +56,30 @@ extra-objs += $(makedb-modules:=.o)
tests-static = tst-field
tests-internal = tst-field
-tests = test-netdb test-digits-dots tst-nss-getpwent bug17079 \
- tst-nss-test1 \
- tst-nss-test2 \
- tst-nss-test4 \
- tst-nss-test5 \
- tst-nss-test_errno
-xtests = bug-erange
-
-tests-container = \
- tst-nss-compat1 \
- tst-nss-test3 \
- tst-nss-files-hosts-long \
- tst-nss-db-endpwent \
- tst-nss-db-endgrent \
- tst-reload1 tst-reload2
+
+tests := \
+ bug17079 \
+ test-digits-dots \
+ test-netdb \
+ tst-nss-getpwent \
+ tst-nss-test1 \
+ tst-nss-test2 \
+ tst-nss-test4 \
+ tst-nss-test5 \
+ tst-nss-test_errno \
+# tests
+
+xtests = bug-erange
+
+tests-container := \
+ tst-nss-compat1 \
+ tst-nss-db-endgrent \
+ tst-nss-db-endpwent \
+ tst-nss-files-hosts-long \
+ tst-nss-test3 \
+ tst-reload1 \
+ tst-reload2 \
+# tests-container
# Tests which need libdl
ifeq (yes,$(build-shared))

@ -0,0 +1,243 @@
commit c437631485a85c3bd034e12f53dd1c8207f05940
Author: Andreas Schwab <schwab@suse.de>
Date: Wed Jul 19 09:55:31 2023 +0200
Restore lookup of IPv4 mapped addresses in files database (bug 25457)
This was broken by commit 9c02d0784d ("nss_files: Remove RES_USE_INET6
from hosts processing"), which removed too much.
Conflicts:
nss/Makefile
(missing tst-nss-gai-actions downstream)
diff --git a/nss/Makefile b/nss/Makefile
index aa6d350f2f859d12..f01674a16e720b88 100644
--- a/nss/Makefile
+++ b/nss/Makefile
@@ -76,6 +76,7 @@ tests-container := \
tst-nss-db-endgrent \
tst-nss-db-endpwent \
tst-nss-files-hosts-long \
+ tst-nss-files-hosts-v4mapped \
tst-nss-test3 \
tst-reload1 \
tst-reload2 \
diff --git a/nss/nss_files/files-hosts.c b/nss/nss_files/files-hosts.c
index d54d91d038a03c18..f6f11fb18b43fa9f 100644
--- a/nss/nss_files/files-hosts.c
+++ b/nss/nss_files/files-hosts.c
@@ -27,6 +27,7 @@
#include <nss.h>
/* Get implementation for some internal functions. */
+#include "../resolv/mapv4v6addr.h"
#include "../resolv/res_hconf.h"
@@ -34,8 +35,8 @@
#define DATABASE "hosts"
#define NEED_H_ERRNO
-#define EXTRA_ARGS , af
-#define EXTRA_ARGS_DECL , int af
+#define EXTRA_ARGS , af, flags
+#define EXTRA_ARGS_DECL , int af, int flags
#define ENTDATA hostent_data
struct hostent_data
@@ -60,8 +61,12 @@ LINE_PARSER
af = af == AF_UNSPEC ? AF_INET : af;
else
{
- if (af == AF_INET
- && __inet_pton (AF_INET6, addr, entdata->host_addr) > 0)
+ if (af == AF_INET6 && (flags & AI_V4MAPPED) != 0
+ && __inet_pton (AF_INET, addr, entdata->host_addr) > 0)
+ map_v4v6_address ((char *) entdata->host_addr,
+ (char *) entdata->host_addr);
+ else if (af == AF_INET
+ && __inet_pton (AF_INET6, addr, entdata->host_addr) > 0)
{
if (IN6_IS_ADDR_V4MAPPED (entdata->host_addr))
memcpy (entdata->host_addr, entdata->host_addr + 12, INADDRSZ);
@@ -94,13 +99,14 @@ LINE_PARSER
STRING_FIELD (result->h_name, isspace, 1);
})
-#define EXTRA_ARGS_VALUE , AF_INET
+#define EXTRA_ARGS_VALUE , AF_INET, 0
#include "files-XXX.c"
#undef EXTRA_ARGS_VALUE
/* We only need to consider IPv4 mapped addresses if the input to the
gethostbyaddr() function is an IPv6 address. */
-#define EXTRA_ARGS_VALUE , af
+#define EXTRA_ARGS_VALUE \
+ , af, (len == IN6ADDRSZ ? AI_V4MAPPED : 0)
DB_LOOKUP (hostbyaddr, ,,,
{
if (result->h_length == (int) len
@@ -152,7 +158,7 @@ gethostbyname3_multi (FILE * stream, const char *name, int af,
while (true)
{
status = internal_getent (stream, &tmp_result_buf, tmp_buffer.data,
- tmp_buffer.length, errnop, herrnop, af);
+ tmp_buffer.length, errnop, herrnop, af, 0);
/* Enlarge the buffer if necessary. */
if (status == NSS_STATUS_TRYAGAIN && *herrnop == NETDB_INTERNAL
&& *errnop == ERANGE)
@@ -337,7 +343,7 @@ _nss_files_gethostbyname3_r (const char *name, int af, struct hostent *result,
if (status == NSS_STATUS_SUCCESS)
{
while ((status = internal_getent (stream, result, buffer, buflen, errnop,
- herrnop, af))
+ herrnop, af, 0))
== NSS_STATUS_SUCCESS)
{
LOOKUP_NAME_CASE (h_name, h_aliases)
@@ -402,7 +408,7 @@ _nss_files_gethostbyname4_r (const char *name, struct gaih_addrtuple **pat,
struct hostent result;
status = internal_getent (stream, &result, buffer, buflen, errnop,
- herrnop, AF_UNSPEC);
+ herrnop, AF_UNSPEC, 0);
if (status != NSS_STATUS_SUCCESS)
break;
diff --git a/nss/tst-nss-files-hosts-v4mapped.c b/nss/tst-nss-files-hosts-v4mapped.c
new file mode 100644
index 0000000000000000..45582904e68adf29
--- /dev/null
+++ b/nss/tst-nss-files-hosts-v4mapped.c
@@ -0,0 +1,41 @@
+/* Test lookup of IPv4 mapped addresses in files database (bug 25457)
+
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, see <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+
+static int
+do_test (void)
+{
+ struct sockaddr_in6 s6 = { .sin6_family = AF_INET6 };
+ inet_pton (AF_INET6, "::ffff:192.168.0.1", &s6.sin6_addr);
+
+ char node[NI_MAXHOST];
+ int res = getnameinfo ((struct sockaddr *) &s6, sizeof (s6), node,
+ sizeof (node), NULL, 0, NI_NAMEREQD);
+ if (res)
+ printf ("%d %s\n", res, gai_strerror (res));
+ else
+ printf ("node=%s\n", node);
+
+ return res != 0;
+}
+
+#include <support/test-driver.c>
diff --git a/nss/tst-nss-files-hosts-v4mapped.root/etc/hosts b/nss/tst-nss-files-hosts-v4mapped.root/etc/hosts
new file mode 100644
index 0000000000000000..609b25ad39a879b4
--- /dev/null
+++ b/nss/tst-nss-files-hosts-v4mapped.root/etc/hosts
@@ -0,0 +1 @@
+192.168.0.1 v4mapped
diff --git a/nss/tst-nss-files-hosts-v4mapped.root/etc/nsswitch.conf b/nss/tst-nss-files-hosts-v4mapped.root/etc/nsswitch.conf
new file mode 100644
index 0000000000000000..5b0c6a419937a013
--- /dev/null
+++ b/nss/tst-nss-files-hosts-v4mapped.root/etc/nsswitch.conf
@@ -0,0 +1 @@
+hosts: files
diff --git a/resolv/mapv4v6addr.h b/resolv/mapv4v6addr.h
new file mode 100644
index 0000000000000000..7f85f7d5e393ec5f
--- /dev/null
+++ b/resolv/mapv4v6addr.h
@@ -0,0 +1,69 @@
+/*
+ * ++Copyright++ 1985, 1988, 1993
+ * -
+ * Copyright (c) 1985, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * -
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ * -
+ * --Copyright--
+ */
+
+#include <string.h>
+#include <arpa/nameser.h>
+
+static void
+map_v4v6_address (const char *src, char *dst)
+{
+ u_char *p = (u_char *) dst;
+ int i;
+
+ /* Move the IPv4 part (INADDRSZ bytes read from SRC) to offset 12 of DST. */
+ memcpy (dst + 12, src, INADDRSZ);
+
+ /* Mark DST as a mapped IPv4 address: ten 0x00 bytes, then 0xff 0xff. */
+ for (i = 0; i < 10; i++)
+ *p++ = 0x00;
+ *p++ = 0xff;
+ *p = 0xff;
+}

@ -0,0 +1,66 @@
commit 89c017de2f52d17862bda9a6f8382e913457bfbe
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Tue Jan 3 13:06:48 2023 -0800
x86: Check minimum/maximum of non_temporal_threshold [BZ #29953]
The minimum non_temporal_threshold is 0x4040. non_temporal_threshold may
be set to less than the minimum value when the shared cache size isn't
available (e.g., in an emulator) or by the tunable. Add checks for
minimum and maximum of non_temporal_threshold.
This fixes BZ #29953.
(cherry picked from commit 48b74865c63840b288bd85b4d8743533b73b339b)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index a7d2cc5fef03884b..f2d2de458db7358c 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -747,6 +747,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
share of the cache, it has a substantial risk of negatively
impacting the performance of other threads running on the chip. */
unsigned long int non_temporal_threshold = shared * 3 / 4;
+ /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
+ 'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
+ if that operation cannot overflow. Minimum of 0x4040 (16448) because the
+ L(large_memset_4x) loops need 64-byte to cache align and enough space for
+ at least 1 iteration of 4x PAGE_SIZE unrolled loop. Both values are
+ reflected in the manual. */
+ unsigned long int maximum_non_temporal_threshold = SIZE_MAX >> 4;
+ unsigned long int minimum_non_temporal_threshold = 0x4040;
+ if (non_temporal_threshold < minimum_non_temporal_threshold)
+ non_temporal_threshold = minimum_non_temporal_threshold;
+ else if (non_temporal_threshold > maximum_non_temporal_threshold)
+ non_temporal_threshold = maximum_non_temporal_threshold;
#if HAVE_TUNABLES
/* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
@@ -801,8 +813,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
shared = tunable_size;
tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
- /* NB: Ignore the default value 0. */
- if (tunable_size != 0)
+ if (tunable_size > minimum_non_temporal_threshold
+ && tunable_size <= maximum_non_temporal_threshold)
non_temporal_threshold = tunable_size;
tunable_size = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL);
@@ -817,14 +829,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
- /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
- 'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
- if that operation cannot overflow. Minimum of 0x4040 (16448) because the
- L(large_memset_4x) loops need 64-byte to cache align and enough space for
- at least 1 iteration of 4x PAGE_SIZE unrolled loop. Both values are
- reflected in the manual. */
TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold,
- 0x4040, SIZE_MAX >> 4);
+ minimum_non_temporal_threshold,
+ maximum_non_temporal_threshold);
TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold,
minimum_rep_movsb_threshold, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1,

@ -0,0 +1,122 @@
commit 6a0d56b009e34caea9cbc0bbec3272345ea8f55a
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Feb 21 09:20:28 2023 +0100
gshadow: Matching sgetsgent, sgetsgent_r ERANGE handling (bug 30151)
Before this change, sgetsgent_r did not set errno to ERANGE, but
sgetsgent only checks errno, not the return value from sgetsgent_r.
Consequently, sgetsgent did not detect any error, and reported
success to the caller, without initializing the struct sgrp object
whose address was returned.
This commit changes sgetsgent_r to set errno as well. This avoids
similar issues in applications which only check errno.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
(cherry picked from commit 969e9733c7d17edf1e239a73fa172f357561f440)
diff --git a/gshadow/Makefile b/gshadow/Makefile
index 2fdd0c7641d3655f..094a6c7b5aaad16b 100644
--- a/gshadow/Makefile
+++ b/gshadow/Makefile
@@ -26,7 +26,7 @@ headers = gshadow.h
routines = getsgent getsgnam sgetsgent fgetsgent putsgent \
getsgent_r getsgnam_r sgetsgent_r fgetsgent_r
-tests = tst-gshadow tst-putsgent tst-fgetsgent_r
+tests = tst-gshadow tst-putsgent tst-fgetsgent_r tst-sgetsgent
CFLAGS-getsgent_r.c += -fexceptions
CFLAGS-getsgent.c += -fexceptions
diff --git a/gshadow/sgetsgent_r.c b/gshadow/sgetsgent_r.c
index 565463b07e8facbe..ae500d64c386d964 100644
--- a/gshadow/sgetsgent_r.c
+++ b/gshadow/sgetsgent_r.c
@@ -61,7 +61,10 @@ __sgetsgent_r (const char *string, struct sgrp *resbuf, char *buffer,
buffer[buflen - 1] = '\0';
sp = strncpy (buffer, string, buflen);
if (buffer[buflen - 1] != '\0')
- return ERANGE;
+ {
+ __set_errno (ERANGE);
+ return ERANGE;
+ }
}
else
sp = (char *) string;
diff --git a/gshadow/tst-sgetsgent.c b/gshadow/tst-sgetsgent.c
new file mode 100644
index 0000000000000000..0370c10fd0630b42
--- /dev/null
+++ b/gshadow/tst-sgetsgent.c
@@ -0,0 +1,69 @@
+/* Test large input for sgetsgent (bug 30151).
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <gshadow.h>
+#include <stddef.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xmemstream.h>
+#include <stdlib.h>
+
+static int
+do_test (void)
+{
+ /* Create a shadow group with 1000 members. */
+ struct xmemstream mem;
+ xopen_memstream (&mem);
+ const char *passwd = "k+zD0nucwfxAo3sw1NXUj6K5vt5M16+X0TVGdE1uFvq5R8V7efJ";
+ fprintf (mem.out, "group-name:%s::m0", passwd);
+ for (int i = 1; i < 1000; ++i)
+ fprintf (mem.out, ",m%d", i);
+ xfclose_memstream (&mem);
+
+ /* Call sgetsgent. */
+ char *input = mem.buffer;
+ struct sgrp *e = sgetsgent (input);
+ TEST_VERIFY_EXIT (e != NULL);
+ TEST_COMPARE_STRING (e->sg_namp, "group-name");
+ TEST_COMPARE_STRING (e->sg_passwd, passwd);
+ /* No administrators. */
+ TEST_COMPARE_STRING (e->sg_adm[0], NULL);
+ /* Check the members list. */
+ for (int i = 0; i < 1000; ++i)
+ {
+ char *member = xasprintf ("m%d", i);
+ TEST_COMPARE_STRING (e->sg_mem[i], member);
+ free (member);
+ }
+ TEST_COMPARE_STRING (e->sg_mem[1000], NULL);
+
+ /* Check that putsgent brings back the input string. */
+ xopen_memstream (&mem);
+ TEST_COMPARE (putsgent (e, mem.out), 0);
+ xfclose_memstream (&mem);
+ /* Compare without the trailing '\n' that putsgent added. */
+ TEST_COMPARE (mem.buffer[mem.length - 1], '\n');
+ mem.buffer[mem.length - 1] = '\0';
+ TEST_COMPARE_STRING (mem.buffer, input);
+
+ free (mem.buffer);
+ free (input);
+ return 0;
+}
+
+#include <support/test-driver.c>

@ -0,0 +1,36 @@
commit 71eb9cc1ffd79e96549dfb16f8e86aaf88a3bac8
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Apr 3 17:23:11 2023 +0200
x86_64: Fix asm constraints in feraiseexcept (bug 30305)
The divss instruction clobbers its first argument, and the constraints
need to reflect that. Fortunately, with GCC 12, generated code does
not actually change, so there is no externally visible bug.
Suggested-by: Jakub Jelinek <jakub@redhat.com>
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae)
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
index 44a1d93b34796e1a..a301b657c49b31b1 100644
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
@@ -33,7 +33,7 @@ __feraiseexcept (int excepts)
/* One example of an invalid operation is 0.0 / 0.0. */
float f = 0.0;
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (f));
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (f));
(void) &f;
}
@@ -43,7 +43,7 @@ __feraiseexcept (int excepts)
float f = 1.0;
float g = 0.0;
- __asm__ __volatile__ ("divss %1, %0" : : "x" (f), "x" (g));
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (f) : "x" (g));
(void) &f;
}

@ -0,0 +1,278 @@
commit 567f7413fb1179acd936766b542ea72f7ef60d8b
Author: Adam Yi <ayi@janestreet.com>
Date: Tue Mar 7 07:30:02 2023 -0500
posix: Fix system blocks SIGCHLD erroneously [BZ #30163]
Fix bug that SIGCHLD is erroneously blocked forever in the following
scenario:
1. Thread A calls system but hasn't returned yet
2. Thread B calls another system but returns
SIGCHLD would be blocked forever in thread B after its system() returns,
even after the system() in thread A returns.
Although POSIX does not require it, the glibc system implementation aims to be
thread and cancellation safe. This bug was introduced in
5fb7fc96350575c9adb1316833e48ca11553be49 when we moved reverting signal
mask to happen when the last concurrently running system returns,
despite that signal mask is per thread. This commit reverts this logic
and adds a test.
Signed-off-by: Adam Yi <ayi@janestreet.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 436a604b7dc741fc76b5a6704c6cd8bb178518e7)
diff --git a/stdlib/tst-system.c b/stdlib/tst-system.c
index 178808e048a9b3bc..d1413d7c6a4cb82c 100644
--- a/stdlib/tst-system.c
+++ b/stdlib/tst-system.c
@@ -26,6 +26,7 @@
#include <support/check.h>
#include <support/temp_file.h>
#include <support/support.h>
+#include <support/xthread.h>
#include <support/xunistd.h>
static char *tmpdir;
@@ -72,6 +73,20 @@ call_system (void *closure)
}
}
+static void *
+sleep_and_check_sigchld (void *closure)
+{
+ double *seconds = (double *) closure;
+ char cmd[namemax];
+ sprintf (cmd, "sleep %lf" , *seconds);
+ TEST_COMPARE (system (cmd), 0);
+
+ sigset_t blocked = {0};
+ TEST_COMPARE (sigprocmask (SIG_BLOCK, NULL, &blocked), 0);
+ TEST_COMPARE (sigismember (&blocked, SIGCHLD), 0);
+ return NULL;
+}
+
static int
do_test (void)
{
@@ -155,6 +170,17 @@ do_test (void)
xchmod (_PATH_BSHELL, st.st_mode);
}
+ {
+ pthread_t long_sleep_thread = xpthread_create (NULL,
+ sleep_and_check_sigchld,
+ &(double) { 0.2 });
+ pthread_t short_sleep_thread = xpthread_create (NULL,
+ sleep_and_check_sigchld,
+ &(double) { 0.1 });
+ xpthread_join (short_sleep_thread);
+ xpthread_join (long_sleep_thread);
+ }
+
TEST_COMPARE (system (""), 0);
return 0;
diff --git a/support/Makefile b/support/Makefile
index 0aa9d41c5a890087..e7f66920f1abbb48 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -32,6 +32,8 @@ libsupport-routines = \
check_hostent \
check_netent \
delayed_exit \
+ dtotimespec \
+ dtotimespec-time64 \
ignore_stderr \
next_to_fault \
oom_error \
diff --git a/support/dtotimespec-time64.c b/support/dtotimespec-time64.c
new file mode 100644
index 0000000000000000..b3d5e351e3a631ac
--- /dev/null
+++ b/support/dtotimespec-time64.c
@@ -0,0 +1,27 @@
+/* Convert double to timespec. 64-bit time support.
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library and is also part of gnulib.
+ Patches to this file should be submitted to both projects.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <time.h>
+
+#if __TIMESIZE != 64
+# define timespec __timespec64
+# define time_t __time64_t
+# define dtotimespec dtotimespec_time64
+# include "dtotimespec.c"
+#endif
diff --git a/support/dtotimespec.c b/support/dtotimespec.c
new file mode 100644
index 0000000000000000..cde5b4d74cba1932
--- /dev/null
+++ b/support/dtotimespec.c
@@ -0,0 +1,50 @@
+/* Convert double to timespec.
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library and is also part of gnulib.
+ Patches to this file should be submitted to both projects.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Convert the double value SEC to a struct timespec. Round toward
+ positive infinity. On overflow, return an extremal value. */
+
+#include <support/timespec.h>
+#include <intprops.h>
+
+struct timespec
+dtotimespec (double sec)
+{
+ if (sec <= TYPE_MINIMUM (time_t)) /* Too small: clamp to the minimum time_t. */
+ return make_timespec (TYPE_MINIMUM (time_t), 0);
+ else if (sec >= 1.0 + TYPE_MAXIMUM (time_t)) /* Too large: clamp to the maximum. */
+ return make_timespec (TYPE_MAXIMUM (time_t), TIMESPEC_HZ - 1);
+ else
+ {
+ time_t s = sec; /* Whole seconds; the conversion drops the fraction. */
+ double frac = TIMESPEC_HZ * (sec - s); /* Dropped fraction, scaled by TIMESPEC_HZ. */
+ long ns = frac;
+ ns += ns < frac; /* Round up when the conversion to long lost a fraction. */
+ s += ns / TIMESPEC_HZ; /* Carry into S if rounding reached TIMESPEC_HZ. */
+ ns %= TIMESPEC_HZ;
+
+ if (ns < 0) /* Borrow one second so the nanosecond part is not negative. */
+ {
+ s--;
+ ns += TIMESPEC_HZ;
+ }
+
+ return make_timespec (s, ns);
+ }
+}
diff --git a/support/shell-container.c b/support/shell-container.c
index b2a4324dc7444be5..6fe925dc49075848 100644
--- a/support/shell-container.c
+++ b/support/shell-container.c
@@ -39,6 +39,7 @@
#include <error.h>
#include <support/support.h>
+#include <support/timespec.h>
/* Design considerations
@@ -171,6 +172,39 @@ kill_func (char **argv)
return 0;
}
+/* Emulate the "/bin/sleep" command. No suffix support. Options are
+ ignored. ARGV[0] is the interval in seconds, parsed with strtod.
+ Returns 0 after sleeping, or 1 (with a message on stderr) if the
+ operand is missing, not a number, out of range or negative, or if
+ nanosleep fails. */
+static int
+sleep_func (char **argv)
+{
+ if (argv[0] == NULL)
+ {
+ fprintf (stderr, "sleep: missing operand\n");
+ return 1;
+ }
+ char *endptr = NULL;
+ /* strtod reports a range error only by setting errno and never
+ clears it, so reset it first; otherwise a stale ERANGE from an
+ earlier call would reject a valid operand. */
+ errno = 0;
+ double sec = strtod (argv[0], &endptr);
+ if (endptr == argv[0] || errno == ERANGE || sec < 0)
+ {
+ fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
+ return 1;
+ }
+ struct timespec ts = dtotimespec (sec);
+ if (nanosleep (&ts, NULL) < 0)
+ {
+ fprintf (stderr, "sleep: failed to nanosleep: %s\n", strerror (errno));
+ return 1;
+ }
+ return 0;
+}
+
/* This is a list of all the built-in commands we understand. */
static struct {
const char *name;
@@ -181,6 +215,7 @@ static struct {
{ "cp", copy_func },
{ "exit", exit_func },
{ "kill", kill_func },
+ { "sleep", sleep_func },
{ NULL, NULL }
};
diff --git a/support/timespec.h b/support/timespec.h
index 0478aef51fdcc5ae..843a90d60675f37d 100644
--- a/support/timespec.h
+++ b/support/timespec.h
@@ -57,6 +57,8 @@ int support_timespec_check_in_range (struct timespec expected,
struct timespec observed,
double lower_bound, double upper_bound);
+struct timespec dtotimespec (double sec) __attribute__((const));
+
#else
struct timespec __REDIRECT (timespec_add, (struct timespec, struct timespec),
timespec_add_time64);
@@ -82,6 +84,8 @@ int __REDIRECT (support_timespec_check_in_range, (struct timespec expected,
double lower_bound,
double upper_bound),
support_timespec_check_in_range_time64);
+
+struct timespec __REDIRECT (dtotimespec, (double sec), dtotimespec_time64);
#endif
/* Check that the timespec on the left represents a time before the
diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c
index 48668fb392e67d1a..b9676abb1c6f35eb 100644
--- a/sysdeps/posix/system.c
+++ b/sysdeps/posix/system.c
@@ -179,16 +179,16 @@ do_system (const char *line)
as if the shell had terminated using _exit(127). */
status = W_EXITCODE (127, 0);
+ /* sigaction can not fail with SIGINT/SIGQUIT used with old
+ disposition. Same applies for sigprocmask. */
DO_LOCK ();
if (SUB_REF () == 0)
{
- /* sigaction can not fail with SIGINT/SIGQUIT used with old
- disposition. Same applies for sigprocmask. */
__sigaction (SIGINT, &intr, NULL);
__sigaction (SIGQUIT, &quit, NULL);
- __sigprocmask (SIG_SETMASK, &omask, NULL);
}
DO_UNLOCK ();
+ __sigprocmask (SIG_SETMASK, &omask, NULL);
if (ret != 0)
__set_errno (ret);

@ -0,0 +1,71 @@
commit 8e1a8e04b153739a77289e6fc07cbfc252d87e02
Author: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
Date: Sat Feb 4 14:41:38 2023 +0300
gmon: Fix allocated buffer overflow (bug 29444)
The `__monstartup()` allocates a buffer used to store all the data
accumulated by the monitor.
The size of this buffer depends on the size of the internal structures
used and the address range for which the monitor is activated, as well
as on the maximum density of call instructions and/or callable functions
that could be potentially on a segment of executable code.
In particular a hash table of arcs is placed at the end of this buffer.
The size of this hash table is calculated in bytes as
p->fromssize = p->textsize / HASHFRACTION;
but actually should be
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
This results in writing beyond the end of the allocated buffer when an
added arc corresponds to a call near the end of the monitored
address range, since `_mcount()` checks the incoming caller address against
the monitored range but not the intermediate hash-like index that it
uses to write into the table.
It should be noted that when the results are output to `gmon.out`, the
table is read to the last element calculated from the allocated size in
bytes, so the arcs stored outside the buffer boundary did not fall into
`gprof` for analysis. Thus this "feature" helped me find this bug
while working on https://sourceware.org/bugzilla/show_bug.cgi?id=29438
Just in case, I will explicitly note that the problem breaks the
`make test t=gmon/tst-gmon-dso` added for Bug 29438.
There, the arc of the `f3()` call disappears from the output, since in
the DSO case, the call to `f3` is located close to the end of the
monitored range.
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
Another minor error seems a related typo in the calculation of
`kcountsize`, but since kcounts are smaller than froms, this is
actually to align the p->froms data.
Co-authored-by: DJ Delorie <dj@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 801af9fafd4689337ebf27260aa115335a0cb2bc)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index dee64803ada583d7..bf76358d5b1aa2da 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -132,6 +132,8 @@ __monstartup (u_long lowpc, u_long highpc)
p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
+ /* This looks like a typo, but it's here to align the p->froms
+ section. */
p->kcountsize = ROUNDUP(p->textsize / HISTFRACTION, sizeof(*p->froms));
p->hashfraction = HASHFRACTION;
p->log_hashfraction = -1;
@@ -142,7 +144,7 @@ __monstartup (u_long lowpc, u_long highpc)
instead of integer division. Precompute shift amount. */
p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
}
- p->fromssize = p->textsize / HASHFRACTION;
+ p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
p->tolimit = p->textsize * ARCDENSITY / 100;
if (p->tolimit < MINARCS)
p->tolimit = MINARCS;

@ -0,0 +1,464 @@
commit 4dd89b2a8fc91bc74ea85a442ae4c672b6dda113
Author: Simon Kissane <skissane@gmail.com>
Date: Sat Feb 11 20:12:13 2023 +1100
gmon: improve mcount overflow handling [BZ# 27576]
When mcount overflows, no gmon.out file is generated, but no message is printed
to the user, leaving the user with no idea why, and thinking maybe there is
some bug - which is how BZ 27576 ended up being logged. Print a message to
stderr in this case so the user knows what is going on.
As a comment in sys/gmon.h acknowledges, the hardcoded MAXARCS value is too
small for some large applications, including the test case in that BZ. Rather
than increase it, add tunables to enable MINARCS and MAXARCS to be overridden
at runtime (glibc.gmon.minarcs and glibc.gmon.maxarcs). So if a user gets the
mcount overflow error, they can try increasing maxarcs (they might need to
increase minarcs too if the heuristic is wrong in their case.)
Note setting minarcs/maxarcs too large can cause monstartup to fail with an
out of memory error. If you set them large enough, it can cause an integer
overflow in calculating the buffer size. I haven't done anything to defend
against that - it would not generally be a security vulnerability, since these
tunables will be ignored in suid/sgid programs (due to the SXID_ERASE default),
and if you can set GLIBC_TUNABLES in the environment of a process, you can take
it over anyway (LD_PRELOAD, LD_LIBRARY_PATH, etc). I thought about modifying
the code of monstartup to defend against integer overflows, but doing so is
complicated, and I realise the existing code is susceptible to them even prior
to this change (e.g. try passing a pathologically large highpc argument to
monstartup), so I decided just to leave that possibility in-place.
Add a test case which demonstrates mcount overflow and the tunables.
Document the new tunables in the manual.
Signed-off-by: Simon Kissane <skissane@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
(cherry picked from commit 31be941e4367c001b2009308839db5c67bf9dcbc)
# Conflicts:
# elf/dl-tunables.list
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index ffcd7f18d4fafb91..feb8fcbf5bd5ca7d 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -165,4 +165,17 @@ glibc {
default: 2
}
}
+
+ gmon {
+ minarcs {
+ type: INT_32
+ minval: 50
+ default: 50
+ }
+ maxarcs {
+ type: INT_32
+ minval: 50
+ default: 1048576
+ }
+ }
}
diff --git a/gmon/Makefile b/gmon/Makefile
index 7b7b854327702030..706f50f7dd4cae84 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -25,7 +25,7 @@ include ../Makeconfig
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
routines := gmon mcount profil sprofil prof-freq
-tests = tst-sprofil tst-gmon
+tests = tst-sprofil tst-gmon tst-mcount-overflow
ifeq ($(build-profile),yes)
tests += tst-profile-static
tests-static += tst-profile-static
@@ -56,6 +56,18 @@ ifeq ($(run-built-tests),yes)
tests-special += $(objpfx)tst-gmon-gprof.out
endif
+CFLAGS-tst-mcount-overflow.c := -fno-omit-frame-pointer -pg
+tst-mcount-overflow-no-pie = yes
+CRT-tst-mcount-overflow := $(csu-objpfx)g$(start-installed-name)
+# Intentionally use invalid config where maxarcs<minarcs to check warning is printed
+tst-mcount-overflow-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcount-overflow.data \
+ GLIBC_TUNABLES=glibc.gmon.minarcs=51:glibc.gmon.maxarcs=50
+# Send stderr into output file because we make sure expected messages are printed
+tst-mcount-overflow-ARGS := 2>&1 1>/dev/null | cat
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-mcount-overflow-check.out
+endif
+
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
tst-gmon-static-no-pie = yes
@@ -103,6 +115,14 @@ $(objpfx)tst-gmon.out: clean-tst-gmon-data
clean-tst-gmon-data:
rm -f $(objpfx)tst-gmon.data.*
+$(objpfx)tst-mcount-overflow.o: clean-tst-mcount-overflow-data
+clean-tst-mcount-overflow-data:
+ rm -f $(objpfx)tst-mcount-overflow.data.*
+
+$(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)tst-mcount-overflow.out
+ $(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
+ $(evaluate-test)
+
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
$(evaluate-test)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index bf76358d5b1aa2da..689bf80141e559ca 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -46,6 +46,11 @@
#include <libc-internal.h>
#include <not-cancel.h>
+#if HAVE_TUNABLES
+# define TUNABLE_NAMESPACE gmon
+# include <elf/dl-tunables.h>
+#endif
+
#ifdef PIC
# include <link.h>
@@ -124,6 +129,22 @@ __monstartup (u_long lowpc, u_long highpc)
int o;
char *cp;
struct gmonparam *p = &_gmonparam;
+ long int minarcs, maxarcs;
+
+#if HAVE_TUNABLES
+ /* Read minarcs/maxarcs tunables. */
+ minarcs = TUNABLE_GET (minarcs, int32_t, NULL);
+ maxarcs = TUNABLE_GET (maxarcs, int32_t, NULL);
+ if (maxarcs < minarcs)
+ {
+ ERR("monstartup: maxarcs < minarcs, setting maxarcs = minarcs\n");
+ maxarcs = minarcs;
+ }
+#else
+ /* No tunables, we use hardcoded defaults */
+ minarcs = MINARCS;
+ maxarcs = MAXARCS;
+#endif
/*
* round lowpc and highpc to multiples of the density we're using
@@ -146,10 +167,10 @@ __monstartup (u_long lowpc, u_long highpc)
}
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
p->tolimit = p->textsize * ARCDENSITY / 100;
- if (p->tolimit < MINARCS)
- p->tolimit = MINARCS;
- else if (p->tolimit > MAXARCS)
- p->tolimit = MAXARCS;
+ if (p->tolimit < minarcs)
+ p->tolimit = minarcs;
+ else if (p->tolimit > maxarcs)
+ p->tolimit = maxarcs;
p->tossize = p->tolimit * sizeof(struct tostruct);
cp = calloc (p->kcountsize + p->fromssize + p->tossize, 1);
diff --git a/gmon/mcount.c b/gmon/mcount.c
index 9d4a1a50fa6ab21a..f7180fdb83399a14 100644
--- a/gmon/mcount.c
+++ b/gmon/mcount.c
@@ -41,6 +41,10 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#include <atomic.h>
+#include <not-cancel.h>
+#include <unistd.h>
+#define ERR(s) __write_nocancel (STDERR_FILENO, s, sizeof (s) - 1)
+
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
@@ -170,6 +174,7 @@ done:
return;
overflow:
p->state = GMON_PROF_ERROR;
+ ERR("mcount: call graph buffer size limit exceeded, gmon.out will not be generated\n");
return;
}
diff --git a/gmon/sys/gmon.h b/gmon/sys/gmon.h
index b4cc3b043a2aec77..af0582a3717085b5 100644
--- a/gmon/sys/gmon.h
+++ b/gmon/sys/gmon.h
@@ -111,6 +111,8 @@ extern struct __bb *__bb_head;
* Always allocate at least this many tostructs. This
* hides the inadequacy of the ARCDENSITY heuristic, at least
* for small programs.
+ *
+ * Value can be overridden at runtime by glibc.gmon.minarcs tunable.
*/
#define MINARCS 50
@@ -124,8 +126,8 @@ extern struct __bb *__bb_head;
* Used to be max representable value of ARCINDEX minus 2, but now
* that ARCINDEX is a long, that's too large; we don't really want
* to allow a 48 gigabyte table.
- * The old value of 1<<16 wasn't high enough in practice for large C++
- * programs; will 1<<20 be adequate for long? FIXME
+ *
+ * Value can be overridden at runtime by glibc.gmon.maxarcs tunable.
*/
#define MAXARCS (1 << 20)
diff --git a/gmon/tst-mcount-overflow-check.sh b/gmon/tst-mcount-overflow-check.sh
new file mode 100644
index 0000000000000000..27eb5538fd573a6e
--- /dev/null
+++ b/gmon/tst-mcount-overflow-check.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Test expected messages generated when mcount overflows
+# Copyright (C) 2017-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+LC_ALL=C
+export LC_ALL
+set -e
+exec 2>&1
+
+program="$1"
+
+check_msg() {
+ if ! grep -q "$1" "$program.out"; then
+ echo "FAIL: expected message not in output: $1"
+ exit 1
+ fi
+}
+
+check_msg 'monstartup: maxarcs < minarcs, setting maxarcs = minarcs'
+check_msg 'mcount: call graph buffer size limit exceeded, gmon.out will not be generated'
+
+for data_file in $1.data.*; do
+ if [ -f "$data_file" ]; then
+ echo "FAIL: expected no data files, but found $data_file"
+ exit 1
+ fi
+done
+
+echo PASS
diff --git a/gmon/tst-mcount-overflow.c b/gmon/tst-mcount-overflow.c
new file mode 100644
index 0000000000000000..06cc93ef872eb7c1
--- /dev/null
+++ b/gmon/tst-mcount-overflow.c
@@ -0,0 +1,72 @@
+/* Test program to trigger mcount overflow in profiling collection.
+ Copyright (C) 2017-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Program with sufficiently complex, yet pointless, call graph
+ that it will trigger an mcount overflow, when you set the
+ minarcs/maxarcs tunables to very low values. */
+
+#define PREVENT_TAIL_CALL asm volatile ("")
+
+/* Calls REP(n) macro 16 times, for n=0..15.
+ * You need to define REP(n) before using this.
+ */
+#define REPS \
+ REP(0) REP(1) REP(2) REP(3) REP(4) REP(5) REP(6) REP(7) \
+ REP(8) REP(9) REP(10) REP(11) REP(12) REP(13) REP(14) REP(15)
+
+/* Defines 16 leaf functions named f1_0 to f1_15 */
+#define REP(n) \
+ __attribute__ ((noinline, noclone, weak)) void f1_##n (void) {};
+REPS
+#undef REP
+
+/* Calls all 16 leaf functions f1_* in succession */
+__attribute__ ((noinline, noclone, weak)) void
+f2 (void)
+{
+# define REP(n) f1_##n();
+ REPS
+# undef REP
+ PREVENT_TAIL_CALL;
+}
+
+/* Defines 16 functions named f2_0 to f2_15, which all just call f2 */
+#define REP(n) \
+ __attribute__ ((noinline, noclone, weak)) void \
+ f2_##n (void) { f2(); PREVENT_TAIL_CALL; };
+REPS
+#undef REP
+
+__attribute__ ((noinline, noclone, weak)) void
+f3 (int count)
+{
+ for (int i = 0; i < count; ++i)
+ {
+ /* Calls f1_0(), f2_0(), f1_1(), f2_1(), f1_2(), f2_2(), etc */
+# define REP(n) f1_##n(); f2_##n();
+ REPS
+# undef REP
+ }
+}
+
+int
+main (void)
+{
+ f3 (1000);
+ return 0;
+}
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 561e0df230646de1..a387bf3055f3488e 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -77,6 +77,9 @@ glibc.malloc.check: 0 (min: 0, max: 3)
capabilities seen by @theglibc{}
* Memory Related Tunables:: Tunables that control the use of memory by
@theglibc{}.
+* gmon Tunables:: Tunables that control the gmon profiler, used in
+ conjunction with gprof
+
@end menu
@node Tunable names
@@ -596,3 +599,59 @@ support in the kernel if this tunable has any non-zero value.
The default value is @samp{0}, which disables all memory tagging.
@end deftp
+
+@node gmon Tunables
+@section gmon Tunables
+@cindex gmon tunables
+
+@deftp {Tunable namespace} glibc.gmon
+This tunable namespace affects the behaviour of the gmon profiler.
+gmon is a component of @theglibc{} which is normally used in
+conjunction with gprof.
+
+When GCC compiles a program with the @code{-pg} option, it instruments
+the program with calls to the @code{mcount} function, to record the
+program's call graph. At program startup, a memory buffer is allocated
+to store this call graph; the size of the buffer is calculated using a
+heuristic based on code size. If during execution, the buffer is found
+to be too small, profiling will be aborted and no @file{gmon.out} file
+will be produced. In that case, you will see the following message
+printed to standard error:
+
+@example
+mcount: call graph buffer size limit exceeded, gmon.out will not be generated
+@end example
+
+Most of the symbols discussed in this section are defined in the header
+@code{sys/gmon.h}. However, some symbols (for example @code{mcount})
+are not defined in any header file, since they are only intended to be
+called from code generated by the compiler.
+@end deftp
+
+@deftp Tunable glibc.gmon.minarcs
+The heuristic for sizing the call graph buffer is known to be
+insufficient for small programs; hence, the calculated value is clamped
+to be at least a minimum size. The default minimum (in units of
+call graph entries, @code{struct tostruct}), is given by the macro
+@code{MINARCS}. If you have some program with an unusually complex
+call graph, for which the heuristic fails to allocate enough space,
+you can use this tunable to increase the minimum to a larger value.
+@end deftp
+
+@deftp Tunable glibc.gmon.maxarcs
+To prevent excessive memory consumption when profiling very large
+programs, the call graph buffer is allowed to have a maximum of
+@code{MAXARCS} entries. For some very large programs, the default
+value of @code{MAXARCS} defined in @file{sys/gmon.h} is too small; in
+that case, you can use this tunable to increase it.
+
+Note the value of the @code{maxarcs} tunable must be greater than or equal
+to that of the @code{minarcs} tunable; if this constraint is violated,
+a warning will be printed to standard error at program startup, and
+the @code{minarcs} value will be used as the maximum as well.
+
+Setting either tunable too high may result in a call graph buffer
+whose size exceeds the available memory; in that case, an out of memory
+error will be printed at program startup, the profiler will be
+disabled, and no @file{gmon.out} file will be generated.
+@end deftp
diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
index d9d62499be4d67cb..b8495f695c39e2d3 100644
--- a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
@@ -26,3 +26,5 @@
@order glibc.rtld.optional_static_tls
@order glibc.malloc.tcache_max
@order glibc.malloc.check
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/i386/dl-tunables.list b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
index e83962ec3af11691..0ee51cdcdd4a7ee7 100644
--- a/sysdeps/unix/sysv/linux/i386/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
@@ -33,3 +33,5 @@
@order glibc.rtld.optional_static_tls
@order glibc.malloc.tcache_max
@order glibc.malloc.check
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
index 8f01840ef57874e7..17b4114e12736fa4 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
@@ -26,3 +26,5 @@
@order glibc.rtld.optional_static_tls
@order glibc.malloc.tcache_max
@order glibc.malloc.check
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
index 3dd7e891c5e37b1a..1c8cd0ec14324703 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
@@ -26,3 +26,5 @@
@order glibc.malloc.tcache_max
@order glibc.malloc.check
@order glibc.cpu.hwcaps
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
index e83962ec3af11691..0ee51cdcdd4a7ee7 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
@@ -33,3 +33,5 @@
@order glibc.rtld.optional_static_tls
@order glibc.malloc.tcache_max
@order glibc.malloc.check
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs

@ -0,0 +1,188 @@
commit f1b15d2005f5125529171db3be39026a1157c2a8
Author: Simon Kissane <skissane@gmail.com>
Date: Sat Feb 11 08:58:02 2023 +1100
gmon: fix memory corruption issues [BZ# 30101]
V2 of this patch fixes an issue in V1, where the state was changed to ON not
OFF at end of _mcleanup. I hadn't noticed that (counterintuitively) ON=0 and
OFF=3, hence zeroing the buffer turned it back on. So set the state to OFF
after the memset.
1. Prevent double free, and reads from unallocated memory, when
_mcleanup is (incorrectly) called two or more times in a row,
without an intervening call to __monstartup; with this patch, the
second and subsequent calls effectively become no-ops instead.
While setting tos=NULL is minimal fix, safest action is to zero the
whole gmonparam buffer.
2. Prevent memory leak when __monstartup is (incorrectly) called two
or more times in a row, without an intervening call to _mcleanup;
with this patch, the second and subsequent calls effectively become
no-ops instead.
3. After _mcleanup, treat __moncontrol(1) as __moncontrol(0) instead.
With zeroing of gmonparam buffer in _mcleanup, this stops the
state incorrectly being changed to GMON_PROF_ON despite profiling
actually being off. If we'd just done the minimal fix to _mcleanup
of setting tos=NULL, there is risk of far worse memory corruption:
kcount would point to deallocated memory, and the __profil syscall
would make the kernel write profiling data into that memory,
which could have since been reallocated to something unrelated.
4. Ensure __moncontrol(0) still turns off profiling even in error
state. Otherwise, if mcount overflows and sets state to
GMON_PROF_ERROR, when _mcleanup calls __moncontrol(0), the __profil
syscall to disable profiling will not be invoked. _mcleanup will
free the buffer, but the kernel will still be writing profiling
data into it, potentially corrupting arbitrary memory.
Also adds a test case for (1). Issues (2)-(4) are not feasible to test.
Signed-off-by: Simon Kissane <skissane@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
(cherry picked from commit bde121872001d8f3224eeafa5b7effb871c3fbca)
diff --git a/gmon/Makefile b/gmon/Makefile
index 706f50f7dd4cae84..7fd9db8f749a0843 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -1,4 +1,5 @@
-# Copyright (C) 1995-2021 Free Software Foundation, Inc.
+# Copyright (C) 1995-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -25,7 +26,7 @@ include ../Makeconfig
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
routines := gmon mcount profil sprofil prof-freq
-tests = tst-sprofil tst-gmon tst-mcount-overflow
+tests = tst-sprofil tst-gmon tst-mcount-overflow tst-mcleanup
ifeq ($(build-profile),yes)
tests += tst-profile-static
tests-static += tst-profile-static
@@ -68,6 +69,14 @@ ifeq ($(run-built-tests),yes)
tests-special += $(objpfx)tst-mcount-overflow-check.out
endif
+CFLAGS-tst-mcleanup.c := -fno-omit-frame-pointer -pg
+tst-mcleanup-no-pie = yes
+CRT-tst-mcleanup := $(csu-objpfx)g$(start-installed-name)
+tst-mcleanup-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcleanup.data
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-mcleanup.out
+endif
+
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
tst-gmon-static-no-pie = yes
@@ -123,6 +132,10 @@ $(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)ts
$(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
$(evaluate-test)
+$(objpfx)tst-mcleanup.out: clean-tst-mcleanup-data
+clean-tst-mcleanup-data:
+ rm -f $(objpfx)tst-mcleanup.data.*
+
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
$(evaluate-test)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index 689bf80141e559ca..5e99a7351dc71666 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -102,11 +102,8 @@ __moncontrol (int mode)
{
struct gmonparam *p = &_gmonparam;
- /* Don't change the state if we ran into an error. */
- if (p->state == GMON_PROF_ERROR)
- return;
-
- if (mode)
+ /* Treat start request as stop if error or gmon not initialized. */
+ if (mode && p->state != GMON_PROF_ERROR && p->tos != NULL)
{
/* start */
__profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale);
@@ -116,7 +113,9 @@ __moncontrol (int mode)
{
/* stop */
__profil(NULL, 0, 0, 0);
- p->state = GMON_PROF_OFF;
+ /* Don't change the state if we ran into an error. */
+ if (p->state != GMON_PROF_ERROR)
+ p->state = GMON_PROF_OFF;
}
}
libc_hidden_def (__moncontrol)
@@ -146,6 +145,14 @@ __monstartup (u_long lowpc, u_long highpc)
maxarcs = MAXARCS;
#endif
+ /*
+ * If we are incorrectly called twice in a row (without an
+ * intervening call to _mcleanup), ignore the second call to
+ * prevent leaking memory.
+ */
+ if (p->tos != NULL)
+ return;
+
/*
* round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
@@ -463,9 +470,14 @@ _mcleanup (void)
{
__moncontrol (0);
- if (_gmonparam.state != GMON_PROF_ERROR)
+ if (_gmonparam.state != GMON_PROF_ERROR && _gmonparam.tos != NULL)
write_gmon ();
/* free the memory. */
free (_gmonparam.tos);
+
+ /* reset buffer to initial state for safety */
+ memset(&_gmonparam, 0, sizeof _gmonparam);
+ /* somewhat confusingly, ON=0, OFF=3 */
+ _gmonparam.state = GMON_PROF_OFF;
}
diff --git a/gmon/tst-mcleanup.c b/gmon/tst-mcleanup.c
new file mode 100644
index 0000000000000000..b259653ec833aca4
--- /dev/null
+++ b/gmon/tst-mcleanup.c
@@ -0,0 +1,31 @@
+/* Test program for repeated invocation of _mcleanup
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Intentionally calls _mcleanup() twice: once manually, it will be
+ called again as an atexit handler. This is incorrect use of the API,
+ but the point of the test is to make sure we don't crash when the
+ API is misused in this way. */
+
+#include <sys/gmon.h>
+
+int
+main (void)
+{
+ _mcleanup();
+ return 0;
+}

@ -0,0 +1,3 @@
#Type Name ID GECOS Home directory Shell
g nscd 28
u nscd 28 "NSCD Daemon" - -

@ -155,7 +155,7 @@ end \
Summary: The GNU libc libraries
Name: glibc
Version: %{glibcversion}
Release: 60%{?dist}
Release: 82%{?dist}
# In general, GPLv2+ is used by programs, LGPLv2+ is used for
# libraries.
@ -195,6 +195,7 @@ Source10: wrap-find-debuginfo.sh
Source11: parse-SUPPORTED.py
# Include in the source RPM for reference.
Source12: ChangeLog.old
Source13: nscd-sysusers.conf
######################################################################
# Activate the wrapper script for debuginfo generation, by rewriting
@ -700,6 +701,52 @@ Patch468: glibc-upstream-2.34-386.patch
# glibc-upstream-2.34-387.patch is a NEWS-only update. Skipped downstream.
Patch469: glibc-upstream-2.34-388.patch
Patch470: glibc-upstream-2.34-389.patch
Patch471: glibc-rh2172953.patch
Patch472: glibc-rh2149615-1.patch
Patch473: glibc-rh2149615-2.patch
Patch474: glibc-rh2169978-1.patch
Patch475: glibc-rh2169978-2.patch
Patch476: glibc-rh2149615-3.patch
Patch477: glibc-rh2166710.patch
# glibc-upstream-2.34-390.patch backported above as glibc-rh2172953.patch.
Patch478: glibc-upstream-2.34-391.patch
Patch479: glibc-upstream-2.34-392.patch
Patch480: glibc-upstream-2.34-393.patch
Patch481: glibc-upstream-2.34-394.patch
Patch482: glibc-upstream-2.34-395.patch
Patch483: glibc-upstream-2.34-396.patch
Patch484: glibc-upstream-2.34-397.patch
# glibc-upstream-2.34-398.patch not backported because we can avoid the
# ABI tunable issue downstream, using @order directives.
# This marks the end of backports via upstream release/2.34/master.
# All future backports need maintain CentOS 9 Stream and RHEL 9 only.
Patch485: glibc-rh2215368.patch
Patch486: glibc-rh2213908.patch
Patch487: glibc-rh2189923.patch
Patch488: glibc-RHEL-729.patch
Patch489: glibc-rh2222188-1.patch
Patch490: glibc-rh2222188-2.patch
Patch491: glibc-rh2222188-3.patch
Patch492: glibc-rh2222188-4.patch
Patch493: glibc-rh2222188-5.patch
Patch494: glibc-rh2224289-1.patch
Patch495: glibc-rh2224289-2.patch
Patch496: glibc-rh2224349.patch
Patch497: glibc-rh2224289-3.patch
Patch498: glibc-rh2224504-1.patch
Patch499: glibc-rh2224504-2.patch
Patch500: glibc-rh2213907-1.patch
Patch501: glibc-rh2213907-2.patch
Patch502: glibc-rh2213907-3.patch
Patch503: glibc-rh2213907-4.patch
Patch504: glibc-rh2213907-5.patch
Patch505: glibc-rh2213907-6.patch
Patch506: glibc-rh2166710-2.patch
Patch507: glibc-rh2166710-3.patch
Patch508: glibc-rh2222188-6.patch
Patch509: glibc-rh2213907-7.patch
##############################################################################
# Continued list of core "glibc" package information:
@ -756,7 +803,7 @@ BuildRequires: valgrind
%endif
# We use systemd rpm macros for nscd
BuildRequires: systemd
BuildRequires: systemd systemd-rpm-macros
# We use python for the microbenchmarks and locale data regeneration
# from unicode sources (carried out manually). We choose python3
@ -1399,10 +1446,10 @@ Requires: %{name} = %{version}-%{release}
Requires: libselinux >= 1.17.10-1
%endif
Requires: audit-libs >= 1.1.3
Requires(pre): /usr/sbin/useradd, coreutils
Requires(pre): systemd, coreutils
Requires(post): systemd
Requires(preun): systemd
Requires(postun): systemd, /usr/sbin/userdel
Requires(postun): systemd
%description -n nscd
The nscd daemon caches name service lookups and can improve
@ -1815,6 +1862,8 @@ install_different "$RPM_BUILD_ROOT/%{_libdir}/glibc-hwcaps" power10 ..
popd
%endif
install -p -D -m 0644 %{SOURCE13} ${RPM_BUILD_ROOT}%{_sysusersdir}/nscd.conf
##############################################################################
# Remove the files we don't want to distribute
##############################################################################
@ -2081,8 +2130,8 @@ done
find -type f | xargs chmod a-x
# Use sysroot-relative paths in linker script. Ignore symbolic links.
sed -e 's,\([^0-9a-zA-Z=*]/lib\),=/usr/lib,g' \
-e 's,\([^0-9a-zA-Z=*]\)/,\1=/,g' \
sed -e 's,\([^0-9a-zA-Z=*]/lib\),/usr/lib,g' \
-e 's,\([^0-9a-zA-Z=*]\)/,\1/,g' \
-i $(find -type f -name 'lib[cm].so')
popd
@ -2746,10 +2795,8 @@ update_gconv_modules_cache ()
update_gconv_modules_cache ()
%pre -n nscd
getent group nscd >/dev/null || /usr/sbin/groupadd -g 28 -r nscd
getent passwd nscd >/dev/null ||
/usr/sbin/useradd -M -o -r -d / -s /sbin/nologin \
-c "NSCD Daemon" -u 28 -g nscd nscd
# install user nscd(28) and group nscd(28)
%sysusers_create_compat %{SOURCE13}
%post -n nscd
%systemd_post nscd.service
@ -2758,9 +2805,6 @@ getent passwd nscd >/dev/null ||
%systemd_preun nscd.service
%postun -n nscd
if test $1 = 0; then
/usr/sbin/userdel nscd > /dev/null 2>&1 || :
fi
%systemd_postun_with_restart nscd.service
%files -f glibc.filelist
@ -2768,7 +2812,7 @@ fi
%if %{buildpower10}
%dir /%{_libdir}/glibc-hwcaps/power10
%endif
%verify(not md5 size mtime) %config(noreplace) /etc/nsswitch.conf
%verify(not md5 size mtime link) %config(noreplace) /etc/nsswitch.conf
%verify(not md5 size mtime) %config(noreplace) /etc/ld.so.conf
%verify(not md5 size mtime) %config(noreplace) /etc/rpc
%dir /etc/ld.so.conf.d
@ -2837,6 +2881,7 @@ fi
%attr(0600,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /var/db/nscd/hosts
%attr(0600,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /var/db/nscd/services
%ghost %config(missingok,noreplace) /etc/sysconfig/nscd
%{_sysusersdir}/nscd.conf
%files -f nss_db.filelist -n nss_db
/var/db/Makefile
@ -2859,6 +2904,80 @@ fi
%endif
%changelog
* Tue Aug 15 2023 Carlos O'Donell <carlos@redhat.com> - 2.34-82
- Fix string and memory function tuning on small systems (#2213907)
* Mon Aug 14 2023 Florian Weimer <fweimer@redhat.com> - 2.34-81
- Fix additional GCC 13 build issue (#2222188)
* Fri Aug 11 2023 Florian Weimer <fweimer@redhat.com> - 2.34-80
- Fix AMD cache size computation for hypervisors, old CPUs (#2166710)
* Tue Aug 8 2023 DJ Delorie <dj@redhat.com> - 2.34-79
- Fix temporal threshold calculations (#2213907)
* Fri Aug 4 2023 Florian Weimer <fweimer@redhat.com> - 2.34-78
- Ignore symbolic link change on /etc/nsswitch.conf (#2229156)
* Fri Jul 28 2023 Florian Weimer <fweimer@redhat.com> - 2.34-77
- Fix regression with IPv4 mapped addresses in /etc/hosts (#2224504)
* Tue Jul 25 2023 Florian Weimer <fweimer@redhat.com> - 2.34-76
- Fix accidentally disabled rseq test (#2224289)
* Fri Jul 21 2023 Florian Weimer <fweimer@redhat.com> - 2.34-75
- Make libSegFault.so NODELETE (#2224349)
* Fri Jul 21 2023 Florian Weimer <fweimer@redhat.com> - 2.34-74
- rseq_area should always be 32 bytes large (#2224289)
* Thu Jul 20 2023 Florian Weimer <fweimer@redhat.com> - 2.34-73
- GCC Toolset 13 C++ compatibility for <math.h> iseqsig (#2222188)
* Fri Jul 07 2023 Carlos O'Donell <carlos@redhat.com> - 2.34-72
- Update ESTALE error message translations (RHEL-729)
* Fri Jul 07 2023 Carlos O'Donell <carlos@redhat.com> - 2.34-71
- Avoid lazy binding failures during dlclose (#2189923)
* Mon Jun 26 2023 Arjun Shankar <arjun@redhat.com> - 2.34-70
- resolv_conf: release lock on allocation failure (#2213908)
* Mon Jun 26 2023 Arjun Shankar <arjun@redhat.com> - 2.34-69
- strerror must not return NULL (#2215368)
* Mon May 08 2023 DJ Delorie <dj@redhat.com> - 2.34-68
- Switch to sysusers_ctl instead of useradd (#2095417)
* Fri Apr 28 2023 Florian Weimer <fweimer@redhat.com> - 2.34-67
- Sync with upstream branch release/2.34/master,
commit 0ea8174d62263c2679c95c0d215d2627e560f7aa:
- gmon: fix memory corruption issues [BZ# 30101]
- gmon: improve mcount overflow handling [BZ# 27576]
- gmon: Fix allocated buffer overflow (bug 29444)
- posix: Fix system blocks SIGCHLD erroneously [BZ #30163]
- x86_64: Fix asm constraints in feraiseexcept (bug 30305)
- gshadow: Matching sgetsgent, sgetsgent_r ERANGE handling (bug 30151)
- x86: Check minimum/maximum of non_temporal_threshold [BZ #29953]
* Thu Apr 20 2023 Patsy Griffin <patsy@redhat.com> - 2.34-66
- x86: Cache computation for AMD architecture. (#2166710)
* Fri Apr 14 2023 Florian Weimer <fweimer@redhat.com> - 2.34-65
- Do not add = to linker scripts in sysroot (#2153855)
* Thu Apr 06 2023 DJ Delorie <dj@redhat.com> - 2.34-64
- x86: Use CHECK_FEATURE_PRESENT on PCONFIG (#2149615)
* Thu Mar 30 2023 Arjun Shankar <arjun@redhat.com> - 2.34-63
- s390x: Influence hwcaps/stfle via glibc.cpu.hwcaps tunable (#2169978)
* Wed Mar 29 2023 DJ Delorie <dj@redhat.com> - 2.34-62
- x86: Don't check PREFETCHWT1 in tst-cpu-features-cpuinfo.c (#2149615)
* Mon Mar 6 2023 Carlos O'Donell <carlos@redhat.com> - 2.34-61
- Fix nested atexit calls from atexit handlers (#2172953)
* Wed Feb 8 2023 Florian Weimer <fweimer@redhat.com> - 2.34-60
- Upstream test for ldconfig -p (#2167811)

Loading…
Cancel
Save