You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
145 lines
5.1 KiB
145 lines
5.1 KiB
commit 9d92452c70805a2e2dbbdb2b1ffc34bd86e1c8df
|
|
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
|
Date: Thu Mar 21 16:48:33 2024 +0000
|
|
|
|
AArch64: Check kernel version for SVE ifuncs
|
|
|
|
Old Linux kernels disable SVE after every system call. Calling the
|
|
SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
|
|
As a result, applications with a high use of syscalls may run slower with
|
|
the SVE memcpy. This is true for kernels from 4.15.0 up to (but not including) 6.2.0,
|
|
except for 5.14.0 which was patched. Avoid this by checking the kernel
|
|
version and selecting the SVE ifunc only on modern kernels.
|
|
|
|
Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
|
|
value without calling any library functions. If uname() is not supported or
|
|
if the version format is not recognized, assume the kernel is modern.
|
|
|
|
Tested-by: Florian Weimer <fweimer@redhat.com>
|
|
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
|
|
(cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2)
|
|
|
|
diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
|
|
index 77a782422af1b6e4..5f2da91ebbd0adaf 100644
|
|
--- a/sysdeps/aarch64/cpu-features.h
|
|
+++ b/sysdeps/aarch64/cpu-features.h
|
|
@@ -71,6 +71,7 @@ struct cpu_features
|
|
/* Currently, the GLIBC memory tagging tunable only defines 8 bits. */
|
|
uint8_t mte_state;
|
|
bool sve;
|
|
+ bool prefer_sve_ifuncs;
|
|
bool mops;
|
|
};
|
|
|
|
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
|
|
index c52860efb22d70eb..61dc40088f4d9e5e 100644
|
|
--- a/sysdeps/aarch64/multiarch/init-arch.h
|
|
+++ b/sysdeps/aarch64/multiarch/init-arch.h
|
|
@@ -36,5 +36,7 @@
|
|
MTE_ENABLED (); \
|
|
bool __attribute__((unused)) sve = \
|
|
GLRO(dl_aarch64_cpu_features).sve; \
|
|
+ bool __attribute__((unused)) prefer_sve_ifuncs = \
|
|
+ GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \
|
|
bool __attribute__((unused)) mops = \
|
|
GLRO(dl_aarch64_cpu_features).mops;
|
|
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
|
|
index d12eccfca51f4bcf..ce53567dab33c2f0 100644
|
|
--- a/sysdeps/aarch64/multiarch/memcpy.c
|
|
+++ b/sysdeps/aarch64/multiarch/memcpy.c
|
|
@@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
|
|
{
|
|
if (IS_A64FX (midr))
|
|
return __memcpy_a64fx;
|
|
- return __memcpy_sve;
|
|
+ return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
|
|
}
|
|
|
|
if (IS_THUNDERX (midr))
|
|
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
|
|
index 2081eeb4d40e0240..fe95037be391896c 100644
|
|
--- a/sysdeps/aarch64/multiarch/memmove.c
|
|
+++ b/sysdeps/aarch64/multiarch/memmove.c
|
|
@@ -47,7 +47,7 @@ select_memmove_ifunc (void)
|
|
{
|
|
if (IS_A64FX (midr))
|
|
return __memmove_a64fx;
|
|
- return __memmove_sve;
|
|
+ return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
|
|
}
|
|
|
|
if (IS_THUNDERX (midr))
|
|
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
|
index b1a3f673f067280b..c0b047bc0dbeae42 100644
|
|
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
|
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
|
@@ -21,6 +21,7 @@
|
|
#include <sys/auxv.h>
|
|
#include <elf/dl-hwcaps.h>
|
|
#include <sys/prctl.h>
|
|
+#include <sys/utsname.h>
|
|
#include <dl-tunables-parse.h>
|
|
|
|
#define DCZID_DZP_MASK (1 << 4)
|
|
@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu)
|
|
return UINT64_MAX;
|
|
}
|
|
|
|
+#if __LINUX_KERNEL_VERSION < 0x060200
|
|
+
|
|
+/* Return true if we prefer using SVE in string ifuncs. Old kernels disable
|
|
+ SVE after every system call which results in unnecessary traps if memcpy
|
|
+ uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except
|
|
+ for 5.14.0 which was patched. For these versions return false to avoid using
|
|
+ SVE ifuncs.
|
|
+ Parse the kernel version into a 24-bit kernel.major.minor value without
|
|
+ calling any library functions. If uname() is not supported or if the version
|
|
+ format is not recognized, assume the kernel is modern and return true. */
|
|
+
|
|
+static inline bool
|
|
+prefer_sve_ifuncs (void)
|
|
+{
|
|
+ struct utsname buf;
|
|
+ const char *p = &buf.release[0];
|
|
+ int kernel = 0;
|
|
+ int val;
|
|
+
|
|
+ if (__uname (&buf) < 0)
|
|
+ return true;
|
|
+
|
|
+ for (int shift = 16; shift >= 0; shift -= 8)
|
|
+ {
|
|
+ for (val = 0; *p >= '0' && *p <= '9'; p++)
|
|
+ val = val * 10 + *p - '0';
|
|
+ kernel |= (val & 255) << shift;
|
|
+ if (*p++ != '.')
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (kernel >= 0x060200 || kernel == 0x050e00)
|
|
+ return true;
|
|
+ if (kernel >= 0x040f00)
|
|
+ return false;
|
|
+ return true;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
static inline void
|
|
init_cpu_features (struct cpu_features *cpu_features)
|
|
{
|
|
@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features)
|
|
/* Check if SVE is supported. */
|
|
cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
|
|
|
|
+ cpu_features->prefer_sve_ifuncs = cpu_features->sve;
|
|
+
|
|
+#if __LINUX_KERNEL_VERSION < 0x060200
|
|
+ if (cpu_features->sve)
|
|
+ cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
|
|
+#endif
|
|
+
|
|
/* Check if MOPS is supported. */
|
|
cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
|
|
}
|