From 168a13dcac503b8a673f16922b1dcbda79a4f0a9 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Fri, 29 Mar 2024 15:40:45 +0300 Subject: [PATCH] import gmp-6.1.2-12.el8 --- .gitignore | 1 + .gmp.metadata | 1 + SOURCES/cve-2021-43618.patch | 25 + SOURCES/gmp-6.0.0-debuginfo.patch | 21 + SOURCES/gmp-fcf-protection.patch | 1985 +++++++++++++++++++++++++++++ SOURCES/gmp-mparam.h | 88 ++ SOURCES/gmp.h | 88 ++ SOURCES/ibm_z13_simd_part1.patch | 596 +++++++++ SOURCES/ibm_z13_simd_part2.patch | 536 ++++++++ SOURCES/ibm_z13_simd_part3.patch | 139 ++ SOURCES/ibm_z13_simd_part4.patch | 151 +++ SPECS/gmp.spec | 712 +++++++++++ 12 files changed, 4343 insertions(+) create mode 100644 .gitignore create mode 100644 .gmp.metadata create mode 100644 SOURCES/cve-2021-43618.patch create mode 100644 SOURCES/gmp-6.0.0-debuginfo.patch create mode 100644 SOURCES/gmp-fcf-protection.patch create mode 100644 SOURCES/gmp-mparam.h create mode 100644 SOURCES/gmp.h create mode 100644 SOURCES/ibm_z13_simd_part1.patch create mode 100644 SOURCES/ibm_z13_simd_part2.patch create mode 100644 SOURCES/ibm_z13_simd_part3.patch create mode 100644 SOURCES/ibm_z13_simd_part4.patch create mode 100644 SPECS/gmp.spec diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..30416dc --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/gmp-6.1.2.tar.bz2 diff --git a/.gmp.metadata b/.gmp.metadata new file mode 100644 index 0000000..7f96990 --- /dev/null +++ b/.gmp.metadata @@ -0,0 +1 @@ +366ded6a44cd108ba6b3f5b9a252eab3f3a95cdf SOURCES/gmp-6.1.2.tar.bz2 diff --git a/SOURCES/cve-2021-43618.patch b/SOURCES/cve-2021-43618.patch new file mode 100644 index 0000000..f741972 --- /dev/null +++ b/SOURCES/cve-2021-43618.patch @@ -0,0 +1,25 @@ + +# HG changeset patch +# User Marco Bodrato +# Date 1634836009 -7200 +# Node ID 561a9c25298e17bb01896801ff353546c6923dbd +# Parent e1fd9db13b475209a864577237ea4b9105b3e96e +mpz/inp_raw.c: Avoid bit size overflows + +diff -r e1fd9db13b47 -r 561a9c25298e mpz/inp_raw.c +--- a/mpz/inp_raw.c Tue Dec 22 23:49:51 2020 +0100 ++++ b/mpz/inp_raw.c Thu Oct 21 19:06:49 2021 +0200 +@@ -88,8 +88,11 @@ + + abs_csize = ABS (csize); + ++ if (UNLIKELY (abs_csize > ~(mp_bitcnt_t) 0 / 8)) ++ return 0; /* Bit size overflows */ ++ + /* round up to a multiple of limbs */ +- abs_xsize = BITS_TO_LIMBS (abs_csize*8); ++ abs_xsize = BITS_TO_LIMBS ((mp_bitcnt_t) abs_csize * 8); + + if (abs_xsize != 0) + { + diff --git a/SOURCES/gmp-6.0.0-debuginfo.patch b/SOURCES/gmp-6.0.0-debuginfo.patch new file mode 100644 index 0000000..bb72839 --- /dev/null +++ b/SOURCES/gmp-6.0.0-debuginfo.patch @@ -0,0 +1,21 @@ +diff -up wrk/mpn/m4-ccas.wrk wrk/mpn/m4-ccas +--- wrk/mpn/m4-ccas.wrk 2015-04-02 16:44:03.645305407 +0200 ++++ wrk/mpn/m4-ccas 2015-04-02 16:21:57.893870969 +0200 +@@ -104,4 +104,4 @@ echo "$CC" + $CC || exit + + # Comment this out to preserve .s intermediates +-rm -f $TMP ++#rm -f $TMP +diff -up wrk/mpn/Makeasm.am.wrk wrk/mpn/Makeasm.am +--- wrk/mpn/Makeasm.am.wrk 2015-04-02 16:42:41.692278742 +0200 ++++ wrk/mpn/Makeasm.am 2015-04-02 16:21:57.891870945 +0200 +@@ -66,7 +66,7 @@ SUFFIXES = .s .S .asm + + + # can be overridden during development, eg. "make RM_TMP=: mul_1.lo" +-RM_TMP = rm -f ++RM_TMP = true + + + # .S assembler, preprocessed with cpp. 
diff --git a/SOURCES/gmp-fcf-protection.patch b/SOURCES/gmp-fcf-protection.patch new file mode 100644 index 0000000..731cc8f --- /dev/null +++ b/SOURCES/gmp-fcf-protection.patch @@ -0,0 +1,1985 @@ +diff --git a/mpn/x86_64/addaddmul_1msb0.asm b/mpn/x86_64/addaddmul_1msb0.asm +index 87c21b4..2bfa122 100644 +--- a/mpn/x86_64/addaddmul_1msb0.asm ++++ b/mpn/x86_64/addaddmul_1msb0.asm +@@ -168,3 +168,4 @@ L(end): cmp $1, R32(n) + pop %r12 + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/addmul_2.asm b/mpn/x86_64/addmul_2.asm +index 18307d7..2999ce5 100644 +--- a/mpn/x86_64/addmul_2.asm ++++ b/mpn/x86_64/addmul_2.asm +@@ -182,3 +182,4 @@ L(end): xor R32(w1), R32(w1) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aorrlsh1_n.asm b/mpn/x86_64/aorrlsh1_n.asm +index 6ee0872..9ebd7dc 100644 +--- a/mpn/x86_64/aorrlsh1_n.asm ++++ b/mpn/x86_64/aorrlsh1_n.asm +@@ -168,3 +168,4 @@ ifdef(`OPERATION_rsblsh1_n',` + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aorrlshC_n.asm b/mpn/x86_64/aorrlshC_n.asm +index 5a9fd4d..c3d55a6 100644 +--- a/mpn/x86_64/aorrlshC_n.asm ++++ b/mpn/x86_64/aorrlshC_n.asm +@@ -158,3 +158,4 @@ ifelse(ADDSUB,add,` + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aorrlsh_n.asm b/mpn/x86_64/aorrlsh_n.asm +index 5ca128f..7dd0bcf 100644 +--- a/mpn/x86_64/aorrlsh_n.asm ++++ b/mpn/x86_64/aorrlsh_n.asm +@@ -174,3 +174,4 @@ L(end): add R32(%rbx), R32(%rbx) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aors_err1_n.asm b/mpn/x86_64/aors_err1_n.asm +index 54d0b3f..13a6af2 100644 +--- a/mpn/x86_64/aors_err1_n.asm ++++ b/mpn/x86_64/aors_err1_n.asm +@@ -223,3 +223,4 @@ L(end): + pop %rbx + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aors_err2_n.asm b/mpn/x86_64/aors_err2_n.asm +index ce5c2a4..0466f06 100644 +--- a/mpn/x86_64/aors_err2_n.asm ++++ b/mpn/x86_64/aors_err2_n.asm +@@ -170,3 +170,4 @@ L(end): + pop %rbx + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aors_err3_n.asm b/mpn/x86_64/aors_err3_n.asm +index bb6d0c5..cc5461f 100644 +--- a/mpn/x86_64/aors_err3_n.asm ++++ b/mpn/x86_64/aors_err3_n.asm +@@ -154,3 +154,4 @@ L(end): + pop %rbx + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aors_n.asm b/mpn/x86_64/aors_n.asm +index 8941f7a..361e04d 100644 +--- a/mpn/x86_64/aors_n.asm ++++ b/mpn/x86_64/aors_n.asm +@@ -167,3 +167,4 @@ L(end): lea 32(up), up + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm +index e3fc005..25d0c13 100644 +--- a/mpn/x86_64/aorsmul_1.asm ++++ b/mpn/x86_64/aorsmul_1.asm +@@ -178,3 +178,4 @@ IFDOS(``pop %rdi '') + IFDOS(``pop %rsi '') + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/addmul_2.asm b/mpn/x86_64/atom/addmul_2.asm +index c1dcdc4..07ae7b8 100644 +--- a/mpn/x86_64/atom/addmul_2.asm ++++ b/mpn/x86_64/atom/addmul_2.asm +@@ -184,3 +184,4 @@ L(end): mul v1 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/aorrlsh1_n.asm b/mpn/x86_64/atom/aorrlsh1_n.asm +index f44de19..f9d7bac 100644 +--- a/mpn/x86_64/atom/aorrlsh1_n.asm ++++ b/mpn/x86_64/atom/aorrlsh1_n.asm +@@ -236,3 +236,4 @@ IFDOS(` mov 56(%rsp), %r8 ') + sbb R32(%rbp), R32(%rbp) C save acy + jmp L(ent) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/aorrlsh2_n.asm b/mpn/x86_64/atom/aorrlsh2_n.asm +index 02fb29d..5ea55b4 100644 +--- a/mpn/x86_64/atom/aorrlsh2_n.asm ++++ b/mpn/x86_64/atom/aorrlsh2_n.asm +@@ -189,3 +189,4 @@ ifdef(`OPERATION_rsblsh2_n',` + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git 
a/mpn/x86_64/atom/aorsmul_1.asm b/mpn/x86_64/atom/aorsmul_1.asm +index e953153..6a12f96 100644 +--- a/mpn/x86_64/atom/aorsmul_1.asm ++++ b/mpn/x86_64/atom/aorsmul_1.asm +@@ -188,3 +188,4 @@ L(cj1): ADDSUB %rax, (rp,n,8) + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/atom/lshift.asm b/mpn/x86_64/atom/lshift.asm +index 1b37d5d..15786cb 100644 +--- a/mpn/x86_64/atom/lshift.asm ++++ b/mpn/x86_64/atom/lshift.asm +@@ -121,3 +121,4 @@ L(end): shl R8(%rcx), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/lshiftc.asm b/mpn/x86_64/atom/lshiftc.asm +index 7385f8f..3171d3c 100644 +--- a/mpn/x86_64/atom/lshiftc.asm ++++ b/mpn/x86_64/atom/lshiftc.asm +@@ -125,3 +125,4 @@ L(end): shl R8(%rcx), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/mul_1.asm b/mpn/x86_64/atom/mul_1.asm +index d76a3d3..304c208 100644 +--- a/mpn/x86_64/atom/mul_1.asm ++++ b/mpn/x86_64/atom/mul_1.asm +@@ -141,3 +141,4 @@ IFDOS(` mov 56(%rsp), %r8 ') + jmp L(com) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/atom/mul_2.asm b/mpn/x86_64/atom/mul_2.asm +index f3fc3af..c7b78a7 100644 +--- a/mpn/x86_64/atom/mul_2.asm ++++ b/mpn/x86_64/atom/mul_2.asm +@@ -184,3 +184,4 @@ L(end): mul v1 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/redc_1.asm b/mpn/x86_64/atom/redc_1.asm +index 62b9a84..eeb09d3 100644 +--- a/mpn/x86_64/atom/redc_1.asm ++++ b/mpn/x86_64/atom/redc_1.asm +@@ -577,3 +577,4 @@ L(n4): mov -32(mp), %rax + jmp L(cj) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/atom/rsh1aors_n.asm b/mpn/x86_64/atom/rsh1aors_n.asm +index 6f5f638..a589b89 100644 +--- a/mpn/x86_64/atom/rsh1aors_n.asm ++++ b/mpn/x86_64/atom/rsh1aors_n.asm +@@ -285,3 +285,4 @@ L(cj1): pop %r15 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/rshift.asm b/mpn/x86_64/atom/rshift.asm +index 29c027d..c8b78bf 100644 +--- a/mpn/x86_64/atom/rshift.asm ++++ b/mpn/x86_64/atom/rshift.asm +@@ -119,3 +119,4 @@ L(end): shr R8(cnt), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/atom/sublsh1_n.asm b/mpn/x86_64/atom/sublsh1_n.asm +index 1306acd..574b25b 100644 +--- a/mpn/x86_64/atom/sublsh1_n.asm ++++ b/mpn/x86_64/atom/sublsh1_n.asm +@@ -240,3 +240,4 @@ IFDOS(` mov 56(%rsp), %r8 ') + sbb R32(%rbp), R32(%rbp) C save acy + jmp L(ent) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bd1/aorsmul_1.asm b/mpn/x86_64/bd1/aorsmul_1.asm +index 96fec9f..ce76154 100644 +--- a/mpn/x86_64/bd1/aorsmul_1.asm ++++ b/mpn/x86_64/bd1/aorsmul_1.asm +@@ -179,3 +179,4 @@ IFDOS(``pop %rsi '') + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/bd1/mul_1.asm b/mpn/x86_64/bd1/mul_1.asm +index e59667c..308f336 100644 +--- a/mpn/x86_64/bd1/mul_1.asm ++++ b/mpn/x86_64/bd1/mul_1.asm +@@ -182,3 +182,4 @@ IFDOS(``pop %rsi '') + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/bd1/mul_2.asm b/mpn/x86_64/bd1/mul_2.asm +index 4ed5f30..f40cf47 100644 +--- a/mpn/x86_64/bd1/mul_2.asm ++++ b/mpn/x86_64/bd1/mul_2.asm +@@ -190,3 +190,4 @@ L(end): mov -8(up,n,8), %rax + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bd1/mul_basecase.asm b/mpn/x86_64/bd1/mul_basecase.asm +index e47ba58..6d61cbc 100644 +--- a/mpn/x86_64/bd1/mul_basecase.asm ++++ b/mpn/x86_64/bd1/mul_basecase.asm +@@ -414,3 +414,4 @@ L(ret2):pop %rbp + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bdiv_dbm1c.asm b/mpn/x86_64/bdiv_dbm1c.asm +index a53bd52..f9c4aa0 100644 +--- a/mpn/x86_64/bdiv_dbm1c.asm ++++ 
b/mpn/x86_64/bdiv_dbm1c.asm +@@ -104,3 +104,4 @@ L(lo1): sub %rax, %r8 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bdiv_q_1.asm b/mpn/x86_64/bdiv_q_1.asm +index 02eacbe..7bfa66d 100644 +--- a/mpn/x86_64/bdiv_q_1.asm ++++ b/mpn/x86_64/bdiv_q_1.asm +@@ -165,3 +165,4 @@ L(one): shr R8(%rcx), %rax + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/aors_n.asm b/mpn/x86_64/bobcat/aors_n.asm +index 22287b8..1df1a08 100644 +--- a/mpn/x86_64/bobcat/aors_n.asm ++++ b/mpn/x86_64/bobcat/aors_n.asm +@@ -148,3 +148,4 @@ PROLOGUE(func_nc) + IFDOS(` mov 56(%rsp), %r8 ') + jmp L(ent) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/aorsmul_1.asm b/mpn/x86_64/bobcat/aorsmul_1.asm +index 415a17c..79d81f4 100644 +--- a/mpn/x86_64/bobcat/aorsmul_1.asm ++++ b/mpn/x86_64/bobcat/aorsmul_1.asm +@@ -181,3 +181,4 @@ IFDOS(` pop %rdi ') + IFDOS(` pop %rsi ') + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/copyd.asm b/mpn/x86_64/bobcat/copyd.asm +index 877714e..2f781a3 100644 +--- a/mpn/x86_64/bobcat/copyd.asm ++++ b/mpn/x86_64/bobcat/copyd.asm +@@ -89,3 +89,4 @@ L(end): cmp $-4, R32(n) + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/copyi.asm b/mpn/x86_64/bobcat/copyi.asm +index ee0f578..ff249bc 100644 +--- a/mpn/x86_64/bobcat/copyi.asm ++++ b/mpn/x86_64/bobcat/copyi.asm +@@ -92,3 +92,4 @@ L(end): cmp $4, R32(n) + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/mul_1.asm b/mpn/x86_64/bobcat/mul_1.asm +index ab428a8..b4f401b 100644 +--- a/mpn/x86_64/bobcat/mul_1.asm ++++ b/mpn/x86_64/bobcat/mul_1.asm +@@ -185,3 +185,4 @@ IFDOS(` pop %rdi ') + IFDOS(` pop %rsi ') + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/mul_basecase.asm b/mpn/x86_64/bobcat/mul_basecase.asm +index e7d46bf..14c7b13 100644 +--- a/mpn/x86_64/bobcat/mul_basecase.asm ++++ b/mpn/x86_64/bobcat/mul_basecase.asm +@@ -484,3 +484,4 @@ L(ret): pop %r13 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/redc_1.asm b/mpn/x86_64/bobcat/redc_1.asm +index d55b1e5..d686cfb 100644 +--- a/mpn/x86_64/bobcat/redc_1.asm ++++ b/mpn/x86_64/bobcat/redc_1.asm +@@ -505,3 +505,4 @@ L(n3): mov -24(mp), %rax + jmp L(ret) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/bobcat/sqr_basecase.asm b/mpn/x86_64/bobcat/sqr_basecase.asm +index 0e417a1..5693c46 100644 +--- a/mpn/x86_64/bobcat/sqr_basecase.asm ++++ b/mpn/x86_64/bobcat/sqr_basecase.asm +@@ -563,3 +563,4 @@ L(esd): add %rbx, w0 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/cnd_aors_n.asm b/mpn/x86_64/cnd_aors_n.asm +index 13a2ab3..35f30e7 100644 +--- a/mpn/x86_64/cnd_aors_n.asm ++++ b/mpn/x86_64/cnd_aors_n.asm +@@ -181,3 +181,4 @@ L(end): neg R32(%rax) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/com.asm b/mpn/x86_64/com.asm +index 006acaf..56b0747 100644 +--- a/mpn/x86_64/com.asm ++++ b/mpn/x86_64/com.asm +@@ -93,3 +93,4 @@ L(e10): movq 24(up,n,8), %r9 + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/copyd.asm b/mpn/x86_64/copyd.asm +index a5e6e59..020e287 100644 +--- a/mpn/x86_64/copyd.asm ++++ b/mpn/x86_64/copyd.asm +@@ -91,3 +91,4 @@ L(end): shr R32(n) + mov %r9, -16(rp) + 1: ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/copyi.asm b/mpn/x86_64/copyi.asm +index bafce7a..1a4fb6d 100644 +--- a/mpn/x86_64/copyi.asm ++++ b/mpn/x86_64/copyi.asm +@@ -90,3 +90,4 @@ L(end): shr R32(n) + mov %r9, 16(rp) + 1: ret + EPILOGUE() ++CF_PROT +diff --git 
a/mpn/x86_64/core2/aors_err1_n.asm b/mpn/x86_64/core2/aors_err1_n.asm +index 3f875ae..5162272 100644 +--- a/mpn/x86_64/core2/aors_err1_n.asm ++++ b/mpn/x86_64/core2/aors_err1_n.asm +@@ -223,3 +223,4 @@ L(end): + pop %rbx + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/aors_n.asm b/mpn/x86_64/core2/aors_n.asm +index 74a1bce..19078d8 100644 +--- a/mpn/x86_64/core2/aors_n.asm ++++ b/mpn/x86_64/core2/aors_n.asm +@@ -139,3 +139,4 @@ IFDOS(` mov 56(%rsp), %r8 ') + jmp L(start) + EPILOGUE() + ++CF_PROT +diff --git a/mpn/x86_64/core2/aorsmul_1.asm b/mpn/x86_64/core2/aorsmul_1.asm +index 6b313dd..392f4de 100644 +--- a/mpn/x86_64/core2/aorsmul_1.asm ++++ b/mpn/x86_64/core2/aorsmul_1.asm +@@ -176,3 +176,4 @@ L(n1): mov 8(rp), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/divrem_1.asm b/mpn/x86_64/core2/divrem_1.asm +index 1b3f139..0a67dc3 100644 +--- a/mpn/x86_64/core2/divrem_1.asm ++++ b/mpn/x86_64/core2/divrem_1.asm +@@ -241,3 +241,4 @@ L(ret): pop %rbx + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/gcd_1.asm b/mpn/x86_64/core2/gcd_1.asm +index bdb940c..452b763 100644 +--- a/mpn/x86_64/core2/gcd_1.asm ++++ b/mpn/x86_64/core2/gcd_1.asm +@@ -144,3 +144,4 @@ L(end): pop %rcx + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm +index 8ccafec..00b39b8 100644 +--- a/mpn/x86_64/core2/lshift.asm ++++ b/mpn/x86_64/core2/lshift.asm +@@ -147,3 +147,4 @@ L(end): shld R8(cnt), %r8, %r11 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/lshiftc.asm b/mpn/x86_64/core2/lshiftc.asm +index 65c7b2f..4d3acfe 100644 +--- a/mpn/x86_64/core2/lshiftc.asm ++++ b/mpn/x86_64/core2/lshiftc.asm +@@ -157,3 +157,4 @@ L(end): shld R8(cnt), %r8, %r11 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/mul_basecase.asm b/mpn/x86_64/core2/mul_basecase.asm +index d16be85..04cd4c2 100644 +--- a/mpn/x86_64/core2/mul_basecase.asm ++++ b/mpn/x86_64/core2/mul_basecase.asm +@@ -973,3 +973,4 @@ L(lo3): mul v0 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/mullo_basecase.asm b/mpn/x86_64/core2/mullo_basecase.asm +index 0f03d86..efed03d 100644 +--- a/mpn/x86_64/core2/mullo_basecase.asm ++++ b/mpn/x86_64/core2/mullo_basecase.asm +@@ -425,3 +425,4 @@ L(n3): mov (vp_param), %r9 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/redc_1.asm b/mpn/x86_64/core2/redc_1.asm +index 8c296fd..d98f56f 100644 +--- a/mpn/x86_64/core2/redc_1.asm ++++ b/mpn/x86_64/core2/redc_1.asm +@@ -428,3 +428,4 @@ L(n4): mov -32(mp), %rax + jmp L(add_n) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/core2/rsh1aors_n.asm b/mpn/x86_64/core2/rsh1aors_n.asm +index 27eed37..579fec6 100644 +--- a/mpn/x86_64/core2/rsh1aors_n.asm ++++ b/mpn/x86_64/core2/rsh1aors_n.asm +@@ -167,3 +167,4 @@ L(end): shrd $1, %rbx, %rbp + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm +index ab32ec8..97f4429 100644 +--- a/mpn/x86_64/core2/rshift.asm ++++ b/mpn/x86_64/core2/rshift.asm +@@ -145,3 +145,4 @@ L(end): shrd R8(cnt), %r8, %r11 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/core2/sqr_basecase.asm b/mpn/x86_64/core2/sqr_basecase.asm +index a112c1b..0ee6ca3 100644 +--- a/mpn/x86_64/core2/sqr_basecase.asm ++++ b/mpn/x86_64/core2/sqr_basecase.asm +@@ -982,3 +982,4 @@ L(n3): mov %rax, %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git 
a/mpn/x86_64/core2/sublshC_n.asm b/mpn/x86_64/core2/sublshC_n.asm +index 5acc46b..7a48dfb 100644 +--- a/mpn/x86_64/core2/sublshC_n.asm ++++ b/mpn/x86_64/core2/sublshC_n.asm +@@ -156,3 +156,4 @@ L(end): shr $RSH, %r11 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreibwl/addmul_1.asm b/mpn/x86_64/coreibwl/addmul_1.asm +index aaa58e7..4fb79f9 100644 +--- a/mpn/x86_64/coreibwl/addmul_1.asm ++++ b/mpn/x86_64/coreibwl/addmul_1.asm +@@ -107,33 +107,39 @@ L(tab): JMPENT( L(f0), L(tab)) + JMPENT( L(f7), L(tab)) + TEXT + +-L(f0): mulx( (up), %r10, %r8) ++L(f0): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea -8(up), up + lea -8(rp), rp + lea -1(n), n + jmp L(b0) + +-L(f3): mulx( (up), %r9, %rax) ++L(f3): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + lea 16(up), up + lea -48(rp), rp + jmp L(b3) + +-L(f4): mulx( (up), %r10, %r8) ++L(f4): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 24(up), up + lea -40(rp), rp + jmp L(b4) + +-L(f5): mulx( (up), %r9, %rax) ++L(f5): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + lea 32(up), up + lea -32(rp), rp + jmp L(b5) + +-L(f6): mulx( (up), %r10, %r8) ++L(f6): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 40(up), up + lea -24(rp), rp + jmp L(b6) + +-L(f1): mulx( (up), %r9, %rax) ++L(f1): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + jrcxz L(1) + jmp L(b1) + L(1): add (rp), %r9 +@@ -151,7 +157,8 @@ ifdef(`PIC', + ` nop;nop;nop;nop', + ` nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop') + +-L(f2): mulx( (up), %r10, %r8) ++L(f2): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 8(up), up + lea 8(rp), rp + mulx( (up), %r9, %rax) +@@ -195,9 +202,11 @@ L(b3): adox( 48,(rp), %r9) + mulx( (up), %r9, %rax) + jmp L(top) + +-L(f7): mulx( (up), %r9, %rax) ++L(f7): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + lea -16(up), up + lea -16(rp), rp + jmp L(b7) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreibwl/mul_1.asm b/mpn/x86_64/coreibwl/mul_1.asm +index a271e6c..4fe4822 100644 +--- a/mpn/x86_64/coreibwl/mul_1.asm ++++ b/mpn/x86_64/coreibwl/mul_1.asm +@@ -106,48 +106,56 @@ L(tab): JMPENT( L(f0), L(tab)) + JMPENT( L(f7), L(tab)) + TEXT + +-L(f0): mulx( (up), %r10, %r8) ++L(f0): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 56(up), up + lea -8(rp), rp + jmp L(b0) + +-L(f3): mulx( (up), %r9, %rax) ++L(f3): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + lea 16(up), up + lea 16(rp), rp + inc n + jmp L(b3) + +-L(f4): mulx( (up), %r10, %r8) ++L(f4): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 24(up), up + lea 24(rp), rp + inc n + jmp L(b4) + +-L(f5): mulx( (up), %r9, %rax) ++L(f5): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + lea 32(up), up + lea 32(rp), rp + inc n + jmp L(b5) + +-L(f6): mulx( (up), %r10, %r8) ++L(f6): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 40(up), up + lea 40(rp), rp + inc n + jmp L(b6) + +-L(f7): mulx( (up), %r9, %rax) ++L(f7): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + lea 48(up), up + lea 48(rp), rp + inc n + jmp L(b7) + +-L(f1): mulx( (up), %r9, %rax) ++L(f1): CFPROT_ENDBR ++ mulx( (up), %r9, %rax) + test n, n + jnz L(b1) + L(1): mov %r9, (rp) + ret + +-L(f2): mulx( (up), %r10, %r8) ++L(f2): CFPROT_ENDBR ++ mulx( (up), %r10, %r8) + lea 8(up), up + lea 8(rp), rp + mulx( (up), %r9, %rax) +@@ -191,3 +199,4 @@ L(end): mov %r10, -8(rp) + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreibwl/mul_basecase.asm b/mpn/x86_64/coreibwl/mul_basecase.asm +index 50f3ce5..74cd67c 100644 +--- a/mpn/x86_64/coreibwl/mul_basecase.asm ++++ b/mpn/x86_64/coreibwl/mul_basecase.asm +@@ -155,45 +155,53 @@ ifdef(`PIC', + jmp *(%r10,%rax,8) + ') + +-L(mf0): mulx( (up), 
w2, w3) ++L(mf0): CFPROT_ENDBR ++ mulx( (up), w2, w3) + lea 56(up), up + lea -8(rp), rp + jmp L(mb0) + +-L(mf3): mulx( (up), w0, w1) ++L(mf3): CFPROT_ENDBR ++ mulx( (up), w0, w1) + lea 16(up), up + lea 16(rp), rp + inc n + jmp L(mb3) + +-L(mf4): mulx( (up), w2, w3) ++L(mf4): CFPROT_ENDBR ++ mulx( (up), w2, w3) + lea 24(up), up + lea 24(rp), rp + inc n + jmp L(mb4) + +-L(mf5): mulx( (up), w0, w1) ++L(mf5): CFPROT_ENDBR ++ mulx( (up), w0, w1) + lea 32(up), up + lea 32(rp), rp + inc n + jmp L(mb5) + +-L(mf6): mulx( (up), w2, w3) ++L(mf6): CFPROT_ENDBR ++ mulx( (up), w2, w3) + lea 40(up), up + lea 40(rp), rp + inc n + jmp L(mb6) + +-L(mf7): mulx( (up), w0, w1) ++L(mf7): CFPROT_ENDBR ++ mulx( (up), w0, w1) + lea 48(up), up + lea 48(rp), rp + inc n + jmp L(mb7) + +-L(mf1): mulx( (up), w0, w1) ++L(mf1): CFPROT_ENDBR ++ mulx( (up), w0, w1) + jmp L(mb1) + +-L(mf2): mulx( (up), w2, w3) ++L(mf2): CFPROT_ENDBR ++ mulx( (up), w2, w3) + lea 8(up), up + lea 8(rp), rp + mulx( (up), w0, w1) +@@ -254,32 +262,39 @@ L(outer): + lea 8(vp), vp + jmp *jaddr + +-L(f0): mulx( 8,(up), w2, w3) ++L(f0): CFPROT_ENDBR ++ mulx( 8,(up), w2, w3) + lea 8(rp,unneg,8), rp + lea -1(n), n + jmp L(b0) + +-L(f3): mulx( -16,(up), w0, w1) ++L(f3): CFPROT_ENDBR ++ mulx( -16,(up), w0, w1) + lea -56(rp,unneg,8), rp + jmp L(b3) + +-L(f4): mulx( -24,(up), w2, w3) ++L(f4): CFPROT_ENDBR ++ mulx( -24,(up), w2, w3) + lea -56(rp,unneg,8), rp + jmp L(b4) + +-L(f5): mulx( -32,(up), w0, w1) ++L(f5): CFPROT_ENDBR ++ mulx( -32,(up), w0, w1) + lea -56(rp,unneg,8), rp + jmp L(b5) + +-L(f6): mulx( -40,(up), w2, w3) ++L(f6): CFPROT_ENDBR ++ mulx( -40,(up), w2, w3) + lea -56(rp,unneg,8), rp + jmp L(b6) + +-L(f7): mulx( 16,(up), w0, w1) ++L(f7): CFPROT_ENDBR ++ mulx( 16,(up), w0, w1) + lea 8(rp,unneg,8), rp + jmp L(b7) + +-L(f1): mulx( (up), w0, w1) ++L(f1): CFPROT_ENDBR ++ mulx( (up), w0, w1) + lea 8(rp,unneg,8), rp + jmp L(b1) + +@@ -300,7 +315,7 @@ L(done): + FUNC_EXIT() + ret + +-L(f2): ++L(f2): CFPROT_ENDBR + mulx( -8,(up), w2, w3) + lea 8(rp,unneg,8), rp + mulx( (up), w0, w1) +@@ -365,3 +380,4 @@ L(atab):JMPENT( L(f0), L(atab)) + JMPENT( L(f7), L(atab)) + TEXT + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreibwl/sqr_basecase.asm b/mpn/x86_64/coreibwl/sqr_basecase.asm +index 447ba00..64e8298 100644 +--- a/mpn/x86_64/coreibwl/sqr_basecase.asm ++++ b/mpn/x86_64/coreibwl/sqr_basecase.asm +@@ -184,42 +184,50 @@ ifdef(`PIC', + jmp *(%r10,%rax,8) + ') + +-L(mf0): mulx( 8,(up), w2, w3) ++L(mf0): CFPROT_ENDBR ++ mulx( 8,(up), w2, w3) + lea 64(up), up + C lea (rp), rp + jmp L(mb0) + +-L(mf3): mulx( 8,(up), w0, w1) ++L(mf3): CFPROT_ENDBR ++ mulx( 8,(up), w0, w1) + lea 24(up), up + lea 24(rp), rp + jmp L(mb3) + +-L(mf4): mulx( 8,(up), w2, w3) ++L(mf4): CFPROT_ENDBR ++ mulx( 8,(up), w2, w3) + lea 32(up), up + lea 32(rp), rp + jmp L(mb4) + +-L(mf5): mulx( 8,(up), w0, w1) ++L(mf5): CFPROT_ENDBR ++ mulx( 8,(up), w0, w1) + lea 40(up), up + lea 40(rp), rp + jmp L(mb5) + +-L(mf6): mulx( 8,(up), w2, w3) ++L(mf6): CFPROT_ENDBR ++ mulx( 8,(up), w2, w3) + lea 48(up), up + lea 48(rp), rp + jmp L(mb6) + +-L(mf7): mulx( 8,(up), w0, w1) ++L(mf7): CFPROT_ENDBR ++ mulx( 8,(up), w0, w1) + lea 56(up), up + lea 56(rp), rp + jmp L(mb7) + +-L(mf1): mulx( 8,(up), w0, w1) ++L(mf1): CFPROT_ENDBR ++ mulx( 8,(up), w0, w1) + lea 8(up), up + lea 8(rp), rp + jmp L(mb1) + +-L(mf2): mulx( 8,(up), w2, w3) ++L(mf2): CFPROT_ENDBR ++ mulx( 8,(up), w2, w3) + lea 16(up), up + lea 16(rp), rp + dec R32(n) +@@ -275,7 +283,8 @@ L(ed0): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on 
rcx = 0 + mov w1, 8(rp) +-L(f7): lea -64(up,un_save,8), up ++L(f7): CFPROT_ENDBR ++ lea -64(up,un_save,8), up + or R32(un_save), R32(n) + mov 8(up), u0 + mulx( 16,(up), w0, w1) +@@ -326,7 +335,8 @@ L(ed1): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f0): lea -64(up,un_save,8), up ++L(f0): CFPROT_ENDBR ++ lea -64(up,un_save,8), up + or R32(un_save), R32(n) + mov (up), u0 + mulx( 8,(up), w2, w3) +@@ -377,7 +387,8 @@ L(ed2): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f1): lea (up,un_save,8), up ++L(f1): CFPROT_ENDBR ++ lea (up,un_save,8), up + or R32(un_save), R32(n) + lea 8(un_save), un_save + mov -8(up), u0 +@@ -429,7 +440,8 @@ L(ed3): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f2): lea (up,un_save,8), up ++L(f2): CFPROT_ENDBR ++ lea (up,un_save,8), up + or R32(un_save), R32(n) + jz L(corner2) + mov -16(up), u0 +@@ -482,7 +494,8 @@ L(ed4): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f3): lea (up,un_save,8), up ++L(f3): CFPROT_ENDBR ++ lea (up,un_save,8), up + or R32(un_save), R32(n) + jz L(corner3) + mov -24(up), u0 +@@ -534,7 +547,8 @@ L(ed5): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f4): lea (up,un_save,8), up ++L(f4): CFPROT_ENDBR ++ lea (up,un_save,8), up + or R32(un_save), R32(n) + mov -32(up), u0 + mulx( -24,(up), w2, w3) +@@ -585,7 +599,8 @@ L(ed6): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f5): lea (up,un_save,8), up ++L(f5): CFPROT_ENDBR ++ lea (up,un_save,8), up + or R32(un_save), R32(n) + mov -40(up), u0 + mulx( -32,(up), w0, w1) +@@ -636,7 +651,8 @@ L(ed7): adox( (rp), w0) + mov w0, (rp) + adc %rcx, w1 C relies on rcx = 0 + mov w1, 8(rp) +-L(f6): lea (up,un_save,8), up ++L(f6): CFPROT_ENDBR ++ lea (up,un_save,8), up + or R32(un_save), R32(n) + mov -48(up), u0 + mulx( -40,(up), w2, w3) +@@ -838,3 +854,4 @@ L(atab):JMPENT( L(f6), L(atab)) + JMPENT( L(f5), L(atab)) + TEXT + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/addmul_2.asm b/mpn/x86_64/coreihwl/addmul_2.asm +index 54aebc8..2a5f996 100644 +--- a/mpn/x86_64/coreihwl/addmul_2.asm ++++ b/mpn/x86_64/coreihwl/addmul_2.asm +@@ -236,3 +236,4 @@ L(end): mulx( v0, %rax, w3) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/aorsmul_1.asm b/mpn/x86_64/coreihwl/aorsmul_1.asm +index fd5a26d..8c03b17 100644 +--- a/mpn/x86_64/coreihwl/aorsmul_1.asm ++++ b/mpn/x86_64/coreihwl/aorsmul_1.asm +@@ -196,3 +196,4 @@ L(ret): pop %r13 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/mul_1.asm b/mpn/x86_64/coreihwl/mul_1.asm +index 1e3c338..b6463f9 100644 +--- a/mpn/x86_64/coreihwl/mul_1.asm ++++ b/mpn/x86_64/coreihwl/mul_1.asm +@@ -153,3 +153,4 @@ L(cj1): mov %rbx, 24(rp) + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/mul_2.asm b/mpn/x86_64/coreihwl/mul_2.asm +index 5bdb1aa..21defe9 100644 +--- a/mpn/x86_64/coreihwl/mul_2.asm ++++ b/mpn/x86_64/coreihwl/mul_2.asm +@@ -171,3 +171,4 @@ L(end): mulx( v1, %rdx, %rax) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/mul_basecase.asm b/mpn/x86_64/coreihwl/mul_basecase.asm +index b2656c8..e4a8381 100644 +--- a/mpn/x86_64/coreihwl/mul_basecase.asm ++++ b/mpn/x86_64/coreihwl/mul_basecase.asm +@@ -439,3 +439,4 @@ L(ret2):pop %rbp + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/mullo_basecase.asm 
b/mpn/x86_64/coreihwl/mullo_basecase.asm +index 9986e8b..6756802 100644 +--- a/mpn/x86_64/coreihwl/mullo_basecase.asm ++++ b/mpn/x86_64/coreihwl/mullo_basecase.asm +@@ -424,3 +424,4 @@ L(n3): mov (vp), %r9 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/redc_1.asm b/mpn/x86_64/coreihwl/redc_1.asm +index b1d6c0a..b8b4a9e 100644 +--- a/mpn/x86_64/coreihwl/redc_1.asm ++++ b/mpn/x86_64/coreihwl/redc_1.asm +@@ -435,3 +435,4 @@ L(ret): pop %r15 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreihwl/sqr_basecase.asm b/mpn/x86_64/coreihwl/sqr_basecase.asm +index 641cdf3..8e83470 100644 +--- a/mpn/x86_64/coreihwl/sqr_basecase.asm ++++ b/mpn/x86_64/coreihwl/sqr_basecase.asm +@@ -504,3 +504,4 @@ L(dend):adc %rbx, %rdx + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreinhm/aorrlsh_n.asm b/mpn/x86_64/coreinhm/aorrlsh_n.asm +index eed64e7..b1a4610 100644 +--- a/mpn/x86_64/coreinhm/aorrlsh_n.asm ++++ b/mpn/x86_64/coreinhm/aorrlsh_n.asm +@@ -198,3 +198,4 @@ IFDOS(` mov 64(%rsp), %r9 ') C cy + sbb R32(%rbx), R32(%rbx) C initialise CF save register + jmp L(ent) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreinhm/aorsmul_1.asm b/mpn/x86_64/coreinhm/aorsmul_1.asm +index b768905..e2d96a8 100644 +--- a/mpn/x86_64/coreinhm/aorsmul_1.asm ++++ b/mpn/x86_64/coreinhm/aorsmul_1.asm +@@ -185,3 +185,4 @@ L(end): mul v0 + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreinhm/redc_1.asm b/mpn/x86_64/coreinhm/redc_1.asm +index fc71c1b..782da6b 100644 +--- a/mpn/x86_64/coreinhm/redc_1.asm ++++ b/mpn/x86_64/coreinhm/redc_1.asm +@@ -547,3 +547,4 @@ L(n3): mov -24(mp), %rax + jmp L(ret) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/addmul_2.asm b/mpn/x86_64/coreisbr/addmul_2.asm +index 21f0bf4..e6ffe3e 100644 +--- a/mpn/x86_64/coreisbr/addmul_2.asm ++++ b/mpn/x86_64/coreisbr/addmul_2.asm +@@ -222,3 +222,4 @@ L(end): mul v1 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/aorrlshC_n.asm b/mpn/x86_64/coreisbr/aorrlshC_n.asm +index 23ace41..75a9b8c 100644 +--- a/mpn/x86_64/coreisbr/aorrlshC_n.asm ++++ b/mpn/x86_64/coreisbr/aorrlshC_n.asm +@@ -171,3 +171,4 @@ L(end): shr $RSH, %rbp + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/aorrlsh_n.asm b/mpn/x86_64/coreisbr/aorrlsh_n.asm +index db8ee68..611dcb2 100644 +--- a/mpn/x86_64/coreisbr/aorrlsh_n.asm ++++ b/mpn/x86_64/coreisbr/aorrlsh_n.asm +@@ -213,3 +213,4 @@ IFDOS(` mov 64(%rsp), %r9 ') C cy + sbb R32(%rbx), R32(%rbx) C initialise CF save register + jmp L(ent) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/aors_n.asm b/mpn/x86_64/coreisbr/aors_n.asm +index 01abf78..07fef16 100644 +--- a/mpn/x86_64/coreisbr/aors_n.asm ++++ b/mpn/x86_64/coreisbr/aors_n.asm +@@ -196,3 +196,4 @@ PROLOGUE(func_nc) + IFDOS(` mov 56(%rsp), %r8 ') + jmp L(ent) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/aorsmul_1.asm b/mpn/x86_64/coreisbr/aorsmul_1.asm +index 9f01d9c..41b8016 100644 +--- a/mpn/x86_64/coreisbr/aorsmul_1.asm ++++ b/mpn/x86_64/coreisbr/aorsmul_1.asm +@@ -207,3 +207,4 @@ IFDOS(``pop %rsi '') + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/mul_1.asm b/mpn/x86_64/coreisbr/mul_1.asm +index ded7d89..a30f00b 100644 +--- a/mpn/x86_64/coreisbr/mul_1.asm ++++ b/mpn/x86_64/coreisbr/mul_1.asm +@@ -159,3 +159,4 @@ IFDOS(``pop %rdi '') + IFDOS(``pop %rsi '') + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/mul_2.asm b/mpn/x86_64/coreisbr/mul_2.asm +index 
ffee78a..991820b 100644 +--- a/mpn/x86_64/coreisbr/mul_2.asm ++++ b/mpn/x86_64/coreisbr/mul_2.asm +@@ -161,3 +161,4 @@ L(end): mul v0 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/mul_basecase.asm b/mpn/x86_64/coreisbr/mul_basecase.asm +index 35fd1cc..063664b 100644 +--- a/mpn/x86_64/coreisbr/mul_basecase.asm ++++ b/mpn/x86_64/coreisbr/mul_basecase.asm +@@ -405,3 +405,4 @@ L(ret2):pop %rbp + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/mullo_basecase.asm b/mpn/x86_64/coreisbr/mullo_basecase.asm +index a41a8ac..1b75c78 100644 +--- a/mpn/x86_64/coreisbr/mullo_basecase.asm ++++ b/mpn/x86_64/coreisbr/mullo_basecase.asm +@@ -382,3 +382,4 @@ L(n3): mov (vp_param), %r9 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/popcount.asm b/mpn/x86_64/coreisbr/popcount.asm +index a5be33e..426d3a6 100644 +--- a/mpn/x86_64/coreisbr/popcount.asm ++++ b/mpn/x86_64/coreisbr/popcount.asm +@@ -116,3 +116,4 @@ L(cj1): add %r11, %rax + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/redc_1.asm b/mpn/x86_64/coreisbr/redc_1.asm +index f0dbe07..710e60e 100644 +--- a/mpn/x86_64/coreisbr/redc_1.asm ++++ b/mpn/x86_64/coreisbr/redc_1.asm +@@ -544,3 +544,4 @@ L(n3): mov -32(mp), %rax + jmp L(cj) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/rsh1aors_n.asm b/mpn/x86_64/coreisbr/rsh1aors_n.asm +index fd2eaea..d390ff3 100644 +--- a/mpn/x86_64/coreisbr/rsh1aors_n.asm ++++ b/mpn/x86_64/coreisbr/rsh1aors_n.asm +@@ -191,3 +191,4 @@ L(end): shrd $1, %rbx, %rbp + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/coreisbr/sqr_basecase.asm b/mpn/x86_64/coreisbr/sqr_basecase.asm +index 46a3612..4d4e545 100644 +--- a/mpn/x86_64/coreisbr/sqr_basecase.asm ++++ b/mpn/x86_64/coreisbr/sqr_basecase.asm +@@ -482,3 +482,4 @@ L(dend):add %r8, %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/div_qr_1n_pi1.asm b/mpn/x86_64/div_qr_1n_pi1.asm +index cb072e9..5a4f195 100644 +--- a/mpn/x86_64/div_qr_1n_pi1.asm ++++ b/mpn/x86_64/div_qr_1n_pi1.asm +@@ -245,3 +245,4 @@ L(q_incr_loop): + lea 8(U1), U1 + jmp L(q_incr_loop) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/div_qr_2n_pi1.asm b/mpn/x86_64/div_qr_2n_pi1.asm +index 5e59a0a..252781c 100644 +--- a/mpn/x86_64/div_qr_2n_pi1.asm ++++ b/mpn/x86_64/div_qr_2n_pi1.asm +@@ -156,3 +156,4 @@ L(fix): C Unlikely update. u2 >= d1 + sbb d1, u2 + jmp L(bck) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/div_qr_2u_pi1.asm b/mpn/x86_64/div_qr_2u_pi1.asm +index 85af96f..b47209e 100644 +--- a/mpn/x86_64/div_qr_2u_pi1.asm ++++ b/mpn/x86_64/div_qr_2u_pi1.asm +@@ -198,3 +198,4 @@ L(fix_qh): C Unlikely update. 
u2 >= d1 + sbb d1, u2 + jmp L(bck_qh) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/dive_1.asm b/mpn/x86_64/dive_1.asm +index 988bdab..b401112 100644 +--- a/mpn/x86_64/dive_1.asm ++++ b/mpn/x86_64/dive_1.asm +@@ -156,3 +156,4 @@ L(one): shr R8(%rcx), %rax + ret + + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm +index d4d61ad..0417756 100644 +--- a/mpn/x86_64/divrem_1.asm ++++ b/mpn/x86_64/divrem_1.asm +@@ -312,3 +312,4 @@ L(ret): pop %rbx + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/divrem_2.asm b/mpn/x86_64/divrem_2.asm +index 296c9b6..73aa740 100644 +--- a/mpn/x86_64/divrem_2.asm ++++ b/mpn/x86_64/divrem_2.asm +@@ -188,3 +188,4 @@ L(fix): seta %dl + sbb %r11, %rbx + jmp L(bck) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastavx/copyd.asm b/mpn/x86_64/fastavx/copyd.asm +index 56d472f..8d4f651 100644 +--- a/mpn/x86_64/fastavx/copyd.asm ++++ b/mpn/x86_64/fastavx/copyd.asm +@@ -170,3 +170,4 @@ L(bc): test $4, R8(n) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastavx/copyi.asm b/mpn/x86_64/fastavx/copyi.asm +index 7607747..3364aa9 100644 +--- a/mpn/x86_64/fastavx/copyi.asm ++++ b/mpn/x86_64/fastavx/copyi.asm +@@ -167,3 +167,4 @@ L(bc): test $4, R8(n) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/com-palignr.asm b/mpn/x86_64/fastsse/com-palignr.asm +index c7155d1..191e5d9 100644 +--- a/mpn/x86_64/fastsse/com-palignr.asm ++++ b/mpn/x86_64/fastsse/com-palignr.asm +@@ -308,3 +308,4 @@ L(end): test $1, R8(n) + 1: FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/com.asm b/mpn/x86_64/fastsse/com.asm +index 307fb75..5dfc8e4 100644 +--- a/mpn/x86_64/fastsse/com.asm ++++ b/mpn/x86_64/fastsse/com.asm +@@ -165,3 +165,4 @@ L(sma): add $14, n + L(don): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/copyd-palignr.asm b/mpn/x86_64/fastsse/copyd-palignr.asm +index fac6f8a..a69812c 100644 +--- a/mpn/x86_64/fastsse/copyd-palignr.asm ++++ b/mpn/x86_64/fastsse/copyd-palignr.asm +@@ -252,3 +252,4 @@ L(end): test $1, R8(n) + 1: FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/copyd.asm b/mpn/x86_64/fastsse/copyd.asm +index 5b8b8bf..f03affa 100644 +--- a/mpn/x86_64/fastsse/copyd.asm ++++ b/mpn/x86_64/fastsse/copyd.asm +@@ -156,3 +156,4 @@ L(sma): test $8, R8(n) + L(don): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/copyi-palignr.asm b/mpn/x86_64/fastsse/copyi-palignr.asm +index 22f13f1..e50f604 100644 +--- a/mpn/x86_64/fastsse/copyi-palignr.asm ++++ b/mpn/x86_64/fastsse/copyi-palignr.asm +@@ -296,3 +296,4 @@ L(end): test $1, R8(n) + 1: FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/copyi.asm b/mpn/x86_64/fastsse/copyi.asm +index b2f3b9d..a506942 100644 +--- a/mpn/x86_64/fastsse/copyi.asm ++++ b/mpn/x86_64/fastsse/copyi.asm +@@ -175,3 +175,4 @@ dnl jnc 1b + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/lshift-movdqu2.asm b/mpn/x86_64/fastsse/lshift-movdqu2.asm +index a05e850..df8ee6d 100644 +--- a/mpn/x86_64/fastsse/lshift-movdqu2.asm ++++ b/mpn/x86_64/fastsse/lshift-movdqu2.asm +@@ -180,3 +180,4 @@ L(end8):movq (ap), %xmm0 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/lshift.asm b/mpn/x86_64/fastsse/lshift.asm +index f76972a..7d0f0fc 100644 +--- a/mpn/x86_64/fastsse/lshift.asm ++++ b/mpn/x86_64/fastsse/lshift.asm +@@ -167,3 +167,4 @@ L(end8):movq (ap), %xmm0 + movq %xmm0, (rp) + ret + EPILOGUE() 
++CF_PROT +diff --git a/mpn/x86_64/fastsse/lshiftc-movdqu2.asm b/mpn/x86_64/fastsse/lshiftc-movdqu2.asm +index 8250910..4878dad 100644 +--- a/mpn/x86_64/fastsse/lshiftc-movdqu2.asm ++++ b/mpn/x86_64/fastsse/lshiftc-movdqu2.asm +@@ -191,3 +191,4 @@ L(end8):movq (ap), %xmm0 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/lshiftc.asm b/mpn/x86_64/fastsse/lshiftc.asm +index d252069..f042ec0 100644 +--- a/mpn/x86_64/fastsse/lshiftc.asm ++++ b/mpn/x86_64/fastsse/lshiftc.asm +@@ -177,3 +177,4 @@ L(end8):movq (ap), %xmm0 + movq %xmm0, (rp) + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/rshift-movdqu2.asm b/mpn/x86_64/fastsse/rshift-movdqu2.asm +index 1e270b1..8149717 100644 +--- a/mpn/x86_64/fastsse/rshift-movdqu2.asm ++++ b/mpn/x86_64/fastsse/rshift-movdqu2.asm +@@ -199,3 +199,4 @@ L(bc): dec R32(n) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fastsse/sec_tabselect.asm b/mpn/x86_64/fastsse/sec_tabselect.asm +index e3df110..9975eca 100644 +--- a/mpn/x86_64/fastsse/sec_tabselect.asm ++++ b/mpn/x86_64/fastsse/sec_tabselect.asm +@@ -190,3 +190,4 @@ L(tp1): movdqa %xmm8, %xmm0 + L(b000):FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/fat/fat_entry.asm b/mpn/x86_64/fat/fat_entry.asm +index 8f7599d..5f78553 100644 +--- a/mpn/x86_64/fat/fat_entry.asm ++++ b/mpn/x86_64/fat/fat_entry.asm +@@ -205,3 +205,4 @@ PROLOGUE(__gmpn_cpuid) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/gcd_1.asm b/mpn/x86_64/gcd_1.asm +index ac4aced..bf32cc0 100644 +--- a/mpn/x86_64/gcd_1.asm ++++ b/mpn/x86_64/gcd_1.asm +@@ -163,3 +163,4 @@ L(shift_alot): + mov %rax, %rcx + jmp L(mid) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/invert_limb.asm b/mpn/x86_64/invert_limb.asm +index cc79b89..829861f 100644 +--- a/mpn/x86_64/invert_limb.asm ++++ b/mpn/x86_64/invert_limb.asm +@@ -113,3 +113,4 @@ ifdef(`DARWIN',` + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/invert_limb_table.asm b/mpn/x86_64/invert_limb_table.asm +index 739d59e..16fe314 100644 +--- a/mpn/x86_64/invert_limb_table.asm ++++ b/mpn/x86_64/invert_limb_table.asm +@@ -48,3 +48,4 @@ forloop(i,256,512-1,dnl + ` .value eval(0x7fd00/i) + ')dnl + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/k10/hamdist.asm b/mpn/x86_64/k10/hamdist.asm +index 44b67b5..83e4e86 100644 +--- a/mpn/x86_64/k10/hamdist.asm ++++ b/mpn/x86_64/k10/hamdist.asm +@@ -101,3 +101,4 @@ L(top): mov (ap,n,8), %r8 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k10/popcount.asm b/mpn/x86_64/k10/popcount.asm +index 3814aea..17e7a73 100644 +--- a/mpn/x86_64/k10/popcount.asm ++++ b/mpn/x86_64/k10/popcount.asm +@@ -136,3 +136,4 @@ C 1 = n mod 8 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k8/aorrlsh_n.asm b/mpn/x86_64/k8/aorrlsh_n.asm +index ff3a184..8eff29e 100644 +--- a/mpn/x86_64/k8/aorrlsh_n.asm ++++ b/mpn/x86_64/k8/aorrlsh_n.asm +@@ -215,3 +215,4 @@ L(cj1): mov %r9, 8(rp,n,8) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k8/div_qr_1n_pi1.asm b/mpn/x86_64/k8/div_qr_1n_pi1.asm +index 861402b..fef3a09 100644 +--- a/mpn/x86_64/k8/div_qr_1n_pi1.asm ++++ b/mpn/x86_64/k8/div_qr_1n_pi1.asm +@@ -247,3 +247,4 @@ L(q_incr_loop): + lea 8(U1), U1 + jmp L(q_incr_loop) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k8/mul_basecase.asm b/mpn/x86_64/k8/mul_basecase.asm +index ca2efb9..61b6e0e 100644 +--- a/mpn/x86_64/k8/mul_basecase.asm ++++ b/mpn/x86_64/k8/mul_basecase.asm +@@ -467,3 +467,4 @@ L(ret): pop %r15 + ret + + EPILOGUE() ++CF_PROT 
+diff --git a/mpn/x86_64/k8/mullo_basecase.asm b/mpn/x86_64/k8/mullo_basecase.asm +index fa00f42..b1f5b20 100644 +--- a/mpn/x86_64/k8/mullo_basecase.asm ++++ b/mpn/x86_64/k8/mullo_basecase.asm +@@ -99,12 +99,14 @@ dnl JMPENT( L(2m4), L(tab)) C 10 + dnl JMPENT( L(3m4), L(tab)) C 11 + TEXT + +-L(1): imul %r8, %rax ++L(1): CFPROT_ENDBR ++ imul %r8, %rax + mov %rax, (rp) + FUNC_EXIT() + ret + +-L(2): mov 8(vp_param), %r11 ++L(2): CFPROT_ENDBR ++ mov 8(vp_param), %r11 + imul %rax, %r11 C u0 x v1 + mul %r8 C u0 x v0 + mov %rax, (rp) +@@ -115,7 +117,8 @@ L(2): mov 8(vp_param), %r11 + FUNC_EXIT() + ret + +-L(3): mov 8(vp_param), %r9 C v1 ++L(3): CFPROT_ENDBR ++ mov 8(vp_param), %r9 C v1 + mov 16(vp_param), %r11 + mul %r8 C u0 x v0 -> + mov %rax, (rp) C r0 +@@ -144,7 +147,8 @@ L(0m4): + L(1m4): + L(2m4): + L(3m4): +-L(gen): push %rbx ++L(gen): CFPROT_ENDBR ++ push %rbx + push %rbp + push %r13 + push %r14 +@@ -434,3 +438,4 @@ L(ret): pop %r15 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k8/mulmid_basecase.asm b/mpn/x86_64/k8/mulmid_basecase.asm +index 86f1414..0ace1ba 100644 +--- a/mpn/x86_64/k8/mulmid_basecase.asm ++++ b/mpn/x86_64/k8/mulmid_basecase.asm +@@ -557,3 +557,4 @@ L(ret): pop %r15 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k8/redc_1.asm b/mpn/x86_64/k8/redc_1.asm +index 9327b21..b00103f 100644 +--- a/mpn/x86_64/k8/redc_1.asm ++++ b/mpn/x86_64/k8/redc_1.asm +@@ -124,8 +124,9 @@ L(tab): JMPENT( L(0), L(tab)) + JMPENT( L(3m4), L(tab)) + TEXT + ++L(1): CFPROT_ENDBR + ALIGN(16) +-L(1): mov (mp_param), %rax ++ mov (mp_param), %rax + mul q0 + add 8(up), %rax + adc 16(up), %rdx +@@ -135,8 +136,9 @@ L(1): mov (mp_param), %rax + jmp L(ret) + + ++L(2): CFPROT_ENDBR + ALIGN(16) +-L(2): mov (mp_param), %rax ++ mov (mp_param), %rax + mul q0 + xor R32(%r14), R32(%r14) + mov %rax, %r10 +@@ -171,7 +173,8 @@ L(2): mov (mp_param), %rax + jmp L(ret) + + +-L(3): mov (mp_param), %rax ++L(3): CFPROT_ENDBR ++ mov (mp_param), %rax + mul q0 + mov %rax, %rbx + mov %rdx, %r10 +@@ -247,8 +250,8 @@ L(3): mov (mp_param), %rax + jmp L(ret) + + ++L(2m4): CFPROT_ENDBR + ALIGN(16) +-L(2m4): + L(lo2): mov (mp,nneg,8), %rax + mul q0 + xor R32(%r14), R32(%r14) +@@ -323,8 +326,8 @@ L(le2): add %r10, (up) + jmp L(addx) + + ++L(1m4): CFPROT_ENDBR + ALIGN(16) +-L(1m4): + L(lo1): mov (mp,nneg,8), %rax + xor %r9, %r9 + xor R32(%rbx), R32(%rbx) +@@ -396,9 +399,9 @@ L(le1): add %r10, (up) + jmp L(addx) + + +- ALIGN(16) + L(0): +-L(0m4): ++L(0m4): CFPROT_ENDBR ++ ALIGN(16) + L(lo0): mov (mp,nneg,8), %rax + mov nneg, i + mul q0 +@@ -462,8 +465,8 @@ L(le0): add %r10, (up) + jmp L(addy) + + ++L(3m4): CFPROT_ENDBR + ALIGN(16) +-L(3m4): + L(lo3): mov (mp,nneg,8), %rax + mul q0 + mov %rax, %rbx +@@ -589,3 +592,4 @@ L(ret): pop %r15 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/k8/sqr_basecase.asm b/mpn/x86_64/k8/sqr_basecase.asm +index 60cf945..e6a545d 100644 +--- a/mpn/x86_64/k8/sqr_basecase.asm ++++ b/mpn/x86_64/k8/sqr_basecase.asm +@@ -131,7 +131,8 @@ L(tab): JMPENT( L(4), L(tab)) + JMPENT( L(3m4), L(tab)) + TEXT + +-L(1): mov (up), %rax ++L(1): CFPROT_ENDBR ++ mov (up), %rax + mul %rax + add $40, %rsp + mov %rax, (rp) +@@ -139,7 +140,8 @@ L(1): mov (up), %rax + FUNC_EXIT() + ret + +-L(2): mov (up), %rax ++L(2): CFPROT_ENDBR ++ mov (up), %rax + mov %rax, %r8 + mul %rax + mov 8(up), %r11 +@@ -165,7 +167,8 @@ L(2): mov (up), %rax + FUNC_EXIT() + ret + +-L(3): mov (up), %rax ++L(3): CFPROT_ENDBR ++ mov (up), %rax + mov %rax, %r10 + mul %rax + mov 8(up), %r11 +@@ -210,7 
+213,8 @@ L(3): mov (up), %rax + FUNC_EXIT() + ret + +-L(4): mov (up), %rax ++L(4): CFPROT_ENDBR ++ mov (up), %rax + mov %rax, %r11 + mul %rax + mov 8(up), %rbx +@@ -281,7 +285,7 @@ L(4): mov (up), %rax + ret + + +-L(0m4): ++L(0m4): CFPROT_ENDBR + lea -16(rp,n,8), tp C point tp in middle of result operand + mov (up), v0 + mov 8(up), %rax +@@ -339,7 +343,7 @@ L(L3): xor R32(w1), R32(w1) + jmp L(dowhile) + + +-L(1m4): ++L(1m4): CFPROT_ENDBR + lea 8(rp,n,8), tp C point tp in middle of result operand + mov (up), v0 C u0 + mov 8(up), %rax C u1 +@@ -417,7 +421,7 @@ L(m2x): mov (up,j,8), %rax + jmp L(dowhile_end) + + +-L(2m4): ++L(2m4): CFPROT_ENDBR + lea -16(rp,n,8), tp C point tp in middle of result operand + mov (up), v0 + mov 8(up), %rax +@@ -474,7 +478,7 @@ L(L1): xor R32(w0), R32(w0) + jmp L(dowhile_mid) + + +-L(3m4): ++L(3m4): CFPROT_ENDBR + lea 8(rp,n,8), tp C point tp in middle of result operand + mov (up), v0 C u0 + mov 8(up), %rax C u1 +@@ -805,3 +809,4 @@ L(d1): mov %r11, 24(rp,j,8) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/logops_n.asm b/mpn/x86_64/logops_n.asm +index b277f58..b2c640c 100644 +--- a/mpn/x86_64/logops_n.asm ++++ b/mpn/x86_64/logops_n.asm +@@ -134,6 +134,7 @@ L(e10): movq 24(vp,n,8), %r9 + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT + ') + + ifdef(`VARIANT_2',` +@@ -187,6 +188,7 @@ L(e10): movq 24(vp,n,8), %r9 + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT + ') + + ifdef(`VARIANT_3',` +@@ -241,4 +243,5 @@ L(e10): movq 24(vp,n,8), %r9 + L(ret): FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT + ') +diff --git a/mpn/x86_64/lshift.asm b/mpn/x86_64/lshift.asm +index f368944..990b3b8 100644 +--- a/mpn/x86_64/lshift.asm ++++ b/mpn/x86_64/lshift.asm +@@ -245,3 +245,4 @@ L(ast): mov (up), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/lshiftc.asm b/mpn/x86_64/lshiftc.asm +index c4ba04a..4fd4430 100644 +--- a/mpn/x86_64/lshiftc.asm ++++ b/mpn/x86_64/lshiftc.asm +@@ -180,3 +180,4 @@ L(ast): mov (up), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/lshsub_n.asm b/mpn/x86_64/lshsub_n.asm +index 4d428c0..d263565 100644 +--- a/mpn/x86_64/lshsub_n.asm ++++ b/mpn/x86_64/lshsub_n.asm +@@ -170,3 +170,4 @@ L(end): + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/missing.asm b/mpn/x86_64/missing.asm +index 9b65c89..7914b82 100644 +--- a/mpn/x86_64/missing.asm ++++ b/mpn/x86_64/missing.asm +@@ -128,3 +128,4 @@ PROLOGUE(__gmp_adcx) + ret + EPILOGUE() + PROTECT(__gmp_adcx) ++CF_PROT +diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm +index 09b5dd1..287f61d 100644 +--- a/mpn/x86_64/mod_1_1.asm ++++ b/mpn/x86_64/mod_1_1.asm +@@ -234,3 +234,4 @@ L(z): + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm +index 09d856e..1cd6dd1 100644 +--- a/mpn/x86_64/mod_1_2.asm ++++ b/mpn/x86_64/mod_1_2.asm +@@ -237,3 +237,4 @@ ifdef(`SHLD_SLOW',` + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm +index ae34617..fb685ef 100644 +--- a/mpn/x86_64/mod_1_4.asm ++++ b/mpn/x86_64/mod_1_4.asm +@@ -268,3 +268,4 @@ ifdef(`SHLD_SLOW',` + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/mod_34lsub1.asm b/mpn/x86_64/mod_34lsub1.asm +index 62bdcfa..2cf5751 100644 +--- a/mpn/x86_64/mod_34lsub1.asm ++++ b/mpn/x86_64/mod_34lsub1.asm +@@ -135,46 +135,55 @@ L(tab): JMPENT( L(0), L(tab)) + JMPENT( L(8), L(tab)) + TEXT + +-L(6): add (ap), %rax ++L(6): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + 
adc 16(ap), %rdx + adc $0, %r9 + add $24, ap +-L(3): add (ap), %rax ++L(3): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + adc 16(ap), %rdx + jmp L(cj1) + +-L(7): add (ap), %rax ++L(7): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + adc 16(ap), %rdx + adc $0, %r9 + add $24, ap +-L(4): add (ap), %rax ++L(4): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + adc 16(ap), %rdx + adc $0, %r9 + add $24, ap +-L(1): add (ap), %rax ++L(1): CFPROT_ENDBR ++ add (ap), %rax + adc $0, %rcx + jmp L(cj2) + +-L(8): add (ap), %rax ++L(8): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + adc 16(ap), %rdx + adc $0, %r9 + add $24, ap +-L(5): add (ap), %rax ++L(5): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + adc 16(ap), %rdx + adc $0, %r9 + add $24, ap +-L(2): add (ap), %rax ++L(2): CFPROT_ENDBR ++ add (ap), %rax + adc 8(ap), %rcx + + L(cj2): adc $0, %rdx + L(cj1): adc $0, %r9 +-L(0): add %r9, %rax ++L(0): CFPROT_ENDBR ++ add %r9, %rax + adc $0, %rcx + adc $0, %rdx + adc $0, %rax +@@ -203,3 +212,4 @@ L(0): add %r9, %rax + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/mode1o.asm b/mpn/x86_64/mode1o.asm +index 2cd2b08..c10a5a6 100644 +--- a/mpn/x86_64/mode1o.asm ++++ b/mpn/x86_64/mode1o.asm +@@ -169,3 +169,4 @@ L(one): + + EPILOGUE(mpn_modexact_1c_odd) + EPILOGUE(mpn_modexact_1_odd) ++CF_PROT +diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm +index b032afc..6ea9a4a 100644 +--- a/mpn/x86_64/mul_1.asm ++++ b/mpn/x86_64/mul_1.asm +@@ -181,3 +181,4 @@ IFDOS(``pop %rdi '') + IFDOS(``pop %rsi '') + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/mul_2.asm b/mpn/x86_64/mul_2.asm +index f408c52..6b73737 100644 +--- a/mpn/x86_64/mul_2.asm ++++ b/mpn/x86_64/mul_2.asm +@@ -190,3 +190,4 @@ L(m22): mul v1 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/mulx/aorsmul_1.asm b/mpn/x86_64/mulx/aorsmul_1.asm +index 285c073..942cf6a 100644 +--- a/mpn/x86_64/mulx/aorsmul_1.asm ++++ b/mpn/x86_64/mulx/aorsmul_1.asm +@@ -159,3 +159,4 @@ L(wd1): ADCSBB %rbx, 24(rp) + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/mulx/mul_1.asm b/mpn/x86_64/mulx/mul_1.asm +index 34a044d..4a0e6ef 100644 +--- a/mpn/x86_64/mulx/mul_1.asm ++++ b/mpn/x86_64/mulx/mul_1.asm +@@ -152,3 +152,4 @@ L(wd1): adc %r12, %rbx + ret + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/nano/dive_1.asm b/mpn/x86_64/nano/dive_1.asm +index e9a0763..d57c444 100644 +--- a/mpn/x86_64/nano/dive_1.asm ++++ b/mpn/x86_64/nano/dive_1.asm +@@ -164,3 +164,4 @@ L(one): shr R8(%rcx), %rax + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/aors_n.asm b/mpn/x86_64/pentium4/aors_n.asm +index 8e6ee1b..d3daf6f 100644 +--- a/mpn/x86_64/pentium4/aors_n.asm ++++ b/mpn/x86_64/pentium4/aors_n.asm +@@ -194,3 +194,4 @@ L(ret): mov R32(%rbx), R32(%rax) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/aorslshC_n.asm b/mpn/x86_64/pentium4/aorslshC_n.asm +index d03c6a3..a4cd689 100644 +--- a/mpn/x86_64/pentium4/aorslshC_n.asm ++++ b/mpn/x86_64/pentium4/aorslshC_n.asm +@@ -201,3 +201,4 @@ L(c3): mov $1, R8(%rax) + jmp L(rc3) + EPILOGUE() + ASM_END() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/lshift.asm b/mpn/x86_64/pentium4/lshift.asm +index d3b5213..baa4820 100644 +--- a/mpn/x86_64/pentium4/lshift.asm ++++ b/mpn/x86_64/pentium4/lshift.asm +@@ -164,3 +164,4 @@ L(ast): movq (up), %mm2 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/lshiftc.asm b/mpn/x86_64/pentium4/lshiftc.asm +index fc64676..e7ed07f 100644 +--- 
a/mpn/x86_64/pentium4/lshiftc.asm ++++ b/mpn/x86_64/pentium4/lshiftc.asm +@@ -177,3 +177,4 @@ L(ast): movq (up), %mm2 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/mod_34lsub1.asm b/mpn/x86_64/pentium4/mod_34lsub1.asm +index f34b3f0..adb4ae6 100644 +--- a/mpn/x86_64/pentium4/mod_34lsub1.asm ++++ b/mpn/x86_64/pentium4/mod_34lsub1.asm +@@ -165,3 +165,4 @@ L(combine): + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/rsh1aors_n.asm b/mpn/x86_64/pentium4/rsh1aors_n.asm +index 5528ce4..64a6322 100644 +--- a/mpn/x86_64/pentium4/rsh1aors_n.asm ++++ b/mpn/x86_64/pentium4/rsh1aors_n.asm +@@ -332,3 +332,4 @@ L(cj1): or %r14, %rbx + L(c3): mov $1, R8(%rax) + jmp L(rc3) + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/pentium4/rshift.asm b/mpn/x86_64/pentium4/rshift.asm +index b7c1ee2..758ca64 100644 +--- a/mpn/x86_64/pentium4/rshift.asm ++++ b/mpn/x86_64/pentium4/rshift.asm +@@ -167,3 +167,4 @@ L(ast): movq (up), %mm2 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/popham.asm b/mpn/x86_64/popham.asm +index 9005f81..a52ea0f 100644 +--- a/mpn/x86_64/popham.asm ++++ b/mpn/x86_64/popham.asm +@@ -175,3 +175,4 @@ L(end): + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/rsh1aors_n.asm b/mpn/x86_64/rsh1aors_n.asm +index a3e9cc5..d28cc32 100644 +--- a/mpn/x86_64/rsh1aors_n.asm ++++ b/mpn/x86_64/rsh1aors_n.asm +@@ -187,3 +187,4 @@ L(end): mov %rbx, (rp) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/rshift.asm b/mpn/x86_64/rshift.asm +index 3f344f1..2c45172 100644 +--- a/mpn/x86_64/rshift.asm ++++ b/mpn/x86_64/rshift.asm +@@ -174,3 +174,4 @@ L(ast): mov (up), %r10 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/sec_tabselect.asm b/mpn/x86_64/sec_tabselect.asm +index e8aed26..2198b4b 100644 +--- a/mpn/x86_64/sec_tabselect.asm ++++ b/mpn/x86_64/sec_tabselect.asm +@@ -174,3 +174,4 @@ L(b00): pop %r15 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/sqr_diag_addlsh1.asm b/mpn/x86_64/sqr_diag_addlsh1.asm +index 4ad034c..6db16f6 100644 +--- a/mpn/x86_64/sqr_diag_addlsh1.asm ++++ b/mpn/x86_64/sqr_diag_addlsh1.asm +@@ -114,3 +114,4 @@ L(end): add %r10, %r8 + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/sublsh1_n.asm b/mpn/x86_64/sublsh1_n.asm +index c6d829f..2f0fe01 100644 +--- a/mpn/x86_64/sublsh1_n.asm ++++ b/mpn/x86_64/sublsh1_n.asm +@@ -158,3 +158,4 @@ L(end): add R32(%rbp), R32(%rax) + FUNC_EXIT() + ret + EPILOGUE() ++CF_PROT +diff --git a/mpn/x86_64/x86_64-defs.m4 b/mpn/x86_64/x86_64-defs.m4 +index a626419..80f549e 100644 +--- a/mpn/x86_64/x86_64-defs.m4 ++++ b/mpn/x86_64/x86_64-defs.m4 +@@ -93,8 +93,38 @@ m4_assert_numargs(1) + ` GLOBL $1 + TYPE($1,`function') + $1: ++ CFPROT_ENDBR + ') + ++dnl Generates the endbr64 instructions ++dnl Using macro, so it can be easily extended to use some arch specific conditional defines ++define(`CFPROT_ENDBR', ++`` ++ endbr64'' ++) ++ ++dnl Append the .gnu-property to the end of files ++dnl This is needed for a -fcf-protection ++dnl Again, using macro for easy arch specific defines ++dnl ++define(`CF_PROT',`` ++ .section .note.gnu.property,"a" ++ .align 8 ++ .long 1f - 0f ++ .long 4f - 1f ++ .long 5 ++0: ++ .string "GNU" ++1: ++ .align 8 ++ .long 0xc0000002 ++ .long 3f - 2f ++2: ++ .long 0x3 ++3: ++ .align 8 ++4: ++'') + + dnl Usage: ASSERT([cond][,instructions]) + dnl diff --git a/SOURCES/gmp-mparam.h b/SOURCES/gmp-mparam.h new file mode 100644 index 0000000..1d4e087 --- /dev/null +++ 
b/SOURCES/gmp-mparam.h @@ -0,0 +1,88 @@ +/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, +2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +/* + * This gmp-mparam.h is a wrapper include file for the original gmp-mparam.h, + * which has been renamed to gmp-mparam-.h. There are conflicts for the + * original gmp-mparam.h on multilib systems, which result from arch-specific + * configuration options. Please do not use the arch-specific file directly. + * + * Copyright (C) 2006 Red Hat, Inc. + * Thomas Woerner + */ + +#ifdef gmp_mparam_wrapper_h +#error "gmp_mparam_wrapper_h should not be defined!" +#endif +#define gmp_mparam_wrapper_h + +#if defined(__arm__) +#include "gmp-mparam-arm.h" +#elif defined(__i386__) +#include "gmp-mparam-i386.h" +#elif defined(__ia64__) +#include "gmp-mparam-ia64.h" +#elif defined(__powerpc64__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#include "gmp-mparam-ppc64.h" +# else +#include "gmp-mparam-ppc64le.h" +# endif +#elif defined(__powerpc__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#include "gmp-mparam-ppc.h" +# else +#include "gmp-mparam-ppcle.h" +# endif +#elif defined(__s390x__) +#include "gmp-mparam-s390x.h" +#elif defined(__s390__) +#include "gmp-mparam-s390.h" +#elif defined(__x86_64__) +#include "gmp-mparam-x86_64.h" +#elif defined(__alpha__) +#include "gmp-mparam-alpha.h" +#elif defined(__sh__) +#include "gmp-mparam-sh.h" +#elif defined(__sparc__) && defined (__arch64__) +#include "gmp-mparam-sparc64.h" +#elif defined(__sparc__) +#include "gmp-mparam-sparc.h" +#elif defined(__aarch64__) +#include "gmp-mparam-aarch64.h" +#elif defined(__mips64) && defined(__MIPSEL__) +#include "gmp-mparam-mips64el.h" +#elif defined(__mips64) +#include "gmp-mparam-mips64.h" +#elif defined(__mips) && defined(__MIPSEL__) +#include "gmp-mparam-mipsel.h" +#elif defined(__mips) +#include "gmp-mparam-mips.h" +#elif defined(__riscv) +#if __riscv_xlen == 64 +#include "gmp-mparam-riscv64.h" +#else +#error "No support for riscv32" +#endif +#else +#error "The gmp-devel package is not usable with the architecture." +#endif + +#undef gmp_mparam_wrapper_h diff --git a/SOURCES/gmp.h b/SOURCES/gmp.h new file mode 100644 index 0000000..0a91606 --- /dev/null +++ b/SOURCES/gmp.h @@ -0,0 +1,88 @@ +/* Definitions for GNU multiple precision functions. -*- mode: c -*- + +Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, +2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +/* + * This gmp.h is a wrapper include file for the original gmp.h, which has been + * renamed to gmp-.h. There are conflicts for the original gmp.h on + * multilib systems, which result from arch-specific configuration options. + * Please do not use the arch-specific file directly. + * + * Copyright (C) 2006 Red Hat, Inc. + * Thomas Woerner + */ + +#ifdef gmp_wrapper_h +#error "gmp_wrapper_h should not be defined!" +#endif +#define gmp_wrapper_h + +#if defined(__arm__) +#include "gmp-arm.h" +#elif defined(__i386__) +#include "gmp-i386.h" +#elif defined(__ia64__) +#include "gmp-ia64.h" +#elif defined(__powerpc64__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#include "gmp-ppc64.h" +# else +#include "gmp-ppc64le.h" +# endif +#elif defined(__powerpc__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#include "gmp-ppc.h" +# else +#include "gmp-ppcle.h" +# endif +#elif defined(__s390x__) +#include "gmp-s390x.h" +#elif defined(__s390__) +#include "gmp-s390.h" +#elif defined(__x86_64__) +#include "gmp-x86_64.h" +#elif defined(__alpha__) +#include "gmp-alpha.h" +#elif defined(__sh__) +#include "gmp-sh.h" +#elif defined(__sparc__) && defined (__arch64__) +#include "gmp-sparc64.h" +#elif defined(__sparc__) +#include "gmp-sparc.h" +#elif defined(__aarch64__) +#include "gmp-aarch64.h" +#elif defined(__mips64) && defined(__MIPSEL__) +#include "gmp-mips64el.h" +#elif defined(__mips64) +#include "gmp-mips64.h" +#elif defined(__mips) && defined(__MIPSEL__) +#include "gmp-mipsel.h" +#elif defined(__mips) +#include "gmp-mips.h" +#elif defined(__riscv) +#if __riscv_xlen == 64 +#include "gmp-riscv64.h" +#else +#error "No support for riscv32" +#endif +#else +#error "The gmp-devel package is not usable with the architecture." +#endif + +#undef gmp_wrapper_h diff --git a/SOURCES/ibm_z13_simd_part1.patch b/SOURCES/ibm_z13_simd_part1.patch new file mode 100644 index 0000000..86bb9c3 --- /dev/null +++ b/SOURCES/ibm_z13_simd_part1.patch @@ -0,0 +1,596 @@ +Co-authored-by: Stefan Liebler +--- + mpn/s390_64/z13/addmul_1.c | 358 +++++++++++++++++++++++++++++++++++ + mpn/s390_64/z13/common-vec.h | 175 +++++++++++++++++ + mpn/s390_64/z13/mul_1.c | 31 +++ + 3 files changed, 564 insertions(+) + create mode 100644 mpn/s390_64/z13/addmul_1.c + create mode 100644 mpn/s390_64/z13/common-vec.h + create mode 100644 mpn/s390_64/z13/mul_1.c + +diff --git a/mpn/s390_64/z13/addmul_1.c b/mpn/s390_64/z13/addmul_1.c +new file mode 100644 +index 000000000..022e5edcc +--- /dev/null ++++ b/mpn/s390_64/z13/addmul_1.c +@@ -0,0 +1,359 @@ ++/* Addmul_1 / mul_1 for IBM z13 and later ++ Contributed by Marius Hillenbrand ++ ++Copyright 2021 Free Software Foundation, Inc. ++ ++This file is part of the GNU MP Library. 
++ ++The GNU MP Library is free software; you can redistribute it and/or modify ++it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++or ++ ++ * the GNU General Public License as published by the Free Software ++ Foundation; either version 2 of the License, or (at your option) any ++ later version. ++ ++or both in parallel, as here. ++ ++The GNU MP Library is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received copies of the GNU General Public License and the ++GNU Lesser General Public License along with the GNU MP Library. If not, ++see https://www.gnu.org/licenses/. */ ++ ++#include "gmp.h" ++#include "gmp-impl.h" ++#include "s390_64/z13/common-vec.h" ++ ++#undef FUNCNAME ++ ++#ifdef DO_INLINE ++# ifdef OPERATION_addmul_1 ++# define ADD ++# define FUNCNAME inline_addmul_1 ++# elif defined(OPERATION_mul_1) ++# define FUNCNAME inline_mul_1 ++# endif ++ ++#else ++# ifdef OPERATION_addmul_1 ++# define ADD ++# define FUNCNAME mpn_addmul_1 ++# elif defined(OPERATION_mul_1) ++# define FUNCNAME mpn_mul_1 ++# endif ++#endif ++ ++#ifdef DO_INLINE ++static inline mp_limb_t ++FUNCNAME (mp_ptr rp, mp_srcptr s1p, mp_size_t n, mp_limb_t s2limb) ++ __attribute__ ((always_inline)); ++ ++static inline ++#endif ++mp_limb_t ++FUNCNAME (mp_ptr rp, mp_srcptr s1p, mp_size_t n, mp_limb_t s2limb) ++{ ++ ASSERT (n >= 1); ++ ASSERT (MPN_SAME_OR_INCR_P(rp, s1p, n)); ++ ++ /* Combine 64x64 multiplication into GPR pairs (MLGR) with 128-bit adds in ++ VRs (using each VR as a single 128-bit accumulator). ++ The inner loop is unrolled to four limbs, with two blocks of four ++ multiplications each. Since the MLGR operation operates on even/odd GPR ++ pairs, pin the products appropriately. */ ++ ++ /* products as GPR pairs */ ++ register mp_limb_t p0_high asm("r0"); ++ register mp_limb_t p0_low asm("r1"); ++ ++ register mp_limb_t p1_high asm("r8"); ++ register mp_limb_t p1_low asm("r9"); ++ ++ register mp_limb_t p2_high asm("r6"); ++ register mp_limb_t p2_low asm("r7"); ++ ++ register mp_limb_t p3_high asm("r10"); ++ register mp_limb_t p3_low asm("r11"); ++ ++ /* carry flag for 128-bit add in VR for first carry chain */ ++ vec_t carry_vec0 = { .dw = vec_splat_u64 (0) }; ++ mp_limb_t carry_limb = 0; ++ ++#ifdef ADD ++ /* 2nd carry flag for 2nd carry chain with addmul */ ++ vec_t carry_vec1 = { .dw = vec_splat_u64 (0) }; ++ vec_t sum0; ++ vec_t rp0_addend, rp1_addend; ++ rp0_addend.dw = vec_splat_u64 (0); ++ rp1_addend.dw = vec_splat_u64 (0); ++#endif ++ vec_t sum1; ++ ++ vec_t carry_prod = { .dw = vec_splat_u64 (0) }; ++ ++ /* The scalar multiplications compete with pointer and index increments for ++ * issue ports. Thus, increment the loop index in the middle of the loop so ++ * that the operations for the next iteration's multiplications can be ++ * loaded in time (looks horrible, yet helps performance) and make sure we ++ * use addressing with base reg + index reg + immediate displacement ++ * (so that only the single index needs incrementing, instead of multiple ++ * pointers). 
*/ ++#undef LOOP_ADVANCE ++#undef IDX_OFFSET ++ ++#define LOOP_ADVANCE 4 * sizeof (mp_limb_t) ++#define IDX_OFFSET (LOOP_ADVANCE) ++ register ssize_t idx = 0 - IDX_OFFSET; ++ ++ /* ++ * branch-on-count implicitly hint to the branch prediction as taken, while ++ * compare-and-branch hints as not taken. currently, using branch-on-count ++ * has a performance advantage, but it is not clear that it is generally the ++ * better choice (e.g., branch-on-count requires decrementing the separate ++ * counter). so, allow switching the loop condition to enable either ++ * category of branch instructions: ++ * - idx is less than an upper bound, for compare-and-branch ++ * - iteration counter greater than zero, for branch-on-count ++ */ ++#define BRCTG ++#ifdef BRCTG ++ ssize_t iterations = (size_t)n / 4; ++#else ++ ssize_t const idx_bound = n * sizeof (mp_limb_t) - IDX_OFFSET; ++#endif ++ ++ /* products will be transferred into VRs before adding up. ++ * see main loop below for comments on accumulation scheme. */ ++ vec_t product0, product1, product2; ++ ++ product0.dw = vec_splat_u64 (0); ++ ++ switch ((size_t)n % 4) ++ { ++ case 0: ++ break; ++ ++ case 1: ++ idx = 1 * sizeof (mp_limb_t) - IDX_OFFSET; ++ ++ p3_low = s1p[0]; ++ s390_umul_ppmm (p3_high, p3_low, s2limb); ++ ++#ifdef ADD ++ rp0_addend.dw[1] = rp[0]; ++ product0.dw[1] = p3_low; ++ ++ sum0.sw = vec_add_u128 (product0.sw, rp0_addend.sw); ++ carry_vec1.dw = vec_permi (sum0.dw, sum0.dw, 0); ++ ++ rp[0] = sum0.dw[1]; ++#else ++ rp[0] = p3_low; ++#endif ++ ++ carry_limb = p3_high; ++ break; ++ ++ case 2: ++ p0_low = s1p[0]; ++ p3_low = s1p[1]; ++ idx = 2 * sizeof (mp_limb_t) - IDX_OFFSET; ++ ++ s390_double_umul_ppmm (p0_high, p0_low, p3_high, p3_low, s2limb); ++ ++ carry_prod.dw[0] = p3_low; ++ ++ product0.dw = vec_load_2di_as_pair (p0_high, p0_low); ++ ++ carry_limb = p3_high; ++ ++#ifdef ADD ++ rp0_addend = vec_load_elements_reversed (rp, 0); ++ sum0.sw = vec_add_u128 (carry_prod.sw, rp0_addend.sw); ++ carry_vec0.sw = vec_addc_u128 (carry_prod.sw, rp0_addend.sw); ++ ++ sum1.sw = vec_add_u128 (sum0.sw, product0.sw); ++ carry_vec1.sw = vec_addc_u128 (sum0.sw, product0.sw); ++#else ++ sum1.sw = vec_add_u128 (carry_prod.sw, product0.sw); ++ carry_vec0.sw = vec_addc_u128 (carry_prod.sw, product0.sw); ++#endif ++ ++ vec_store_elements_reversed (rp, 0, sum1); ++ ++ break; ++ ++ case 3: ++ idx = 3 * sizeof (mp_limb_t) - IDX_OFFSET; ++ ++ p0_low = s1p[0]; ++ s390_umul_ppmm (p0_high, p0_low, s2limb); ++ ++#ifdef ADD ++ rp0_addend.dw[1] = rp[0]; ++ product0.dw[1] = p0_low; ++ ++ sum0.sw = vec_add_u128 (product0.sw, rp0_addend.sw); ++ carry_vec1.dw = vec_permi (sum0.dw, sum0.dw, 0); ++ ++ rp[0] = sum0.dw[1]; ++#else ++ rp[0] = p0_low; ++#endif ++ carry_limb = p0_high; ++ ++ p1_low = s1p[1]; ++ p3_low = s1p[2]; ++ ++ s390_double_umul_ppmm (p1_high, p1_low, p3_high, p3_low, s2limb); ++ ++ carry_prod.dw = vec_load_2di_as_pair (p3_low, carry_limb); ++ product1.dw = vec_load_2di_as_pair (p1_high, p1_low); ++ carry_limb = p3_high; ++ ++#ifdef ADD ++ rp0_addend = vec_load_elements_reversed (rp, 8); ++ sum0.sw = vec_add_u128 (carry_prod.sw, rp0_addend.sw); ++ carry_vec0.sw = vec_addc_u128 (carry_prod.sw, rp0_addend.sw); ++ ++ sum1.sw = vec_adde_u128 (sum0.sw, product1.sw, carry_vec1.sw); ++ carry_vec1.sw = vec_addec_u128 (sum0.sw, product1.sw, carry_vec1.sw); ++#else ++ sum1.sw = vec_adde_u128 (carry_prod.sw, product1.sw, carry_vec0.sw); ++ carry_vec0.sw ++ = vec_addec_u128 (carry_prod.sw, product1.sw, carry_vec0.sw); ++#endif ++ vec_store_elements_reversed (rp, 
8, sum1); ++ break; ++ } ++ ++#ifdef BRCTG ++ for (; iterations > 0; iterations--) ++ { ++#else ++ while (idx < idx_bound) ++ { ++#endif ++ vec_t overlap_addend0; ++ vec_t overlap_addend1; ++ ++ /* The 64x64->128 MLGR multiplies two factors in GPRs and stores the ++ * result in a GPR pair. One of the factors is taken from the GPR pair ++ * and overwritten. ++ * To reuse factors, it turned out cheaper to load limbs multiple times ++ * than copying GPR contents. Enforce that and the use of addressing by ++ * base + index gpr + immediate displacement via inline asm. ++ */ ++ ASM_LOADGPR (p0_low, s1p, idx, 0 + IDX_OFFSET); ++ ASM_LOADGPR (p1_low, s1p, idx, 8 + IDX_OFFSET); ++ ASM_LOADGPR (p2_low, s1p, idx, 16 + IDX_OFFSET); ++ ASM_LOADGPR (p3_low, s1p, idx, 24 + IDX_OFFSET); ++ ++ /* ++ * accumulate products as follows (for addmul): ++ * | rp[i+3] | rp[i+2] | rp[i+1] | rp[i] | ++ * p0_high | p0_low | ++ * p1_high | p1_low | carry-limb in ++ * p2_high | p2_low | ++ * c-limb out <- p3_high | p3_low | ++ * | < 128-bit VR > < 128-bit VR > ++ * ++ * < rp1_addend > < rp0_addend > ++ * carry-chain 0 <- + <- + <- carry_vec0[127] ++ * < product1 > < product0 > ++ * carry-chain 1 <- + <- + <- carry_vec1[127] ++ * < overlap_addend1 > < overlap_addend0 > ++ * ++ * note that a 128-bit add with carry in + out is built from two insns ++ * - vec_adde_u128 (vacq) provides sum ++ * - vec_addec_u128 (vacccq) provides the new carry bit ++ */ ++ ++ s390_double_umul_ppmm (p0_high, p0_low, p1_high, p1_low, s2limb); ++ ++ /* ++ * "barrier" to enforce scheduling loads for all limbs and first round ++ * of MLGR before anything else. ++ */ ++ asm volatile(""); ++ ++ product0.dw = vec_load_2di_as_pair (p0_high, p0_low); ++ ++#ifdef ADD ++ rp0_addend = vec_load_elements_reversed_idx (rp, idx, 0 + IDX_OFFSET); ++ rp1_addend = vec_load_elements_reversed_idx (rp, idx, 16 + IDX_OFFSET); ++#endif ++ /* increment loop index to unblock dependant loads of limbs for the next ++ * iteration (see above at #define LOOP_ADVANCE) */ ++ idx += LOOP_ADVANCE; ++ ++ s390_double_umul_ppmm (p2_high, p2_low, p3_high, p3_low, s2limb); ++ ++ overlap_addend0.dw = vec_load_2di_as_pair (p1_low, carry_limb); ++ asm volatile(""); ++ ++#ifdef ADD ++ sum0.sw = vec_adde_u128 (product0.sw, rp0_addend.sw, carry_vec0.sw); ++ sum1.sw = vec_adde_u128 (sum0.sw, overlap_addend0.sw, carry_vec1.sw); ++ ++ carry_vec0.sw ++ = vec_addec_u128 (product0.sw, rp0_addend.sw, carry_vec0.sw); ++ carry_vec1.sw ++ = vec_addec_u128 (sum0.sw, overlap_addend0.sw, carry_vec1.sw); ++#else ++ sum1.sw = vec_adde_u128 (product0.sw, overlap_addend0.sw, carry_vec0.sw); ++ carry_vec0.sw ++ = vec_addec_u128 (product0.sw, overlap_addend0.sw, carry_vec0.sw); ++#endif ++ ++ asm volatile(""); ++ product2.dw = vec_load_2di_as_pair (p2_high, p2_low); ++ overlap_addend1.dw = vec_load_2di_as_pair (p3_low, p1_high); ++ ++ vec_t sum4; ++ ++#ifdef ADD ++ vec_t sum3; ++ sum3.sw = vec_adde_u128 (product2.sw, rp1_addend.sw, carry_vec0.sw); ++ sum4.sw = vec_adde_u128 (sum3.sw, overlap_addend1.sw, carry_vec1.sw); ++ ++ carry_vec0.sw ++ = vec_addec_u128 (product2.sw, rp1_addend.sw, carry_vec0.sw); ++ carry_vec1.sw ++ = vec_addec_u128 (sum3.sw, overlap_addend1.sw, carry_vec1.sw); ++#else ++ sum4.sw = vec_adde_u128 (product2.sw, overlap_addend1.sw, carry_vec0.sw); ++ carry_vec0.sw ++ = vec_addec_u128 (product2.sw, overlap_addend1.sw, carry_vec0.sw); ++#endif ++ vec_store_elements_reversed_idx (rp, idx, IDX_OFFSET - LOOP_ADVANCE, ++ sum1); ++ vec_store_elements_reversed_idx (rp, idx, 16 + IDX_OFFSET - 
LOOP_ADVANCE, ++ sum4); ++ ++ carry_limb = p3_high; ++ } ++ ++#ifdef ADD ++ carry_vec0.dw += carry_vec1.dw; ++ carry_limb += carry_vec0.dw[1]; ++#else ++ carry_limb += carry_vec0.dw[1]; ++#endif ++ ++ return carry_limb; ++} ++ ++#undef OPERATION_addmul_1 ++#undef OPERATION_mul_1 ++#undef FUNCNAME ++#undef ADD +diff --git a/mpn/s390_64/z13/common-vec.h b/mpn/s390_64/z13/common-vec.h +new file mode 100644 +index 000000000..a59e6eefe +--- /dev/null ++++ b/mpn/s390_64/z13/common-vec.h +@@ -0,0 +1,175 @@ ++/* Common vector helpers and macros for IBM z13 and later ++ ++Copyright 2021 Free Software Foundation, Inc. ++ ++This file is part of the GNU MP Library. ++ ++The GNU MP Library is free software; you can redistribute it and/or modify ++it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++or ++ ++ * the GNU General Public License as published by the Free Software ++ Foundation; either version 2 of the License, or (at your option) any ++ later version. ++ ++or both in parallel, as here. ++ ++The GNU MP Library is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received copies of the GNU General Public License and the ++GNU Lesser General Public License along with the GNU MP Library. If not, ++see https://www.gnu.org/licenses/. */ ++ ++#ifndef __S390_64_Z13_COMMON_VEC_H ++#define __S390_64_Z13_COMMON_VEC_H ++ ++#include ++#include ++ ++/* ++ * Vector intrinsics use vector element types that kind-of make sense for the ++ * specific operation (e.g., vec_permi permutes doublewords). To use VRs ++ * interchangeably with different intrinsics, typedef the two variants and wrap ++ * them in a union. ++ */ ++#define VLEN_BYTES 16 ++typedef unsigned long long v2di __attribute__ ((vector_size (VLEN_BYTES))); ++typedef unsigned char v16qi __attribute__ ((vector_size (VLEN_BYTES))); ++ ++/* ++ * The Z vector intrinsics use vectors with different element types (e.g., ++ * v16qi for the 128-bit adds and v2di for vec_permi). ++ */ ++union vec ++{ ++ v2di dw; ++ v16qi sw; ++}; ++ ++typedef union vec vec_t; ++ ++/* ++ * single-instruction combine of two GPRs into a VR ++ */ ++static inline v2di ++vec_load_2di_as_pair (unsigned long a, unsigned long b) ++{ ++ v2di res; ++ __asm__("vlvgp\t%0,%1,%2" : "=v"(res) : "r"(a), "r"(b)); ++ return res; ++} ++ ++/* ++ * 64x64 mult where caller needs to care about proper register allocation: ++ * multiply xl with m1, treating both as unsigned, and place the result in ++ * xh:xl. ++ * mlgr operates on register pairs, so xh must be an even gpr followed by xl ++ */ ++#define s390_umul_ppmm(xh, xl, m1) \ ++ do \ ++ { \ ++ asm("mlgr\t%0,%3" : "=r"(xh), "=r"(xl) : "%1"(xl), "r"(m1)); \ ++ } \ ++ while (0); ++ ++/* ++ * two 64x64 multiplications, scheduled so that they will dispatch and issue to ++ * different sides: each mlgr is dispatched alone in an instruction group and ++ * subsequent groups will issue on different execution sides. ++ * there is a variant where both products use the same multiplicand and one ++ * that uses two different multiplicands. constraints from s390_umul_ppmm apply ++ * here. 
++ */ ++#define s390_double_umul_ppmm(X0H, X0L, X1H, X1L, MX) \ ++ do \ ++ { \ ++ asm("mlgr\t%[x0h],%[mx]\n\t" \ ++ "mlgr\t%[x1h],%[mx]" \ ++ : [x0h] "=&r"(X0H), [x0l] "=&r"(X0L), [x1h] "=r"(X1H), \ ++ [x1l] "=r"(X1L) \ ++ : "[x0l]"(X0L), "[x1l]"(X1L), [mx] "r"(MX)); \ ++ } \ ++ while (0); ++ ++#define s390_double_umul_ppmm_distinct(X0H, X0L, X1H, X1L, MX0, MX1) \ ++ do \ ++ { \ ++ asm("mlgr\t%[x0h],%[mx0]\n\t" \ ++ "mlgr\t%[x1h],%[mx1]" \ ++ : [x0h] "=&r"(X0H), [x0l] "=&r"(X0L), [x1h] "=r"(X1H), \ ++ [x1l] "=r"(X1L) \ ++ : "[x0l]"(X0L), "[x1l]"(X1L), [mx0] "r"(MX0), [mx1] "r"(MX1)); \ ++ } \ ++ while (0); ++ ++#define ASM_LOADGPR_BASE(DST, BASE, OFFSET) \ ++ asm volatile("lg\t%[r],%[off](%[b])" \ ++ : [r] "=r"(DST) \ ++ : [b] "a"(BASE), [off] "L"(OFFSET) \ ++ : "memory"); ++ ++#define ASM_LOADGPR(DST, BASE, INDEX, OFFSET) \ ++ asm volatile("lg\t%[r],%[off](%[b],%[x])" \ ++ : [r] "=r"(DST) \ ++ : [b] "a"(BASE), [x] "a"(INDEX), [off] "L"(OFFSET) \ ++ : "memory"); ++ ++/* ++ * Load a vector register from memory and swap the two 64-bit doubleword ++ * elements. ++ */ ++static inline vec_t ++vec_load_elements_reversed_idx (mp_limb_t const *base, ssize_t const index, ++ ssize_t const offset) ++{ ++ vec_t res; ++ char *ptr = (char *)base; ++ ++ res.sw = *(v16qi *)(ptr + index + offset); ++ res.dw = vec_permi (res.dw, res.dw, 2); ++ ++ return res; ++} ++ ++static inline vec_t ++vec_load_elements_reversed (mp_limb_t const *base, ssize_t const offset) ++{ ++ return vec_load_elements_reversed_idx (base, 0, offset); ++} ++ ++/* ++ * Store a vector register to memory and swap the two 64-bit doubleword ++ * elements. ++ */ ++static inline void ++vec_store_elements_reversed_idx (mp_limb_t *base, ssize_t const index, ++ ssize_t const offset, vec_t vec) ++{ ++ char *ptr = (char *)base; ++ ++ vec.dw = vec_permi (vec.dw, vec.dw, 2); ++ *(v16qi *)(ptr + index + offset) = vec.sw; ++} ++ ++static inline void ++vec_store_elements_reversed (mp_limb_t *base, ssize_t const offset, vec_t vec) ++{ ++ vec_store_elements_reversed_idx (base, 0, offset, vec); ++} ++ ++#define ASM_VZERO(VEC) \ ++ do \ ++ { \ ++ asm("vzero\t%[vec]" : [vec] "=v"(VEC)); \ ++ } \ ++ while (0) ++ ++#endif +diff --git a/mpn/s390_64/z13/mul_1.c b/mpn/s390_64/z13/mul_1.c +new file mode 100644 +index 000000000..7584dc8c7 +--- /dev/null ++++ b/mpn/s390_64/z13/mul_1.c +@@ -0,0 +1,31 @@ ++/* mul_1 for IBM z13 or later ++ ++Copyright 2021 Free Software Foundation, Inc. ++ ++This file is part of the GNU MP Library. ++ ++The GNU MP Library is free software; you can redistribute it and/or modify ++it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++or ++ ++ * the GNU General Public License as published by the Free Software ++ Foundation; either version 2 of the License, or (at your option) any ++ later version. ++ ++or both in parallel, as here. ++ ++The GNU MP Library is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received copies of the GNU General Public License and the ++GNU Lesser General Public License along with the GNU MP Library. If not, ++see https://www.gnu.org/licenses/. 
*/ ++ ++#include "s390_64/z13/addmul_1.c" +-- +2.40.1 + diff --git a/SOURCES/ibm_z13_simd_part2.patch b/SOURCES/ibm_z13_simd_part2.patch new file mode 100644 index 0000000..347abd6 --- /dev/null +++ b/SOURCES/ibm_z13_simd_part2.patch @@ -0,0 +1,536 @@ +Co-authored-by: Stefan Liebler +--- + mpn/s390_64/z13/aormul_2.c | 476 +++++++++++++++++++++++++++++++++++ + mpn/s390_64/z13/gmp-mparam.h | 37 +++ + 2 files changed, 513 insertions(+) + create mode 100644 mpn/s390_64/z13/aormul_2.c + create mode 100644 mpn/s390_64/z13/gmp-mparam.h + +diff --git a/mpn/s390_64/z13/aormul_2.c b/mpn/s390_64/z13/aormul_2.c +new file mode 100644 +index 000000000..9a69fc38e +--- /dev/null ++++ b/mpn/s390_64/z13/aormul_2.c +@@ -0,0 +1,477 @@ ++/* Addmul_2 / mul_2 for IBM z13 or later ++ ++Copyright 2021 Free Software Foundation, Inc. ++ ++This file is part of the GNU MP Library. ++ ++The GNU MP Library is free software; you can redistribute it and/or modify ++it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++or ++ ++ * the GNU General Public License as published by the Free Software ++ Foundation; either version 2 of the License, or (at your option) any ++ later version. ++ ++or both in parallel, as here. ++ ++The GNU MP Library is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received copies of the GNU General Public License and the ++GNU Lesser General Public License along with the GNU MP Library. If not, ++see https://www.gnu.org/licenses/. */ ++ ++#include "gmp.h" ++#include "gmp-impl.h" ++ ++#include "s390_64/z13/common-vec.h" ++ ++#undef FUNCNAME ++ ++#ifdef DO_INLINE ++# ifdef OPERATION_addmul_2 ++# define ADD ++# define FUNCNAME inline_addmul_2 ++# elif defined(OPERATION_mul_2) ++# define FUNCNAME inline_mul_2 ++# else ++# error Missing define for operation to perform ++# endif ++#else ++# ifdef OPERATION_addmul_2 ++# define ADD ++# define FUNCNAME mpn_addmul_2 ++# elif defined(OPERATION_mul_2) ++# define FUNCNAME mpn_mul_2 ++# else ++# error Missing define for operation to perform ++# endif ++#endif ++ ++#ifdef DO_INLINE ++static inline mp_limb_t ++FUNCNAME (mp_limb_t *rp, const mp_limb_t *up, mp_size_t n, const mp_limb_t *vp) ++ __attribute__ ((always_inline)); ++ ++static inline ++#endif ++mp_limb_t ++FUNCNAME (mp_limb_t *rp, const mp_limb_t *up, mp_size_t n, ++ const mp_limb_t *vp) ++{ ++ ++ /* Combine 64x64 multiplication into GPR pairs (MLGR) with 128-bit adds in ++ VRs (using each VR as a single 128-bit accumulator). ++ The inner loop is unrolled to four limbs, with two blocks of four ++ multiplications each. Since the MLGR operation operates on even/odd GPR ++ pairs, pin the products appropriately. 
*/ ++ ++ register mp_limb_t p0_high asm("r0"); ++ register mp_limb_t p0_low asm("r1"); ++ ++ register mp_limb_t p1_high asm("r8"); ++ register mp_limb_t p1_low asm("r9"); ++ ++ register mp_limb_t p2_high asm("r6"); ++ register mp_limb_t p2_low asm("r7"); ++ ++ register mp_limb_t p3_high asm("r10"); ++ register mp_limb_t p3_low asm("r11"); ++ ++ vec_t carry_prod = { .dw = vec_splat_u64 (0) }; ++ vec_t zero = { .dw = vec_splat_u64 (0) }; ++ ++ /* two carry-bits for the 128-bit VR adds - stored in VRs */ ++#ifdef ADD ++ vec_t carry_vec0 = { .dw = vec_splat_u64 (0) }; ++#endif ++ vec_t carry_vec1 = { .dw = vec_splat_u64 (0) }; ++ ++ vec_t tmp; ++ ++ vec_t sum0, sum1; ++ ++ /* products transferred into VRs for accumulating there */ ++ vec_t pv0, pv3; ++ vec_t pv1_low, pv1_high, pv2_low, pv2_high; ++ vec_t low, middle, high; ++#ifdef ADD ++ vec_t rp0, rp1; ++#endif ++ ++ register mp_limb_t v0 asm("r12"); ++ register mp_limb_t v1 asm("r5"); ++ v0 = vp[0]; ++ v1 = vp[1]; ++ ++ /* The scalar multiplications compete with pointer and index increments for ++ * issue ports. Thus, increment the loop index in the middle of the loop so ++ * that the operations for the next iteration's multiplications can be ++ * loaded in time (looks horrible, yet helps performance) and make sure we ++ * use addressing with base reg + index reg + immediate displacement ++ * (so that only the single index needs incrementing, instead of multiple ++ * pointers). */ ++#undef LOOP_ADVANCE ++#define LOOP_ADVANCE (4 * sizeof (mp_limb_t)) ++#define IDX_OFFSET (LOOP_ADVANCE) ++ ++ register ssize_t idx = 0 - IDX_OFFSET; ++#ifdef BRCTG ++ ssize_t iterations = (size_t)n / 4; ++#else ++ ssize_t const idx_bound = n * sizeof (mp_limb_t) - IDX_OFFSET; ++#endif ++ ++ /* ++ * To minimize latency in the carry chain, accumulate in VRs with 128-bit ++ * adds with carry in and out. As a downside, these require two insns for ++ * each add - one to calculate the sum, one to deliver the carry out. ++ * To reduce the overall number of insns to execute, combine adding up ++ * product limbs such that there cannot be a carry out and one (for mul) or ++ * two (for addmul) adds with carry chains. ++ * ++ * Since (2^64-1) * (2^64-1) = (2^128-1) - 2 * (2^64-1), we can add two ++ * limbs into each 128-bit product without causing carry out. ++ * ++ * For each block of 2 limbs * 2 limbs ++ * ++ * | u[i] * v[0] (p2) | ++ * | u[i] * v[1] (p0) | ++ * | u[i+1] * v[0](p1) | ++ * | u[i+1] * v[1](p3) | ++ * < 128 bits > < 128 bits > ++ * ++ * we can begin accumulating with "simple" carry-oblivious 128-bit adds: ++ * - p0 + low limb of p1 ++ * + high limb of p2 ++ * and combine resulting low limb with p2's low limb ++ * - p3 + high limb of p1 ++ * + high limb of sum above ++ * ... which will will result in two 128-bit limbs to be fed into the carry ++ * chain(s). ++ * Overall, that scheme saves instructions and improves performance, despite ++ * slightly increasing latency between multiplications and carry chain (yet ++ * not in the carry chain). 
++ */ ++ ++#define LOAD_LOW_LIMB(VEC, LIMB) \ ++ do \ ++ { \ ++ asm("vzero\t%[vec]\n\t" \ ++ "vlvgg\t%[vec],%[limb],1" \ ++ : [vec] "=v"(VEC) \ ++ : [limb] "r"(LIMB)); \ ++ } \ ++ while (0) ++ ++ /* for the 128-bit adds in the carry chain, to calculate a + b + carry-in we ++ * need paired vec_adde_u128 (delivers sum) and vec_addec_u128 (delivers new ++ * carry) */ ++#define ADD_UP2_CARRY_INOUT(SUMIDX, CARRYIDX, ADDEND1, ADDEND2) \ ++ do \ ++ { \ ++ sum##SUMIDX.sw \ ++ = vec_adde_u128 (ADDEND1.sw, ADDEND2.sw, carry_vec##CARRYIDX.sw); \ ++ carry_vec##CARRYIDX.sw \ ++ = vec_addec_u128 (ADDEND1.sw, ADDEND2.sw, carry_vec##CARRYIDX.sw); \ ++ } \ ++ while (0) ++ ++#define ADD_UP_CARRY_INOUT(SUMIDX, ADDEND1, ADDEND2) \ ++ ADD_UP2_CARRY_INOUT (SUMIDX, SUMIDX, ADDEND1, ADDEND2) ++ ++ /* variant without carry-in for prologue */ ++#define ADD_UP2_CARRY_OUT(SUMIDX, CARRYIDX, ADDEND1, ADDEND2) \ ++ do \ ++ { \ ++ sum##SUMIDX.sw = vec_add_u128 (ADDEND1.sw, ADDEND2.sw); \ ++ carry_vec##CARRYIDX.sw = vec_addc_u128 (ADDEND1.sw, ADDEND2.sw); \ ++ } \ ++ while (0) ++ ++#define ADD_UP_CARRY_OUT(SUMIDX, ADDEND1, ADDEND2) \ ++ ADD_UP2_CARRY_OUT (SUMIDX, SUMIDX, ADDEND1, ADDEND2) ++ ++ /* prologue for 4x-unrolled main loop */ ++ switch ((size_t)n % 4) ++ { ++ case 1: ++ ASM_LOADGPR_BASE (p0_low, up, 0); ++ ASM_LOADGPR_BASE (p1_low, up, 0); ++ s390_double_umul_ppmm_distinct (p0_high, p0_low, p1_high, p1_low, v0, v1); ++ carry_prod.dw = vec_load_2di_as_pair (p1_high, p1_low); ++ ++/* gcc tries to be too clever and vlr from a reg that is already zero. vzero is ++ * cheaper. */ ++# define NEW_CARRY(VEC, LIMB) \ ++ do \ ++ { \ ++ asm("vzero\t%[vec]\n\t" \ ++ "vlvgg\t%[vec],%[limb],1" \ ++ : [vec] "=v"(VEC) \ ++ : [limb] "r"(LIMB)); \ ++ } \ ++ while (0) ++ ++ NEW_CARRY (tmp, p0_high); ++ ++ carry_prod.sw = vec_add_u128 (carry_prod.sw, tmp.sw); ++#ifdef ADD ++ carry_vec1.dw[1] = __builtin_add_overflow (rp[0], p0_low, rp); ++#else ++ rp[0] = p0_low; ++#endif ++ idx += sizeof (mp_limb_t); ++ break; ++ ++ case 2: ++ ASM_LOADGPR_BASE (p0_low, up, 0); ++ ASM_LOADGPR_BASE (p1_low, up, 8); ++ ASM_LOADGPR_BASE (p2_low, up, 0); ++ ASM_LOADGPR_BASE (p3_low, up, 8); ++ ++ asm("" ++ : "=r"(p0_low), "=r"(p2_low) ++ : "r"(p3_low), "0"(p0_low), "r"(p1_low), "1"(p2_low)); ++ s390_double_umul_ppmm_distinct (p0_high, p0_low, p1_high, p1_low, v1, v0); ++ s390_double_umul_ppmm_distinct (p2_high, p2_low, p3_high, p3_low, v0, v1); ++ ++ pv0.dw = vec_load_2di_as_pair (p0_high, p0_low); ++ LOAD_LOW_LIMB (pv1_low, p1_low); ++ LOAD_LOW_LIMB (pv1_high, p1_high); ++ pv0.sw = vec_add_u128 (pv0.sw, pv1_low.sw); ++ LOAD_LOW_LIMB (pv2_high, p2_high); ++ pv3.dw = vec_load_2di_as_pair (p3_high, p3_low); ++ LOAD_LOW_LIMB (pv2_low, p2_low); ++ pv3.sw = vec_add_u128 (pv3.sw, pv1_high.sw); ++ middle.sw = vec_add_u128 (pv0.sw, pv2_high.sw); ++ low.dw = vec_permi (middle.dw, pv2_low.dw, 3); ++ middle.dw = vec_permi (zero.dw, middle.dw, 0); ++ high.sw = vec_add_u128 (middle.sw, pv3.sw); ++#ifdef ADD ++ rp0 = vec_load_elements_reversed (rp, 0); ++ ADD_UP_CARRY_OUT (0, rp0, carry_prod); ++#else ++ sum0 = carry_prod; ++#endif ++ ADD_UP_CARRY_OUT (1, sum0, low); ++ vec_store_elements_reversed (rp, 0, sum1); ++ carry_prod = high; ++ ++ idx += 2 * sizeof (mp_limb_t); ++ break; ++ ++ case 3: ++ ASM_LOADGPR_BASE (p0_low, up, 0); ++ ASM_LOADGPR_BASE (p1_low, up, 0); ++ s390_double_umul_ppmm_distinct (p0_high, p0_low, p1_high, p1_low, v0, v1); ++ carry_prod.dw = vec_load_2di_as_pair (p1_high, p1_low); ++ NEW_CARRY (tmp, p0_high); ++ carry_prod.sw = vec_add_u128 
(carry_prod.sw, tmp.sw); ++ ++#ifdef ADD ++ carry_vec1.dw[1] = __builtin_add_overflow (rp[0], p0_low, rp); ++#else ++ rp[0] = p0_low; ++#endif ++ ++ ASM_LOADGPR_BASE (p0_low, up, 8); ++ ASM_LOADGPR_BASE (p1_low, up, 16); ++ ASM_LOADGPR_BASE (p2_low, up, 8); ++ ASM_LOADGPR_BASE (p3_low, up, 16); ++ ++ asm("" ++ : "=r"(p0_low), "=r"(p2_low) ++ : "r"(p3_low), "0"(p0_low), "r"(p1_low), "1"(p2_low)); ++ s390_double_umul_ppmm_distinct (p0_high, p0_low, p1_high, p1_low, v1, v0); ++ s390_double_umul_ppmm_distinct (p2_high, p2_low, p3_high, p3_low, v0, v1); ++ ++ pv0.dw = vec_load_2di_as_pair (p0_high, p0_low); ++ ++ LOAD_LOW_LIMB (pv1_low, p1_low); ++ LOAD_LOW_LIMB (pv1_high, p1_high); ++ ++ pv0.sw = vec_add_u128 (pv0.sw, pv1_low.sw); ++ LOAD_LOW_LIMB (pv2_high, p2_high); ++ pv3.dw = vec_load_2di_as_pair (p3_high, p3_low); ++ ++ LOAD_LOW_LIMB (pv2_low, p2_low); ++ ++ pv3.sw = vec_add_u128 (pv3.sw, pv1_high.sw); ++ middle.sw = vec_add_u128 (pv0.sw, pv2_high.sw); ++ ++ low.dw = vec_permi (middle.dw, pv2_low.dw, 3); ++ middle.dw = vec_permi (zero.dw, middle.dw, 0); ++ high.sw = vec_add_u128 (middle.sw, pv3.sw); ++ ++#ifdef ADD ++ vec_t rp0 = vec_load_elements_reversed (rp, 8); ++ ADD_UP_CARRY_OUT (0, rp0, carry_prod); ++#else ++ sum0 = carry_prod; ++#endif ++ ADD_UP_CARRY_INOUT (1, sum0, low); ++ ++ vec_store_elements_reversed (rp, 8, sum1); ++ ++ carry_prod = high; ++ ++ idx += 3 * sizeof (mp_limb_t); ++ break; ++ } ++ ++ /* ++ * branch-on-count implicitly hint to the branch prediction as taken, while ++ * compare-and-branch hints as not taken. currently, using branch-on-count ++ * has a performance advantage, but it is not clear that it is generally ++ * the better choice (e.g., branch-on-count requires decrementing the ++ * separate counter). so, allow switching the loop condition to enable ++ * either category of branch instructions: ++ * - idx is less than an upper bound, for compare-and-branch ++ * - iteration counter greater than zero, for branch-on-count ++ */ ++#ifdef BRCTG ++ for (; iterations > 0; iterations--) ++ { ++#else ++ while (idx < idx_bound) ++ { ++#endif ++ /* The 64x64->128 MLGR multiplies two factors in GPRs and stores the ++ * result in a GPR pair. One of the factors is taken from the GPR pair ++ * and overwritten. ++ * To reuse factors, it turned out cheaper to load limbs multiple times ++ * than copying GPR contents. Enforce that and the use of addressing by ++ * base + index gpr + immediate displacement via inline asm. ++ */ ++ ASM_LOADGPR (p0_low, up, idx, 0 + IDX_OFFSET); ++ ASM_LOADGPR (p1_low, up, idx, 8 + IDX_OFFSET); ++ ASM_LOADGPR (p2_low, up, idx, 0 + IDX_OFFSET); ++ ASM_LOADGPR (p3_low, up, idx, 8 + IDX_OFFSET); ++ ++ s390_double_umul_ppmm_distinct (p0_high, p0_low, p1_high, p1_low, v1, v0); ++ ++ pv0.dw = vec_load_2di_as_pair (p0_high, p0_low); ++ ++ LOAD_LOW_LIMB (pv1_low, p1_low); ++ LOAD_LOW_LIMB (pv1_high, p1_high); ++ ++ s390_double_umul_ppmm_distinct (p2_high, p2_low, p3_high, p3_low, v0, v1); ++ ++ pv0.sw = vec_add_u128 (pv0.sw, pv1_low.sw); ++ LOAD_LOW_LIMB (pv2_high, p2_high); ++ pv3.dw = vec_load_2di_as_pair (p3_high, p3_low); ++ ++ LOAD_LOW_LIMB (pv2_low, p2_low); ++ ++ ASM_LOADGPR (p0_low, up, idx, 16 + IDX_OFFSET); ++ ASM_LOADGPR (p1_low, up, idx, 24 + IDX_OFFSET); ++ ASM_LOADGPR (p2_low, up, idx, 16 + IDX_OFFSET); ++ ASM_LOADGPR (p3_low, up, idx, 24 + IDX_OFFSET); ++ ++ idx += LOOP_ADVANCE; ++ ++ /* ++ * "barrier" to enforce scheduling the index increment before the second ++ * block of multiplications. not required for clang. 
++ */ ++#ifndef __clang__ ++ asm("" ++ : "=r"(idx), "=r"(p0_high), "=r"(p2_high) ++ : "0"(idx), "1"(p0_high), "2"(p2_high)); ++#endif ++ ++ s390_double_umul_ppmm_distinct (p0_high, p0_low, p1_high, p1_low, v1, v0); ++ s390_double_umul_ppmm_distinct (p2_high, p2_low, p3_high, p3_low, v0, v1); ++ ++ /* ++ * "barrier" to enforce scheduling all MLGRs first, before any adding ++ * up. note that clang produces better code without. ++ */ ++#ifndef __clang__ ++ asm("" ++ : "=v"(pv0.sw), "=v"(pv3.sw) ++ : "1"(pv3.sw), "0"(pv0.sw), "r"(p0_high), "r"(p2_high)); ++#endif ++ ++ pv3.sw = vec_add_u128 (pv3.sw, pv1_high.sw); ++ middle.sw = vec_add_u128 (pv0.sw, pv2_high.sw); ++ ++ low.dw = vec_permi (middle.dw, pv2_low.dw, ++ 3); /* least-significant doubleword from both vectors */ ++ middle.dw = vec_permi (zero.dw, middle.dw, 0); ++ high.sw = vec_add_u128 (middle.sw, pv3.sw); ++ ++#ifdef ADD ++ rp0 = vec_load_elements_reversed_idx (rp, idx, ++ 0 + IDX_OFFSET - LOOP_ADVANCE); ++ ADD_UP_CARRY_INOUT (0, rp0, carry_prod); ++#else ++ sum0 = carry_prod; ++#endif ++ ADD_UP_CARRY_INOUT (1, sum0, low); ++ ++ vec_store_elements_reversed_idx (rp, idx, 0 + IDX_OFFSET - LOOP_ADVANCE, ++ sum1); ++ ++ carry_prod = high; ++ ++ vec_t pv0_2, pv3_2; ++ vec_t pv1_low_2, pv1_high_2, pv2_low_2, pv2_high_2; ++ vec_t low_2, middle_2, high_2; ++ vec_t sum2, sum3; ++ ++ pv0_2.dw = vec_load_2di_as_pair (p0_high, p0_low); ++ LOAD_LOW_LIMB (pv1_low_2, p1_low); ++ LOAD_LOW_LIMB (pv1_high_2, p1_high); ++ ++ pv0_2.sw = vec_add_u128 (pv0_2.sw, pv1_low_2.sw); ++ LOAD_LOW_LIMB (pv2_high_2, p2_high); ++ pv3_2.dw = vec_load_2di_as_pair (p3_high, p3_low); ++ pv3_2.sw = vec_add_u128 (pv3_2.sw, pv1_high_2.sw); ++ middle_2.sw = vec_add_u128 (pv0_2.sw, pv2_high_2.sw); ++ ++ LOAD_LOW_LIMB (pv2_low_2, p2_low); ++ low_2.dw ++ = vec_permi (middle_2.dw, pv2_low_2.dw, ++ 3); /* least-significant doubleword from both vectors */ ++ middle_2.dw = vec_permi (zero.dw, middle_2.dw, 0); ++ high_2.sw = vec_add_u128 (middle_2.sw, pv3_2.sw); ++ ++ /* ++ * another "barrier" to influence scheduling. (also helps in clang) ++ */ ++ asm("" : : "v"(pv0_2.sw), "r"(p2_high), "r"(p3_high), "v"(pv3_2.sw)); ++ ++#ifdef ADD ++ rp1 = vec_load_elements_reversed_idx (rp, idx, ++ 16 + IDX_OFFSET - LOOP_ADVANCE); ++ ADD_UP2_CARRY_INOUT (2, 0, rp1, carry_prod); ++#else ++ sum2 = carry_prod; ++#endif ++ ADD_UP2_CARRY_INOUT (3, 1, sum2, low_2); ++ ++ vec_store_elements_reversed_idx (rp, idx, 16 + IDX_OFFSET - LOOP_ADVANCE, ++ sum3); ++ ++ carry_prod = high_2; ++ } ++ ++#ifdef ADD ++ sum0.sw = vec_adde_u128 (carry_prod.sw, carry_vec0.sw, carry_vec1.sw); ++#else ++ sum0.sw = vec_add_u128 (carry_prod.sw, carry_vec1.sw); ++#endif ++ ++ *(mp_ptr) (((char *)rp) + idx + 0 + IDX_OFFSET) = (mp_limb_t)sum0.dw[1]; ++ ++ return (mp_limb_t)sum0.dw[0]; ++} +diff --git a/mpn/s390_64/z13/gmp-mparam.h b/mpn/s390_64/z13/gmp-mparam.h +new file mode 100644 +index 000000000..a17503fd0 +--- /dev/null ++++ b/mpn/s390_64/z13/gmp-mparam.h +@@ -0,0 +1,37 @@ ++/* S/390-64 for IBM z13 gmp-mparam.h -- Compiler/machine parameter header file. ++ ++Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc. ++ ++This file is part of the GNU MP Library. ++ ++The GNU MP Library is free software; you can redistribute it and/or modify ++it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. 
++ ++or ++ ++ * the GNU General Public License as published by the Free Software ++ Foundation; either version 2 of the License, or (at your option) any ++ later version. ++ ++or both in parallel, as here. ++ ++The GNU MP Library is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received copies of the GNU General Public License and the ++GNU Lesser General Public License along with the GNU MP Library. If not, ++see https://www.gnu.org/licenses/. */ ++ ++#define GMP_LIMB_BITS 64 ++#define GMP_LIMB_BYTES 8 ++ ++#define HAVE_NATIVE_mpn_addmul_2 1 ++#define HAVE_NATIVE_mpn_mul_2 1 ++ ++#include "mpn/s390_64/gmp-mparam.h" +-- +2.40.1 diff --git a/SOURCES/ibm_z13_simd_part3.patch b/SOURCES/ibm_z13_simd_part3.patch new file mode 100644 index 0000000..19069ca --- /dev/null +++ b/SOURCES/ibm_z13_simd_part3.patch @@ -0,0 +1,139 @@ +Co-authored-by: Stefan Liebler +--- + mpn/s390_64/z13/mul_basecase.c | 124 +++++++++++++++++++++++++++++++++ + 1 file changed, 124 insertions(+) + create mode 100644 mpn/s390_64/z13/mul_basecase.c + +diff --git a/mpn/s390_64/z13/mul_basecase.c b/mpn/s390_64/z13/mul_basecase.c +new file mode 100644 +index 000000000..f1b7160b3 +--- /dev/null ++++ b/mpn/s390_64/z13/mul_basecase.c +@@ -0,0 +1,125 @@ ++/* mpn_mul_basecase for IBM z13 and later -- Internal routine to multiply two ++ natural numbers of length m and n. ++ ++ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY ++ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. ++ ++Copyright 2021 Free Software Foundation, Inc. ++ ++This file is part of the GNU MP Library. ++ ++The GNU MP Library is free software; you can redistribute it and/or modify ++it under the terms of either: ++ ++ * the GNU Lesser General Public License as published by the Free ++ Software Foundation; either version 3 of the License, or (at your ++ option) any later version. ++ ++or ++ ++ * the GNU General Public License as published by the Free Software ++ Foundation; either version 2 of the License, or (at your option) any ++ later version. ++ ++or both in parallel, as here. ++ ++The GNU MP Library is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received copies of the GNU General Public License and the ++GNU Lesser General Public License along with the GNU MP Library. If not, ++see https://www.gnu.org/licenses/. */ ++ ++#include ++ ++#include "gmp.h" ++#include "gmp-impl.h" ++ ++/* Note: we explicitly inline all mul and addmul routines here to reduce the ++ * number of branches in prologues of unrolled functions. That comes at the ++ cost of duplicating common loop bodies in object code. */ ++#define DO_INLINE ++ ++/* ++ * tweak loop conditions in addmul subroutines to enable use of ++ * branch-relative-on-count (BRCTG) instructions, which currently results in ++ * better performance. 
++ */ ++#define BRCTG ++ ++#include "s390_64/z13/common-vec.h" ++ ++#define OPERATION_mul_1 ++#include "s390_64/z13/addmul_1.c" ++#undef OPERATION_mul_1 ++ ++#define OPERATION_addmul_1 ++#include "s390_64/z13/addmul_1.c" ++#undef OPERATION_addmul_1 ++ ++#define OPERATION_mul_2 ++#include "s390_64/z13/aormul_2.c" ++#undef OPERATION_mul_2 ++ ++#define OPERATION_addmul_2 ++#include "s390_64/z13/aormul_2.c" ++#undef OPERATION_addmul_2 ++ ++void ++mpn_mul_basecase (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, ++ mp_size_t vn) ++{ ++ ASSERT (un >= vn); ++ ASSERT (vn >= 1); ++ ASSERT (!MPN_OVERLAP_P (rp, un + vn, up, un)); ++ ASSERT (!MPN_OVERLAP_P (rp, un + vn, vp, vn)); ++ ++ /* The implementations of (add)mul_1/2 are 4x-unrolled. Pull out the branch ++ * for un%4 and inline specific variants. */ ++ ++#define BRANCH_FOR_MOD(N) \ ++ do \ ++ { \ ++ if (vn >= 2) \ ++ { \ ++ rp[un + 1] = inline_mul_2 (rp, up, un, vp); \ ++ rp += 2, vp += 2, vn -= 2; \ ++ } \ ++ else \ ++ { \ ++ rp[un] = inline_mul_1 (rp, up, un, vp[0]); \ ++ return; \ ++ } \ ++ \ ++ while (vn >= 2) \ ++ { \ ++ rp[un + 2 - 1] = inline_addmul_2 (rp, up, un, vp); \ ++ rp += 2, vp += 2, vn -= 2; \ ++ } \ ++ \ ++ while (vn >= 1) \ ++ { \ ++ rp[un] = inline_addmul_1 (rp, up, un, vp[0]); \ ++ rp += 1, vp += 1, vn -= 1; \ ++ } \ ++ } \ ++ while (0); ++ ++ switch (((size_t)un) % 4) ++ { ++ case 0: ++ BRANCH_FOR_MOD (0); ++ break; ++ case 1: ++ BRANCH_FOR_MOD (1); ++ break; ++ case 2: ++ BRANCH_FOR_MOD (2); ++ break; ++ case 3: ++ BRANCH_FOR_MOD (3); ++ break; ++ } ++} +-- +2.40.1 diff --git a/SOURCES/ibm_z13_simd_part4.patch b/SOURCES/ibm_z13_simd_part4.patch new file mode 100644 index 0000000..c87c17c --- /dev/null +++ b/SOURCES/ibm_z13_simd_part4.patch @@ -0,0 +1,151 @@ +From: Marius Hillenbrand + +--- + mpn/s390_64/z13/gmp-mparam.h | 129 ++++++++++++++++++++++++++++++++++- + 1 file changed, 127 insertions(+), 2 deletions(-) + +diff --git a/mpn/s390_64/z13/gmp-mparam.h b/mpn/s390_64/z13/gmp-mparam.h +index a17503fd0..50e7f39d1 100644 +--- a/mpn/s390_64/z13/gmp-mparam.h ++++ b/mpn/s390_64/z13/gmp-mparam.h +@@ -1,6 +1,6 @@ + /* S/390-64 for IBM z13 gmp-mparam.h -- Compiler/machine parameter header file. + +-Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc. ++Copyright 2021 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + +@@ -34,4 +34,129 @@ see https://www.gnu.org/licenses/. 
*/ + #define HAVE_NATIVE_mpn_addmul_2 1 + #define HAVE_NATIVE_mpn_mul_2 1 + +-#include "mpn/s390_64/gmp-mparam.h" ++/* Generated by tuneup.c, 2021-07-30, gcc 10.2 */ ++ ++#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ ++#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ ++#define MOD_1_1P_METHOD 2 ++#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ ++#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ ++#define MOD_1N_TO_MOD_1_1_THRESHOLD 17 ++#define MOD_1U_TO_MOD_1_1_THRESHOLD 15 ++#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ ++#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ ++#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 5 ++#define USE_PREINV_DIVREM_1 1 ++#define DIV_QR_1N_PI1_METHOD 3 ++#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ ++#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ ++#define DIV_QR_2_PI2_THRESHOLD 996 ++#define DIVEXACT_1_THRESHOLD 4 ++#define BMOD_1_TO_MOD_1_THRESHOLD 0 /* always */ ++ ++#define DIV_1_VS_MUL_1_PERCENT 404 ++ ++#define MUL_TOOM22_THRESHOLD 23 ++#define MUL_TOOM33_THRESHOLD 94 ++#define MUL_TOOM44_THRESHOLD 166 ++#define MUL_TOOM6H_THRESHOLD 286 ++#define MUL_TOOM8H_THRESHOLD 626 ++ ++#define MUL_TOOM32_TO_TOOM43_THRESHOLD 113 ++#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138 ++#define MUL_TOOM42_TO_TOOM53_THRESHOLD 143 ++#define MUL_TOOM42_TO_TOOM63_THRESHOLD 145 ++#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 ++ ++#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ ++#define SQR_TOOM2_THRESHOLD 12 ++#define SQR_TOOM3_THRESHOLD 84 ++#define SQR_TOOM4_THRESHOLD 234 ++#define SQR_TOOM6_THRESHOLD 318 ++#define SQR_TOOM8_THRESHOLD 478 ++ ++#define MULMID_TOOM42_THRESHOLD 42 ++ ++#define MULMOD_BNM1_THRESHOLD 13 ++#define SQRMOD_BNM1_THRESHOLD 7 ++ ++#define MUL_FFT_MODF_THRESHOLD 332 /* k = 5 */ ++#define MUL_FFT_TABLE3 \ ++ { { 332, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ ++ { 21, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \ ++ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \ ++ { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ ++ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ ++ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ ++ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \ ++ { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \ ++ { 67,10}, { 47,11}, { 2048,12}, { 4096,13}, \ ++ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \ ++ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ ++ {2097152,22}, {4194304,23}, {8388608,24} } ++#define MUL_FFT_TABLE3_SIZE 47 ++#define MUL_FFT_THRESHOLD 2752 ++ ++#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */ ++#define SQR_FFT_TABLE3 \ ++ { { 240, 5}, { 8, 4}, { 17, 5}, { 13, 6}, \ ++ { 7, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \ ++ { 9, 5}, { 19, 6}, { 15, 7}, { 8, 6}, \ ++ { 17, 7}, { 9, 6}, { 19, 7}, { 10, 6}, \ ++ { 21, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \ ++ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \ ++ { 21, 9}, { 11, 8}, { 23, 9}, { 15, 8}, \ ++ { 31, 9}, { 19, 8}, { 39, 9}, { 23,10}, \ ++ { 15, 9}, { 39,10}, { 23,11}, { 15,10}, \ ++ { 31, 9}, { 63,10}, { 47,11}, { 2048,12}, \ ++ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16}, \ ++ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \ ++ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} } ++#define SQR_FFT_TABLE3_SIZE 52 ++#define SQR_FFT_THRESHOLD 1856 ++ ++#define MULLO_BASECASE_THRESHOLD 0 /* always */ ++#define MULLO_DC_THRESHOLD 25 ++#define MULLO_MUL_N_THRESHOLD 5397 ++#define SQRLO_BASECASE_THRESHOLD 0 /* always */ ++#define SQRLO_DC_THRESHOLD 396 ++#define SQRLO_SQR_THRESHOLD 3704 ++ ++#define DC_DIV_QR_THRESHOLD 15 
++#define DC_DIVAPPR_Q_THRESHOLD 50 ++#define DC_BDIV_QR_THRESHOLD 66 ++#define DC_BDIV_Q_THRESHOLD 202 ++ ++#define INV_MULMOD_BNM1_THRESHOLD 46 ++#define INV_NEWTON_THRESHOLD 29 ++#define INV_APPR_THRESHOLD 13 ++ ++#define BINV_NEWTON_THRESHOLD 312 ++#define REDC_1_TO_REDC_2_THRESHOLD 79 ++#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */ ++ ++#define MU_DIV_QR_THRESHOLD 979 ++#define MU_DIVAPPR_Q_THRESHOLD 979 ++#define MUPI_DIV_QR_THRESHOLD 13 ++#define MU_BDIV_QR_THRESHOLD 942 ++#define MU_BDIV_Q_THRESHOLD 1367 ++ ++#define POWM_SEC_TABLE 3,19,215,1730 ++ ++#define GET_STR_DC_THRESHOLD 10 ++#define GET_STR_PRECOMPUTE_THRESHOLD 15 ++#define SET_STR_DC_THRESHOLD 882 ++#define SET_STR_PRECOMPUTE_THRESHOLD 2520 ++ ++#define FAC_DSC_THRESHOLD 228 ++#define FAC_ODD_THRESHOLD 24 ++ ++#define MATRIX22_STRASSEN_THRESHOLD 19 ++#define HGCD2_DIV1_METHOD 1 ++#define HGCD_THRESHOLD 61 ++#define HGCD_APPR_THRESHOLD 51 ++#define HGCD_REDUCE_THRESHOLD 1962 ++#define GCD_DC_THRESHOLD 217 ++#define GCDEXT_DC_THRESHOLD 263 ++#define JACOBI_BASE_METHOD 4 ++ +-- +2.40.1 diff --git a/SPECS/gmp.spec b/SPECS/gmp.spec new file mode 100644 index 0000000..40e3f02 --- /dev/null +++ b/SPECS/gmp.spec @@ -0,0 +1,712 @@ +# +# Important for %%{ix86}: +# This rpm has to be build on a CPU with sse2 support like Pentium 4 ! +# + +Summary: A GNU arbitrary precision library +Name: gmp +Version: 6.1.2 +Release: 12%{?dist} +Epoch: 1 +URL: http://gmplib.org/ +Source0: ftp://ftp.gmplib.org/pub/gmp-%{version}/gmp-%{version}.tar.bz2 +# or ftp://ftp.gnu.org/pub/gnu/gmp/gmp-%{version}.tar.xz +Source2: gmp.h +Source3: gmp-mparam.h +Patch2: gmp-6.0.0-debuginfo.patch +Patch3: gmp-fcf-protection.patch +Patch4: cve-2021-43618.patch +Patch5: ibm_z13_simd_part1.patch +Patch6: ibm_z13_simd_part2.patch +Patch7: ibm_z13_simd_part3.patch +Patch8: ibm_z13_simd_part4.patch +License: LGPLv3+ or GPLv2+ +Group: System Environment/Libraries +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildRequires: autoconf automake libtool +BuildRequires: git +#autoreconf on arm needs: +BuildRequires: perl-Carp +BuildRequires: fipscheck + +%description +The gmp package contains GNU MP, a library for arbitrary precision +arithmetic, signed integers operations, rational numbers and floating +point numbers. GNU MP is designed for speed, for both small and very +large operands. GNU MP is fast because it uses fullwords as the basic +arithmetic type, it uses fast algorithms, it carefully optimizes +assembly code for many CPUs' most common inner loops, and it generally +emphasizes speed over simplicity/elegance in its operations. + +Install the gmp package if you need a fast arbitrary precision +library. + +%package c++ +Summary: C++ bindings for the GNU MP arbitrary precision library +Group: System Environment/Libraries +Requires: %{name}%{?_isa} = %{epoch}:%{version}-%{release} + +%description c++ +Bindings for using the GNU MP arbitrary precision library in C++ applications. + +%package devel +Summary: Development tools for the GNU MP arbitrary precision library +Group: Development/Libraries +Requires: %{name}%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-c++%{?_isa} = %{epoch}:%{version}-%{release} +Requires(post): /sbin/install-info +Requires(preun): /sbin/install-info + +%description devel +The libraries, header files and documentation for using the GNU MP +arbitrary precision library in applications. 
+ +If you want to develop applications which will use the GNU MP library, +you'll need to install the gmp-devel package. You'll also need to +install the gmp package. + +%package static +Summary: Development tools for the GNU MP arbitrary precision library +Group: Development/Libraries +Requires: %{name}-devel = %{epoch}:%{version}-%{release} + +%description static +The static libraries for using the GNU MP arbitrary precision library +in applications. + +%prep +%autosetup -S git + +# switch the defaults to new cpus on s390x +%ifarch s390x +( cd mpn/s390_64; ln -s z13 s390x ) +%endif + +%build +autoreconf -ifv +if as --help | grep -q execstack; then + # the object files do not require an executable stack + export CCAS="gcc -c -Wa,--noexecstack" +fi + +%ifarch %{ix86} + export CFLAGS=$(echo %{optflags} | sed -e "s/-mtune=[^ ]*//g" | sed -e "s/-march=[^ ]*/-march=i686/g") + export CXXFLAGS=$(echo %{optflags} | sed -e "s/-mtune=[^ ]*//g" | sed -e "s/-march=[^ ]*/-march=i686/g") +%endif + +export CCAS="$CCAS -Wa,--generate-missing-build-notes=yes" +export CFLAGS="$(echo %{optflags}) -fplugin=annobin" +export CXXFLAGS="$(echo %{optflags}) -fplugin=annobin" + +%configure --enable-cxx --enable-fat + +sed -e 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' \ + -e 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' \ + -e 's|-lstdc++ -lm|-lstdc++|' \ + -i libtool +export LD_LIBRARY_PATH=`pwd`/.libs +make %{?_smp_mflags} +make check + +# Add generation of HMAC checksums of the final stripped binaries +# bz#1117188 +%define __spec_install_post \ + %{?__debug_package:%{__debug_install_post}} \ + %{__arch_install_post} \ + %{__os_install_post} \ + mkdir -p $RPM_BUILD_ROOT%{_libdir}/fipscheck \ + fipshmac -d $RPM_BUILD_ROOT%{_libdir}/fipscheck $RPM_BUILD_ROOT%{_libdir}/libgmp.so.10.3.2 \ + ln -s libgmp.so.10.3.2.hmac $RPM_BUILD_ROOT%{_libdir}/fipscheck/libgmp.so.10.hmac \ + %{nil} + +%install +export LD_LIBRARY_PATH=`pwd`/.libs +make install DESTDIR=$RPM_BUILD_ROOT +install -m 644 gmp-mparam.h ${RPM_BUILD_ROOT}%{_includedir} +rm -f $RPM_BUILD_ROOT%{_libdir}/lib{gmp,mp,gmpxx}.la +rm -f $RPM_BUILD_ROOT%{_infodir}/dir +/sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir} +ln -sf libgmpxx.so.4 $RPM_BUILD_ROOT%{_libdir}/libgmpxx.so + +# Rename gmp.h to gmp-.h and gmp-mparam.h to gmp-mparam-.h to +# avoid file conflicts on multilib systems and install wrapper include files +# gmp.h and gmp-mparam-.h +basearch=%{_arch} +# always use i386 for iX86 +%ifarch %{ix86} +basearch=i386 +%endif +# always use arm for arm* +%ifarch %{arm} +basearch=arm +%endif +# superH architecture support +%ifarch sh3 sh4 +basearch=sh +%endif +# Rename files and install wrappers + +mv %{buildroot}/%{_includedir}/gmp.h %{buildroot}/%{_includedir}/gmp-${basearch}.h +install -m644 %{SOURCE2} %{buildroot}/%{_includedir}/gmp.h +mv %{buildroot}/%{_includedir}/gmp-mparam.h %{buildroot}/%{_includedir}/gmp-mparam-${basearch}.h +install -m644 %{SOURCE3} %{buildroot}/%{_includedir}/gmp-mparam.h + + +%check +%ifnarch ppc +export LD_LIBRARY_PATH=`pwd`/.libs +make %{?_smp_mflags} check +%endif + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%post c++ -p /sbin/ldconfig + +%postun c++ -p /sbin/ldconfig + +%post devel +if [ -f %{_infodir}/gmp.info.gz ]; then + /sbin/install-info %{_infodir}/gmp.info.gz %{_infodir}/dir || : +fi +exit 0 + +%preun devel +if [ $1 = 0 ]; then + if [ -f %{_infodir}/gmp.info.gz ]; then + /sbin/install-info --delete %{_infodir}/gmp.info.gz %{_infodir}/dir || : + fi +fi +exit 0 + +%files 
+%defattr(-,root,root,-)
+%{!?_licensedir:%global license %%doc}
+%license COPYING COPYING.LESSERv3 COPYINGv2 COPYINGv3
+%doc NEWS README
+%{_libdir}/libgmp.so.*
+%{_libdir}/fipscheck/libgmp.so.10.3.2.hmac
+%{_libdir}/fipscheck/libgmp.so.10.hmac
+
+%files c++
+%{_libdir}/libgmpxx.so.*
+
+%files devel
+%defattr(-,root,root,-)
+%{_libdir}/libgmp.so
+%{_libdir}/libgmpxx.so
+%{_includedir}/*.h
+%{_infodir}/gmp.info*
+
+%files static
+%defattr(-,root,root,-)
+%{_libdir}/libgmp.a
+%{_libdir}/libgmpxx.a
+
+%changelog
+* Mon Feb 05 2024 Jakub Martisko - 1:6.1.2-12
+- Add s390x optimizations
+Resolves: RHEL-10549
+
+* Mon Jan 29 2024 Jakub Martisko - 1:6.1.2-11
+- Fix: CVE-2021-43618
+Resolves: RHEL-23055
+
+* Fri Jun 14 2019 Jakub Martisko - 1:6.1.2-10
+- Add gating.yaml
+Related: #1681026
+
+* Tue Jun 11 2019 Jakub Martisko - 1:6.1.2-9
+- Add support for intel CET and -fcf-protection
+- Add missing compiler/linker flags
+Related: #1630567
+
+* Thu Jul 26 2018 David Kaspar [Dee'Kej] - 1:6.1.2-8
+- Missing fipschecks added into build process (bug #1553679)
+- --enable-fat option added to %%configure (bug #1493218)
+
+* Wed Feb 07 2018 Fedora Release Engineering - 1:6.1.2-7
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild
+
+* Wed Aug 02 2017 Fedora Release Engineering - 1:6.1.2-6
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild
+
+* Wed Jul 26 2017 Fedora Release Engineering - 1:6.1.2-5
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild
+
+* Mon Mar 13 2017 David Kaspar [Dee'Kej] - 1:6.1.2-4
+- Fix the build process for ix89 family
+
+* Fri Feb 17 2017 David Kaspar [Dee'Kej] - 1:6.1.2-3
+- Build process updated to correctly build .debug_info for i386
+  and to correctly use hardening flags
+
+* Fri Feb 10 2017 Fedora Release Engineering - 1:6.1.2-2
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild
+
+* Tue Dec 20 2016 Frantisek Kluknavsky - 1:6.1.2-1
+- rebase
+
+* Wed Jun 22 2016 Frantisek Kluknavsky - 1:6.1.1-1
+- rebase
+
+* Fri Apr 08 2016 Yaakov Selkowitz - 1:6.1.0-3
+- Split c++ subpackage (#1325439)
+
+* Wed Feb 03 2016 Fedora Release Engineering - 1:6.1.0-2
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild
+
+* Wed Nov 25 2015 Frantisek Kluknavsky - 1:6.1.0-1
+- rebase to 6.1.0
+- gmp-6.0.0-ppc64.patch already upstream, dropped
+
+* Mon Sep 14 2015 Frantisek Kluknavsky - 1:6.0.0-13
+- do not package sse2 variant, use --enable-fat instead (a bit dangerous, some low level routines will be skipped in `make check`)
+
+* Fri Sep 04 2015 Michal Toman - 1:6.0.0-12
+- Add support for MIPS architecture to gmp.h and gmp-mparam.h
+
+* Wed Jun 17 2015 Fedora Release Engineering - 1:6.0.0-11
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild
+
+* Sat May 02 2015 Kalev Lember - 1:6.0.0-10
+- Rebuilt for GCC 5 C++11 ABI change
+
+* Thu Apr 02 2015 Frantisek Kluknavsky - 1:6.0.0-9
+- bug965318 - improve debuginfo of assembler sources
+
+* Thu Sep 04 2014 Dan Horák - 1:6.0.0-8
+- drop s390x patch, support is already in upstream
+
+* Sat Aug 16 2014 Fedora Release Engineering - 1:6.0.0-7
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild
+
+* Sat Jul 12 2014 Tom Callaway - 1:6.0.0-6
+- fix license handling
+
+* Thu Jul 10 2014 Brent Baude - 1:6.0.0-5
+- Fix gmp headers for ppc64le (#1083429)
+
+* Sat Jun 07 2014 Fedora Release Engineering - 1:6.0.0-4
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild
+
+* Thu Apr 24 2014 Karsten Hopp 6.0.0-3
+- set default for BMOD_1_TO_MOD_1_THRESHOLD on ppc64, patch by
+  Torbjorn Granlund:
+  https://gmplib.org/repo/gmp/rev/4a6d258b467f
+
+* Mon Apr 14 2014 Frantisek Kluknavsky - 1:6.0.0-2
+- rebase
+
+* Wed Nov 06 2013 Frantisek Kluknavsky - 1:5.1.3-2
+- support for aarch64
+
+* Wed Nov 06 2013 Frantisek Kluknavsky - 1:5.1.3-1
+- rebase to 5.1.3
+
+* Sat Aug 03 2013 Fedora Release Engineering - 1:5.1.2-2
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild
+
+* Thu May 30 2013 Frantisek Kluknavsky - 1:5.1.2-1
+- rebase to 5.1.2
+
+* Thu Mar 28 2013 Frantisek Kluknavsky - 1:5.1.1-3
+- added build dependency needed to autoreconf on arm
+
+* Thu Feb 14 2013 Frantisek Kluknavsky - 1:5.1.1-2
+- rebase to 5.1.1
+- deleted unapplicable part of gmp-4.0.1-s390.patch
+
+* Fri Jan 25 2013 Frantisek Kluknavsky - 1:5.1.0-1
+- rebase to 5.1.0, de-ansi patch no longer applicable
+- upstream dropped libmp.so (bsdmp-like interface)
+- silenced bogus date in changelog
+
+* Tue Jan 22 2013 Peter Robinson 1:5.0.5-6
+- Rebuild against new binutils to fix FTBFS on ARM
+
+* Fri Nov 23 2012 Frantisek Kluknavsky - 1:5.0.5-5
+- minor spec cleanup
+
+* Fri Jul 20 2012 Peter Schiffer 1:5.0.5-3
+- fixed FTBFS
+
+* Thu Jul 19 2012 Fedora Release Engineering - 1:5.0.5-2
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild
+
+* Mon Jun 25 2012 Peter Schiffer 1:5.0.5-1
+- resolves: #820897
+  update to 5.0.5
+
+* Thu Apr 19 2012 Peter Schiffer 1:5.0.4-1
+- resolves: #785116
+  update to 5.0.4
+
+* Tue Feb 28 2012 Fedora Release Engineering - 1:5.0.2-6
+- Rebuilt for c++ ABI breakage
+
+* Thu Jan 19 2012 Peter Schiffer 1:5.0.2-5
+- fixed FTBFS with gcc 4.7 on 32bit arch
+
+* Fri Jan 13 2012 Fedora Release Engineering - 1:5.0.2-4
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild
+
+* Fri Oct 14 2011 Peter Schiffer 1:5.0.2-3
+- removed old compatibility library
+
+* Mon Sep 26 2011 Peter Schiffer 1:5.0.2-2
+- temporary build wild old compatibility library version
+
+* Tue Sep 20 2011 Peter Schiffer 1:5.0.2-1
+- resolves: #702919
+  update to 5.0.2
+- resolves: #738091
+  removed unused direct shlib dependency on libm
+  updated license in gmp.h and gmp-mparam.h files
+
+* Mon Jun 13 2011 Ivana Hutarova Varekova 1:4.3.2-4
+- Resolves: #706374
+  fix sse2/libgmp.so.3.5.2 debuginfo data
+
+* Tue Feb 08 2011 Fedora Release Engineering - 1:4.3.2-3
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild
+
+* Wed Nov 24 2010 Ivana Hutarova Varekova 1:4.3.2-2
+- fix Requires tag
+
+* Wed Nov 24 2010 Ivana Hutarova Varekova 1:4.3.2-1
+- downgrade from 5.0.1 to 4.3.2
+
+* Mon May 24 2010 Ivana Hutarova Varekova 5.0.1-1
+- update to 5.0.1
+
+* Tue Mar 2 2010 Ivana Hutarova Varekova 4.3.1-7
+- fix the license tag
+
+* Fri Nov 27 2009 Ivana Hutarova Varekova 4.3.1-6
+- remove unnecessary dependences
+  remove duplicated documentation
+
+* Mon Aug 10 2009 Ivana Varekova 4.3.1-5
+- fix installation with --excludedocs option (#515947)
+
+* Fri Jul 24 2009 Fedora Release Engineering - 4.3.1-4
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild
+
+* Wed Jun 17 2009 Ivana Varekova 4.3.1-3
+- rebuild
+
+* Mon Jun 15 2009 Ivana Varekova 4.3.1-2
+- Resolves: #505592
+  add RPM_OPT_FLAGS
+
+* Thu May 28 2009 Ivana Varekova 4.3.1-1
+- update to 4.3.1
+- remove configure macro (built problem)
+
+* Thu Apr 09 2009 Dennis Gilmore - 4.2.4-6
+- no check that --host and --target are the same when building i586 or sparcv9 they are not
+
+* Tue Feb 24 2009 Fedora Release Engineering - 4.2.4-5
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild
+
+* Tue Dec 23 2008 Ivana Varekova 4.2.4-4
+- fix spec file
+
+* Mon Dec 8 2008 Ivana Varekova 4.2.4-3
+- remove useless option (#475073)
+
+* Wed Dec 3 2008 Stepan Kasal 4.2.4-2
+- Run full autoreconf, add automake to BuildRequires.
+
+* Mon Nov 10 2008 Ivana Varekova 4.2.4-1
+- update to 4.2.4
+
+* Fri Nov 7 2008 Ivana Varekova 4.2.2-9
+- remove useless patch (#470200)
+
+* Thu Apr 24 2008 Tom "spot" Callaway 4.2.2-8
+- add sparc/sparc64 support
+
+* Wed Mar 19 2008 Ivana Varekova 4.2.2-7
+- add superH support (#437688)
+
+* Wed Feb 13 2008 Ivana varekova 4.2.2-6
+- fix gcc-4.3 problem - add (#432336)
+
+* Fri Feb 8 2008 Ivana Varekova 4.2.2-5
+- split the devel subpackage to devel and static parts
+
+* Thu Feb 7 2008 Ivana Varekova 4.2.2-4
+- change license tag
+
+* Mon Sep 24 2007 Ivana Varekova 4.2.2-3
+- fix libgmpxx.so link
+
+* Thu Sep 20 2007 Ivana Varekova 4.2.2-2
+- fix check tag
+
+* Wed Sep 19 2007 Ivana Varekova 4.2.2-1
+- update to 4.2.2
+
+* Mon Aug 20 2007 Ivana Varekova 4.2.1-3
+- spec file cleanup (#253439)
+
+* Tue Aug 7 2007 Ivana Varekova 4.2.1-2
+- add arm support (#245456)
+  thanks to Lennert Buytenhek
+
+* Mon Aug 6 2007 Ivana Varekova 4.2.1-1
+- update to 4.2.1
+- do some spec cleanups
+- fix 238794 - gmp-devel depends on {version} but not on
+  {version}-{release}
+- remove mpfr (moved to separate package)
+
+* Thu Jul 05 2007 Florian La Roche 4.1.4-13
+- don't fail scripts to e.g. allow excludedocs installs
+
+* Tue Apr 24 2007 Karsten Hopp 4.1.4-12.3
+- fix library permissions
+
+* Wed Mar 14 2007 Karsten Hopp 4.1.4-12.2
+- fix typo
+
+* Wed Mar 14 2007 Thomas Woerner 4.1.4-12.1
+- added alpha support for gmp.h and gmp-mparam.h wrappers
+
+* Fri Feb 23 2007 Karsten Hopp 4.1.4-12
+- remove trailing dot from summary
+- fix buildroot
+- fix post/postun/... requirements
+- use make install DESTDIR=...
+- replace tabs with spaces
+- convert changelog to utf-8
+
+* Wed Jan 17 2007 Jakub Jelinek 4.1.4-11
+- make sure libmpfr.a doesn't contain SSE2 instructions on i?86 (#222371)
+- rebase to mpfr 2.2.1 from 2.2.0 + cumulative fixes
+
+* Thu Nov 2 2006 Thomas Woerner 4.1.4-10
+- fixed arch order in gmp.h and gmp-mparam.h wrapper for all architectures
+
+* Thu Nov 2 2006 Joe Orton 4.1.4-10
+- include ppc64 header on ppc64 not ppc header
+
+* Fri Oct 27 2006 Thomas Woerner - 4.1.4-9
+- fixed multilib devel conflicts for gmp (#212286)
+
+* Thu Oct 26 2006 Jakub Jelinek - 4.1.4-8
+- upgrade mpfr to 2.2.0 (#211971)
+- apply mpfr 2.2.0 cumulative patch
+
+* Fri Jul 14 2006 Thomas Woerner - 4.1.4-7
+- release bump
+
+* Fri Feb 10 2006 Jesse Keating - 4.1.4-6.2.1
+- bump again for double-long bug on ppc(64)
+
+* Tue Feb 07 2006 Jesse Keating - 4.1.4-6.2
+- rebuilt for new gcc4.1 snapshot and glibc changes
+
+* Fri Dec 09 2005 Jesse Keating
+- rebuilt
+
+* Mon Apr 18 2005 Thomas Woerner 4.1.4-6
+- fixed __setfpucw call in mpfr-test.h
+
+* Wed Mar 02 2005 Karsten Hopp 4.1.4-5
+- build with gcc-4
+
+* Wed Feb 09 2005 Karsten Hopp 4.1.4-4
+- rebuilt
+
+* Sun Sep 26 2004 Florian La Roche
+- 4.1.4
+- disable ppc64 patch, now fixed upstream
+
+* Tue Jun 15 2004 Elliot Lee
+- rebuilt
+
+* Mon May 24 2004 Thomas Woerner 4.1.3-1
+- new version 4.1.3
+
+* Wed Mar 31 2004 Thomas Woerner 4.1.2-14
+- dropped RPATH (#118506)
+
+* Sat Mar 06 2004 Florian La Roche
+- also build SSE2 DSOs, patch from Ulrich Drepper
+
+* Tue Mar 02 2004 Elliot Lee
+- rebuilt
+
+* Fri Feb 13 2004 Elliot Lee
+- rebuilt
+
+* Thu Jan 29 2004 Thomas Woerner 4.1.2-11
+- BuildRequires for automake16
+
+* Mon Dec 01 2003 Florian La Roche
+- fix symlink to libgmpxx.so.3 #111135
+- add patch to factorize.c from gmp homepage
+
+* Thu Oct 23 2003 Joe Orton 4.1.2-9
+- build with -Wa,--noexecstack
+
+* Thu Oct 23 2003 Joe Orton 4.1.2-8
+- build assembly code with -Wa,--execstack
+- use parallel make
+- run tests, and fix C++ therein
+
+* Thu Oct 02 2003 Florian La Roche
+- enable mpfr #104395
+- enable cxx #80195
+- add COPYING.LIB
+- add fixes from gmp web-site
+- remove some cruft patches for older libtool releases
+
+* Wed Jun 04 2003 Elliot Lee
+- rebuilt
+
+* Tue Jun 03 2003 Florian La Roche
+- make configure.in work with newer autoconf
+
+* Sun Jun 01 2003 Florian La Roche
+- do not set extra_functions for s390x #92001
+
+* Thu Feb 13 2003 Elliot Lee 4.1.2-3
+- Add ppc64 patch, accompanied by running auto*
+
+* Wed Jan 22 2003 Tim Powers
+- rebuilt
+
+* Wed Jan 01 2003 Florian La Roche
+- update to 4.1.2
+
+* Tue Dec 03 2002 Florian La Roche
+- update to 4.1.1
+- remove un-necessary patches
+- adjust s390/x86_64 patch
+
+* Sun Oct 06 2002 Florian La Roche
+- add s390x patch
+- disable current x86-64 support in longlong.h
+
+* Mon Jul 8 2002 Trond Eivind Glomsrød 4.1-4
+- Add 4 patches, among them one for #67918
+- Update URL
+- s/Copyright/License/
+
+* Mon Jul 8 2002 Trond Eivind Glomsrød 4.1-3
+- Redefine the configure macro, the included configure
+  script isn't happy about the rpm default one (#68190). Also, make
+  sure the included libtool isn't replaced,
+
+* Fri Jun 21 2002 Tim Powers
+- automated rebuild
+
+* Sat May 25 2002 Florian La Roche
+- update to version 4.1
+- patch s390 gmp-mparam.h to match other archs.
+
+* Thu May 23 2002 Tim Powers
+- automated rebuild
+
+* Mon Mar 11 2002 Trond Eivind Glomsrød 4.0.1-3
+- Use standard %%configure macro and edit %%{_tmppath}
+
+* Tue Feb 26 2002 Trond Eivind Glomsrød 4.0.1-2
+- Rebuild
+
+* Tue Jan 22 2002 Florian La Roche
+- update to 4.0.1
+- bzip2 src
+
+* Wed Jan 09 2002 Tim Powers
+- automated rebuild
+
+* Sun Jun 24 2001 Elliot Lee
+- Bump release + rebuild.
+
+* Mon Feb 05 2001 Philipp Knirsch
+- Fixed bugzilla bug #25515 where GMP wouldn't work on IA64 as IA64 is not
+correctly identified as a 64 bit platform.
+
+* Mon Dec 18 2000 Preston Brown
+- include bsd mp library
+
+* Tue Oct 17 2000 Florian La Roche
+- update to 3.1.1
+
+* Sun Sep 3 2000 Florian La Roche
+- update to 3.1
+
+* Sat Aug 19 2000 Preston Brown
+- devel subpackage depends on main package so that .so symlink is OK.
+
+* Thu Jul 13 2000 Prospector
+- automatic rebuild
+
+* Sat Jun 3 2000 Nalin Dahyabhai
+- switch to the configure and makeinstall macros
+- FHS-compliance fixing
+- move docs to non-devel package
+
+* Fri Apr 28 2000 Bill Nottingham
+- libtoolize for ia64
+
+* Fri Apr 28 2000 Florian La Roche
+- update to 3.0.1
+
+* Thu Apr 27 2000 Jakub Jelinek
+- sparc64 fixes for 3.0
+
+* Wed Apr 26 2000 Florian La Roche
+- update to 3.0
+
+* Mon Feb 14 2000 Matt Wilson
+- #include in files that use string functions
+
+* Wed Feb 02 2000 Cristian Gafton
+- fix description and summary
+
+* Mon Dec 06 1999 Michael K. Johnson
+- s/GPL/LGPL/
+- build as non-root (#7604)
+
+* Mon Sep 06 1999 Jakub Jelinek
+- merge in some debian gmp fixes
+- Ulrich Drepper's __gmp_scale2 fix
+- my mpf_set_q fix
+- sparc64 fixes
+
+* Wed Apr 28 1999 Cristian Gafton
+- add sparc patch for PIC handling
+
+* Sun Mar 21 1999 Cristian Gafton
+- auto rebuild in the new build environment (release 8)
+
+* Thu Feb 11 1999 Michael Johnson
+- include the private header file gmp-mparam.h because several
+  apps seem to assume that they are building against the gmp
+  source tree and require it. Sigh.
+
+* Tue Jan 12 1999 Michael K. Johnson
+- libtoolize to work on arm
+
+* Thu Sep 10 1998 Cristian Gafton
+- yet another touch of the spec file
+
+* Wed Sep 2 1998 Michael Fulbright
+- looked over before inclusion in RH 5.2
+
+* Sun May 24 1998 Dick Porter
+- Patch Makefile.in, not Makefile
+- Don't specify i586, let configure decide the arch
+
+* Sat Jan 24 1998 Marc Ewing
+- started with package from Toshio Kuratomi
+- cleaned up file list
+- fixed up install-info support
+
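Note on the multilib header wrappers installed in %install above: the real, architecture-specific gmp.h produced by the build is renamed to gmp-${basearch}.h, and the generic gmp.h installed from %{SOURCE2} only dispatches to it at compile time, so 32-bit and 64-bit gmp-devel packages can coexist without a file conflict. The snippet below is a minimal illustrative sketch of such a dispatch wrapper, not the packaged file itself; the set of architectures shown and the compiler macros tested are assumptions inferred from the basearch logic in %install, and the real wrapper may differ.

/* gmp.h -- illustrative multilib dispatch wrapper (sketch only).
 * Includes the architecture-specific header that %install renamed,
 * matching basearch: i386 for all ix86, arm for arm*, etc. */
#if defined(__x86_64__)
# include "gmp-x86_64.h"
#elif defined(__i386__)
# include "gmp-i386.h"
#elif defined(__aarch64__)
# include "gmp-aarch64.h"
#elif defined(__arm__)
# include "gmp-arm.h"
#elif defined(__s390x__)
# include "gmp-s390x.h"
#else
# error "The gmp-devel package does not provide a header for this architecture."
#endif

A program that includes gmp.h therefore transparently gets the header matching its target architecture; the gmp-mparam.h wrapper installed from %{SOURCE3} would follow the same pattern for gmp-mparam-${basearch}.h.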