From 6233ce1ee9e0a39d223ff7e551a104e85e61acc2 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Wed, 6 Nov 2024 03:06:43 +0300 Subject: [PATCH] import gcc-toolset-13-gcc-13.3.1-2.1.el8_10 --- .gcc-toolset-13-gcc.metadata | 4 +- .gitignore | 4 +- SOURCES/gcc13-libstdc++-compat.patch | 99 +- SOURCES/gcc13-libstdc++-docs.patch | 4 +- SOURCES/gcc13-pr107071.patch | 28 + SOURCES/gcc13-testsuite-no-ssp.patch | 46 - SOURCES/gcc13-testsuite-plugin.patch | 94 -- SOURCES/gcc13-vector-merge-1.patch | 522 ++++++ SOURCES/gcc13-vector-merge-2.patch | 240 +++ SOURCES/gcc13-vector-merge-3.patch | 306 ++++ SOURCES/gcc13-znver5.patch | 2246 ++++++++++++++++++++++++++ SPECS/gcc.spec | 195 ++- 12 files changed, 3577 insertions(+), 211 deletions(-) create mode 100644 SOURCES/gcc13-pr107071.patch delete mode 100644 SOURCES/gcc13-testsuite-no-ssp.patch delete mode 100644 SOURCES/gcc13-testsuite-plugin.patch create mode 100644 SOURCES/gcc13-vector-merge-1.patch create mode 100644 SOURCES/gcc13-vector-merge-2.patch create mode 100644 SOURCES/gcc13-vector-merge-3.patch create mode 100644 SOURCES/gcc13-znver5.patch diff --git a/.gcc-toolset-13-gcc.metadata b/.gcc-toolset-13-gcc.metadata index f61addb..876f677 100644 --- a/.gcc-toolset-13-gcc.metadata +++ b/.gcc-toolset-13-gcc.metadata @@ -1,4 +1,4 @@ -c6598a786781f7b8a3131f96995641f45e7b96a5 SOURCES/gcc-13.1.1-20230614.tar.xz +e1fb4ded21712a1724ce05b86ff738ee6d21256c SOURCES/gcc-13.3.1-20240611.tar.xz ae5fbb33bcb442121fbbf482a93f6b3c84d489ee SOURCES/isl-0.24.tar.bz2 003af8bc05476507f4dd02340b727b72b404e275 SOURCES/newlib-cygwin-9e09d6ed83cce4777a5950412647ccc603040409.tar.xz -efdf76a82380d621f0318f3ddf582e033ffd0f75 SOURCES/nvptx-tools-93e00909ceb9cbbc104f0fcba56c0361ffb3ca4b.tar.xz +6bfe7ac1b208935ee2c0939bb47cfe775c1c81b3 SOURCES/nvptx-tools-aa3404ad5a496cda5d79a50bedb1344fd63e8763.tar.xz diff --git a/.gitignore b/.gitignore index 0d6bc47..f568836 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ 
-SOURCES/gcc-13.1.1-20230614.tar.xz +SOURCES/gcc-13.3.1-20240611.tar.xz SOURCES/isl-0.24.tar.bz2 SOURCES/newlib-cygwin-9e09d6ed83cce4777a5950412647ccc603040409.tar.xz -SOURCES/nvptx-tools-93e00909ceb9cbbc104f0fcba56c0361ffb3ca4b.tar.xz +SOURCES/nvptx-tools-aa3404ad5a496cda5d79a50bedb1344fd63e8763.tar.xz diff --git a/SOURCES/gcc13-libstdc++-compat.patch b/SOURCES/gcc13-libstdc++-compat.patch index 9d1d844..67cb374 100644 --- a/SOURCES/gcc13-libstdc++-compat.patch +++ b/SOURCES/gcc13-libstdc++-compat.patch @@ -4916,7 +4916,7 @@ +asm (".hidden _ZN9__gnu_cxxeqIPKwNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEEEEbRKNS_17__normal_iteratorIT_T0_EESE_"); +asm (".hidden _ZN9__gnu_cxxeqIPwNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEEEEbRKNS_17__normal_iteratorIT_T0_EESD_"); +asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE17_M_use_local_dataEv"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE11_S_allocateERS3_m"); ++//asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE11_S_allocateERS3_m"); --- libstdc++-v3/src/nonshared11/istream-inst.cc.jj 2023-05-19 12:22:56.739882146 +0200 +++ libstdc++-v3/src/nonshared11/istream-inst.cc 2023-05-19 13:36:14.104840565 +0200 @@ -0,0 +1,132 @@ @@ -7802,7 +7802,7 @@ +asm (".hidden _ZN9__gnu_cxxeqIPKcNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKNS_17__normal_iteratorIT_T0_EESE_"); +asm (".hidden _ZN9__gnu_cxxeqIPcNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKNS_17__normal_iteratorIT_T0_EESD_"); +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE17_M_use_local_dataEv"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_S_allocateERS3_m"); ++//asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_S_allocateERS3_m"); --- libstdc++-v3/src/nonshared11/cow-stdexcept80.cc.jj 2023-05-19 12:22:56.747882033 +0200 +++ libstdc++-v3/src/nonshared11/cow-stdexcept80.cc 2023-05-19 13:36:14.097840663 +0200 @@ -0,0 
+1,76 @@ @@ -8804,7 +8804,7 @@ + $(OPT_LDFLAGS) $(SECTION_LDFLAGS) $(AM_CXXFLAGS) $(LTLDFLAGS) -o $@ --- libstdc++-v3/src/nonshared20/tzdb80.cc.jj 2023-05-19 20:25:50.447297532 +0200 +++ libstdc++-v3/src/nonshared20/tzdb80.cc 2023-05-19 19:51:19.200552634 +0200 -@@ -0,0 +1,120 @@ +@@ -0,0 +1,128 @@ +// Copyright The GNU Toolchain Authors. +// +// This file is part of the GNU ISO C++ Library. This library is free @@ -8891,12 +8891,10 @@ +asm (".hidden _ZNSt6chrono9tzdb_list5_Node11_S_the_listE"); +asm (".hidden _ZTSSt23_Sp_counted_ptr_inplaceINSt6chrono9tzdb_list5_NodeESaIvELN9__gnu_cxx12_Lock_policyE2EE"); +asm (".hidden _ZTISt23_Sp_counted_ptr_inplaceINSt6chrono9tzdb_list5_NodeESaIvELN9__gnu_cxx12_Lock_policyE2EE"); ++#if 0 +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev"); +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2EOS4_"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1EOS4_"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_disposeEv"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_"); ++#endif +asm (".hidden _ZTSSt19_Sp_make_shared_tag"); +asm (".hidden _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE10_M_releaseEv"); +asm (".hidden _ZNSt10unique_ptrINSt10filesystem7__cxx114path5_List5_ImplENS3_13_Impl_deleterEED2Ev"); @@ -8907,11 +8905,21 @@ +asm (".hidden _ZTISt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE"); +asm (".hidden _ZZNSt19_Sp_make_shared_tag5_S_tiEvE5__tag"); +asm (".hidden _ZZNSt8__detail18__waiter_pool_base6_S_forEPKvE3__w"); -+#if defined(__x86_64__) || defined(__aarch64__) || defined(__i386__) -+asm (".hidden _ZNKSt10filesystem7__cxx114path8iteratordeEv"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_disposeEv"); ++#if defined(__aarch64__) || defined(__x86_64__) ++//asm (".hidden 
_ZSt25__unguarded_linear_insertIN9__gnu_cxx17__normal_iteratorIPNSt6chrono9time_zoneESt6vectorIS3_SaIS3_EEEENS0_5__ops14_Val_comp_iterIZNSt6ranges8__detail16__make_comp_projINSB_4lessEMS3_KDoFSt17basic_string_viewIcSt11char_traitsIcEEvEEEDaRT_RT0_EUlOSL_OSN_E_EEEvSL_SN_"); +#endif ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1EOS4_"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2EOS4_"); +asm (".hidden _ZSt23__atomic_wait_address_vIiZNKSt13__atomic_baseIiE4waitEiSt12memory_orderEUlvE_EvPKT_S4_T0_"); ++#if defined(__i386__) || defined(__x86_64__) ++asm (".hidden _ZNKSt10filesystem7__cxx114path8iteratordeEv"); ++#endif +#if defined(__i386__) ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEjjPKcj"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEjjPKcj"); +asm (".hidden _ZSt16__introsort_loopIN9__gnu_cxx17__normal_iteratorIPNSt6chrono9time_zoneESt6vectorIS3_SaIS3_EEEEiNS0_5__ops15_Iter_comp_iterIZNSt6ranges8__detail16__make_comp_projINSB_4lessESt8identityEEDaRT_RT0_EUlOSG_OSI_E_EEEvSG_SG_SI_T1_"); +asm (".hidden _ZSt16__introsort_loopIN9__gnu_cxx17__normal_iteratorIPNSt6chrono9time_zoneESt6vectorIS3_SaIS3_EEEEiNS0_5__ops15_Iter_comp_iterIZNSt6ranges8__detail16__make_comp_projINSB_4lessEMS3_KDoFSt17basic_string_viewIcSt11char_traitsIcEEvEEEDaRT_RT0_EUlOSL_OSN_E_EEEvSL_SL_SN_T1_"); +asm (".hidden _ZSt16__introsort_loopIN9__gnu_cxx17__normal_iteratorIPNSt6chrono14time_zone_linkESt6vectorIS3_SaIS3_EEEEiNS0_5__ops15_Iter_comp_iterIZNSt6ranges8__detail16__make_comp_projINSB_4lessESt8identityEEDaRT_RT0_EUlOSG_OSI_E_EEEvSG_SG_SI_T1_"); @@ -9891,7 +9899,7 @@ +asm (".hidden _ZNSt8__detail31__from_chars_alnum_to_val_tableILb0EE5valueE"); --- 
libstdc++-v3/src/nonshared17/floating_from_chars110.cc.jj 2023-05-19 18:08:44.662186963 +0200 +++ libstdc++-v3/src/nonshared17/floating_from_chars110.cc 2023-05-19 19:46:37.242530879 +0200 -@@ -0,0 +1,34 @@ +@@ -0,0 +1,53 @@ +// Copyright (C) 2019-2023 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free @@ -9916,16 +9924,35 @@ + +#define _GLIBCXX_NONSHARED_CXX11_110 +#include "../c++17/floating_from_chars.cc" -+asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IJS1_IJSt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IJS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSR_RS4_i"); -+asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IIS1_IISt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IIS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSR_RS4_i"); +#ifndef __s390x__ -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_createERmm"); +#ifndef __i386__ ++#if !defined(__aarch64__) ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEmmPKcm"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_createERmm"); +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE15_M_replace_coldEPcmPKcmm"); +#endif ++asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IJS1_IJSt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IJS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSR_RS4_i"); ++asm (".hidden 
_ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IIS1_IISt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IIS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSR_RS4_i"); ++#endif ++#endif ++#if defined (__s390x__) ++asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IIS1_IISt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IIS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSR_RS4_i"); ++asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IJS1_IJSt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IJS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSR_RS4_i"); ++#endif ++#if defined (__powerpc64__) ++asm (".hidden _ZSt10from_charsPKcS0_Ru9__ieee128St12chars_format"); ++//asm (".hidden _ZSt8to_charsPcS_u9__ieee128"); ++//asm (".hidden _ZSt8to_charsPcS_u9__ieee128St12chars_format"); ++//asm (".hidden _ZSt8to_charsPcS_u9__ieee128St12chars_formati"); +#endif -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEmmPKcm"); +asm (".hidden _ZNSt8__detail31__from_chars_alnum_to_val_tableILb0EE5valueE"); ++#ifdef __i386__ ++asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IJS1_IJSt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xEEES1_IJS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSP_RS4_i"); ++asm (".hidden _ZSt10from_charsIiENSt9enable_ifIXsrSt5__or_IIS1_IISt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xEEES1_IIS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yEEES2_IcS6_EEE5valueESt17from_chars_resultE4typeEPKcSP_RS4_i"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE15_M_replace_coldEPcjPKcjj"); 
++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_createERjj"); ++asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEjjPKcj"); ++#endif --- libstdc++-v3/src/nonshared17/cow-fs_ops.cc.jj 2023-05-19 12:22:56.799881298 +0200 +++ libstdc++-v3/src/nonshared17/cow-fs_ops.cc 2023-05-19 19:21:04.451145483 +0200 @@ -0,0 +1,83 @@ @@ -10142,7 +10169,7 @@ +asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE12_M_leak_hardEv"); +asm (".hidden _ZNSs12_M_leak_hardEv"); +asm (".hidden _ZNSs4swapERSs"); -+asm (".hidden _ZNSs6appendERKSs"); ++//asm (".hidden _ZNSs6appendERKSs"); +asm (".hidden _ZNSt10filesystem4path5_List5beginEv"); +asm (".hidden _ZNSt10filesystem4path7_Parser4nextEv"); +asm (".hidden _ZNSt10filesystem4pathD1Ev"); @@ -10450,7 +10477,7 @@ +//asm (".hidden _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE24_M_release_last_use_coldEv"); +asm (".hidden _ZNSt12_Destroy_auxILb0EE9__destroyIPNSt10filesystem7__cxx114path5_CmptEEEvT_S7_"); +asm (".hidden _ZNKSt10filesystem7__cxx114path5_List5_Impl4copyEv"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm"); ++//asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm"); +asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE6resizeEmw"); +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6resizeEmc"); +#endif @@ -10471,7 +10498,7 @@ +//asm (".hidden _ZNSt10filesystem7__cxx114path7_Parser4nextEv"); +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEjjPKcj"); +asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj"); -+asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj"); ++//asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj"); +asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE6resizeEjw"); 
+#endif +asm (".hidden _ZNSt10filesystem7__cxx114path8_CodecvtIwED0Ev"); @@ -12023,7 +12050,7 @@ +#include "../c++17/string-inst.cc" --- libstdc++-v3/src/nonshared17/floating_to_chars110.cc.jj 2023-05-19 18:08:54.897043256 +0200 +++ libstdc++-v3/src/nonshared17/floating_to_chars110.cc 2023-05-19 19:39:17.219738117 +0200 -@@ -0,0 +1,27 @@ +@@ -0,0 +1,31 @@ +// Copyright (C) 2019-2023 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free @@ -12048,9 +12075,13 @@ + +#define _GLIBCXX_NONSHARED_CXX11_110 +#include "../c++17/floating_to_chars.cc" ++#if !defined(__i386__) +asm (".hidden _ZSt12__to_chars_iIoENSt9enable_ifIXsrSt5__or_IJS1_IJSt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IJS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt15to_chars_resultE4typeEPcSQ_S4_i"); +asm (".hidden _ZSt12__to_chars_iIoENSt9enable_ifIXsrSt5__or_IIS1_IISt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IIS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt15to_chars_resultE4typeEPcSQ_S4_i"); ++#endif ++#if !defined(__s390x__) && !defined(__aarch64__) +asm (".hidden _ZNSt8__detail18__to_chars_10_implIjEEvPcjT_"); ++#endif --- libstdc++-v3/src/nonshared17/string-inst110.cc.jj 2023-05-19 20:14:44.341711057 +0200 +++ libstdc++-v3/src/nonshared17/string-inst110.cc 2023-05-19 19:41:57.743474432 +0200 @@ -0,0 +1,37 @@ @@ -12726,8 +12757,8 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF --#line 12190 "configure" -+#line 12192 "configure" +-#line 12192 "configure" ++#line 12194 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12735,8 +12766,8 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF --#line 12296 "configure" -+#line 12298 "configure" +-#line 12298 "configure" ++#line 12300 
"configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12744,8 +12775,8 @@ # Fake what AC_TRY_COMPILE does. cat > conftest.$ac_ext << EOF --#line 16020 "configure" -+#line 16022 "configure" +-#line 16036 "configure" ++#line 16038 "configure" int main() { typedef bool atomic_type; @@ -12753,8 +12784,8 @@ rm -f conftest* cat > conftest.$ac_ext << EOF --#line 16055 "configure" -+#line 16057 "configure" +-#line 16071 "configure" ++#line 16073 "configure" int main() { typedef short atomic_type; @@ -12762,8 +12793,8 @@ rm -f conftest* cat > conftest.$ac_ext << EOF --#line 16090 "configure" -+#line 16092 "configure" +-#line 16106 "configure" ++#line 16108 "configure" int main() { // NB: _Atomic_word not necessarily int. @@ -12771,8 +12802,8 @@ rm -f conftest* cat > conftest.$ac_ext << EOF --#line 16126 "configure" -+#line 16128 "configure" +-#line 16142 "configure" ++#line 16144 "configure" int main() { typedef long long atomic_type; @@ -12780,8 +12811,8 @@ # unnecessary for this test. cat > conftest.$ac_ext << EOF --#line 16282 "configure" -+#line 16284 "configure" +-#line 16298 "configure" ++#line 16300 "configure" int main() { _Decimal32 d1; @@ -12789,8 +12820,8 @@ # unnecessary for this test. cat > conftest.$ac_ext << EOF --#line 16324 "configure" -+#line 16326 "configure" +-#line 16340 "configure" ++#line 16342 "configure" template struct same { typedef T2 type; }; diff --git a/SOURCES/gcc13-libstdc++-docs.patch b/SOURCES/gcc13-libstdc++-docs.patch index 6dffae6..b007822 100644 --- a/SOURCES/gcc13-libstdc++-docs.patch +++ b/SOURCES/gcc13-libstdc++-docs.patch @@ -4,7 +4,7 @@ FSF

-+ Release 13.1.1 ++ Release 13.3.1 +

Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation @@ -17,7 +17,7 @@

- The API documentation, rendered into HTML, can be viewed online + The API documentation, rendered into HTML, can be viewed locally -+ for the 13.1.1 release, ++ for the 13.3.1 release, + online for each GCC release and diff --git a/SOURCES/gcc13-pr107071.patch b/SOURCES/gcc13-pr107071.patch new file mode 100644 index 0000000..5d459a0 --- /dev/null +++ b/SOURCES/gcc13-pr107071.patch @@ -0,0 +1,28 @@ +commit c8c587b854c9e85fc9ce58c8192d532205f0ee1f +Author: Tamar Christina +Date: Wed Feb 21 11:42:13 2024 +0000 + + AArch64: skip modes_1.f90 [PR107071] + + This test has never worked on AArch64 since the day it was committed. It has + a number of issues that prevent it from working on AArch64: + + The testfailures seem to be known and triaged, so until that's fixed there's + no point in running this test. + + gcc/testsuite/ChangeLog: + + PR fortran/107071 + * gfortran.dg/ieee/modes_1.f90: skip aarch64, arm. + +diff --git a/gcc/testsuite/gfortran.dg/ieee/modes_1.f90 b/gcc/testsuite/gfortran.dg/ieee/modes_1.f90 +index 205c47f3800..e29d8c678e6 100644 +--- a/gcc/testsuite/gfortran.dg/ieee/modes_1.f90 ++++ b/gcc/testsuite/gfortran.dg/ieee/modes_1.f90 +@@ -1,5 +1,5 @@ + ! { dg-do run } +-! ++! { dg-skip-if "PR libfortran/78314" { aarch64*-*-gnu* arm*-*-gnueabi arm*-*-gnueabihf } } + ! Test IEEE_MODES_TYPE, IEEE_GET_MODES and IEEE_SET_MODES + + diff --git a/SOURCES/gcc13-testsuite-no-ssp.patch b/SOURCES/gcc13-testsuite-no-ssp.patch deleted file mode 100644 index b041a65..0000000 --- a/SOURCES/gcc13-testsuite-no-ssp.patch +++ /dev/null @@ -1,46 +0,0 @@ -From b964d2647b0da15543faed5add5044fd79a85c5c Mon Sep 17 00:00:00 2001 -From: Marek Polacek -Date: Thu, 29 Jun 2023 15:59:29 -0400 -Subject: [PATCH] i386: add -fno-stack-protector to two tests - -These tests fail when the testsuite is executed with -fstack-protector-strong. -To avoid this, this patch adds -fno-stack-protector to dg-options. - -Tested on x86_64-pc-linux-gnu, ok for trunk? 
- -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr104610.c: Use -fno-stack-protector. - * gcc.target/i386/pr69482-1.c: Likewise. ---- - gcc/testsuite/gcc.target/i386/pr104610.c | 2 +- - gcc/testsuite/gcc.target/i386/pr69482-1.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/i386/pr104610.c b/gcc/testsuite/gcc.target/i386/pr104610.c -index fe39cbe5b8a..5173fc8898c 100644 ---- a/gcc/testsuite/gcc.target/i386/pr104610.c -+++ b/gcc/testsuite/gcc.target/i386/pr104610.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mavx -mmove-max=256 -mstore-max=256" } */ -+/* { dg-options "-O2 -mavx -mmove-max=256 -mstore-max=256 -fno-stack-protector" } */ - /* { dg-final { scan-assembler-times {(?n)vptest.*ymm} 1 } } */ - /* { dg-final { scan-assembler-times {sete} 1 } } */ - /* { dg-final { scan-assembler-not {(?n)je.*L[0-9]} } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr69482-1.c b/gcc/testsuite/gcc.target/i386/pr69482-1.c -index f192261b104..99bb6ad5a37 100644 ---- a/gcc/testsuite/gcc.target/i386/pr69482-1.c -+++ b/gcc/testsuite/gcc.target/i386/pr69482-1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O3" } */ -+/* { dg-options "-O3 -fno-stack-protector" } */ - - static inline void memset_s(void* s, int n) { - volatile unsigned char * p = s; - -base-commit: 070a6bf0bdc6761ad77ac97404c98f00a7007d54 --- -2.41.0 - diff --git a/SOURCES/gcc13-testsuite-plugin.patch b/SOURCES/gcc13-testsuite-plugin.patch deleted file mode 100644 index a1e0e69..0000000 --- a/SOURCES/gcc13-testsuite-plugin.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 2ef902063590ebd7e8b8b8b4d708f6c7dc62a81d Mon Sep 17 00:00:00 2001 -From: Marek Polacek -Date: Thu, 29 Jun 2023 14:57:48 -0400 -Subject: [PATCH] testsuite: Use -fno-report-bug in gcc.dg/plugin/ - -Certain downstream compilers (for example, in Fedora) default to --freport-bug. The extra output breaks the following tests. We can use --fno-report-bug to fix that. 
Patch verified with: - -$ make check RUNTESTFLAGS='--target_board=unix\{,-freport-bug\} plugin.exp' - -Tested x86_64-pc-linux-gnu, ok for trunk/13? - -gcc/testsuite/ChangeLog: - - * gcc.dg/plugin/crash-test-ice-sarif.c: Use -fno-report-bug. Adjust - scan-sarif-file. - * gcc.dg/plugin/crash-test-ice-stderr.c: Use -fno-report-bug. - * gcc.dg/plugin/crash-test-write-though-null-sarif.c: Use - -fno-report-bug. Adjust scan-sarif-file. - * gcc.dg/plugin/crash-test-write-though-null-stderr.c: Use - -fno-report-bug. ---- - gcc/testsuite/gcc.dg/plugin/crash-test-ice-sarif.c | 3 ++- - gcc/testsuite/gcc.dg/plugin/crash-test-ice-stderr.c | 1 + - .../gcc.dg/plugin/crash-test-write-though-null-sarif.c | 3 ++- - .../gcc.dg/plugin/crash-test-write-though-null-stderr.c | 1 + - 4 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/gcc/testsuite/gcc.dg/plugin/crash-test-ice-sarif.c b/gcc/testsuite/gcc.dg/plugin/crash-test-ice-sarif.c -index 3b773a9a84c..84a4347a17e 100644 ---- a/gcc/testsuite/gcc.dg/plugin/crash-test-ice-sarif.c -+++ b/gcc/testsuite/gcc.dg/plugin/crash-test-ice-sarif.c -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-fdiagnostics-format=sarif-file" } */ -+/* { dg-additional-options "-fno-report-bug" } */ - - extern void inject_ice (void); - -@@ -56,7 +57,7 @@ void test_inject_ice (void) - { dg-final { scan-sarif-file "\"contextRegion\": " } } - { dg-final { scan-sarif-file "\"artifactLocation\": " } } - { dg-final { scan-sarif-file "\"region\": " } } -- { dg-final { scan-sarif-file "\"startLine\": 8" } } -+ { dg-final { scan-sarif-file "\"startLine\": 9" } } - { dg-final { scan-sarif-file "\"startColumn\": 3" } } - { dg-final { scan-sarif-file "\"endColumn\": 16" } } - { dg-final { scan-sarif-file "\"message\": " } } -diff --git a/gcc/testsuite/gcc.dg/plugin/crash-test-ice-stderr.c b/gcc/testsuite/gcc.dg/plugin/crash-test-ice-stderr.c -index cee701b135c..0064d3bc447 100644 ---- a/gcc/testsuite/gcc.dg/plugin/crash-test-ice-stderr.c -+++ 
b/gcc/testsuite/gcc.dg/plugin/crash-test-ice-stderr.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-additional-options "-fno-report-bug" } */ - - extern void inject_ice (void); - -diff --git a/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c b/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c -index 57caa20155f..83b38d2ffb5 100644 ---- a/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c -+++ b/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-fdiagnostics-format=sarif-file" } */ -+/* { dg-additional-options "-fno-report-bug" } */ - - extern void inject_write_through_null (void); - -@@ -56,7 +57,7 @@ void test_inject_write_through_null (void) - { dg-final { scan-sarif-file "\"contextRegion\": " } } - { dg-final { scan-sarif-file "\"artifactLocation\": " } } - { dg-final { scan-sarif-file "\"region\": " } } -- { dg-final { scan-sarif-file "\"startLine\": 8" } } -+ { dg-final { scan-sarif-file "\"startLine\": 9" } } - { dg-final { scan-sarif-file "\"startColumn\": 3" } } - { dg-final { scan-sarif-file "\"endColumn\": 31" } } - { dg-final { scan-sarif-file "\"message\": " } } -diff --git a/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-stderr.c b/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-stderr.c -index 7b43e423633..a9a211a3b1f 100644 ---- a/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-stderr.c -+++ b/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-stderr.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-additional-options "-fno-report-bug" } */ - - extern void inject_write_through_null (void); - - -base-commit: 070a6bf0bdc6761ad77ac97404c98f00a7007d54 --- -2.41.0 - diff --git a/SOURCES/gcc13-vector-merge-1.patch b/SOURCES/gcc13-vector-merge-1.patch new file mode 100644 index 0000000..039fe24 --- /dev/null +++ b/SOURCES/gcc13-vector-merge-1.patch @@ -0,0 +1,522 @@ +commit 
361bfcec901ca882130e338aebaa2ebc6ea2dc3b +Author: Kewen Lin +Date: Thu Jun 20 20:23:56 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low word on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low word, which are altivec_vmrg[hl]w, + vsx_xxmrg[hl]w_. These defines are mainly for + built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, + __builtin_vsx_xxmrghw_4si and some internal gen function + needs. These functions should consider endianness, taking + vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges + the first halves (in element order) of two vectors", it does + note it's in element order. So it's mapped into vmrghw on + BE while vmrglw on LE respectively. Although the mapped + insns are different, as the discussion in PR106069, the RTL + pattern should be still the same, it is conformed before + commit r12-4496, define_expand altivec_vmrghw got expanded + into: + + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on both BE and LE then. But commit r12-4496 changed it to + expand into: + + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on BE, and + + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + + on LE, although the mapped insn are still vmrghw on BE and + vmrglw on LE, the associated RTL pattern is completely + wrong and inconsistent with the mapped insn. 
If optimization + passes leave this pattern alone, even if its pattern doesn't + represent its mapped insn, it's still fine, that's why simple + testing on bif doesn't expose this issue. But once some + optimization pass such as combine does some changes basing + on this wrong pattern, because the pattern doesn't match the + semantics that the expanded insn is intended to represent, + it would cause the unexpected result. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghw expands + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghw_direct_): Rename + to ... + (altivec_vmrghw_direct__be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrghw_direct__le): New define_insn. + (altivec_vmrglw_direct_): Rename to ... + (altivec_vmrglw_direct__be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrglw_direct__le): New define_insn. + (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be + for BE and gen_altivec_vmrglw_direct_v4si_le for LE. + (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be + for BE and gen_altivec_vmrghw_direct_v4si_le for LE. + (vec_widen_umult_hi_v8hi): Adjust the call to + gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE + and by gen_altivec_vmrglw for LE. + (vec_widen_smult_hi_v8hi): Likewise. + (vec_widen_umult_lo_v8hi): Adjust the call to + gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE + and by gen_altivec_vmrghw for LE + (vec_widen_smult_lo_v8hi): Likewise. 
+ * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghw_direct_v4si by + CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace + CODE_FOR_altivec_vmrglw_direct_v4si by + CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. + * config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling + gen_altivec_vmrghw_direct_v4si_be for BE and + gen_altivec_vmrglw_direct_v4si_le for LE. + (vsx_xxmrglw_): Adjust by calling + gen_altivec_vmrglw_direct_v4si_be for BE and + gen_altivec_vmrghw_direct_v4si_le for LE. + + gcc/testsuite/ChangeLog: + + * g++.target/powerpc/pr106069.C: New test. + * gcc.target/powerpc/pr115355.c: New test. + + (cherry picked from commit 52c112800d9f44457c4832309a48c00945811313) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index ad1224e0b57..92e2e4a4090 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1212,16 +1212,18 @@ (define_expand "altivec_vmrghw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? 
gen_altivec_vmrghw_direct_v4si +- : gen_altivec_vmrglw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghw_direct_" ++(define_insn "altivec_vmrghw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1229,7 +1231,21 @@ (define_insn "altivec_vmrghw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrghw %x0,%x1,%x2 ++ vmrghw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 2) (const_int 6) ++ (const_int 3) (const_int 7)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrghw %x0,%x1,%x2 + vmrghw %0,%1,%2" +@@ -1318,16 +1334,18 @@ (define_expand "altivec_vmrglw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? 
gen_altivec_vmrglw_direct_v4si +- : gen_altivec_vmrghw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglw_direct_" ++(define_insn "altivec_vmrglw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1335,7 +1353,21 @@ (define_insn "altivec_vmrglw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrglw %x0,%x1,%x2 ++ vmrglw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 0) (const_int 4) ++ (const_int 1) (const_int 5)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrglw %x0,%x1,%x2 + vmrglw %0,%1,%2" +@@ -3807,13 +3839,13 @@ (define_expand "vec_widen_umult_hi_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3832,13 
+3864,13 @@ (define_expand "vec_widen_umult_lo_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3857,13 +3889,13 @@ (define_expand "vec_widen_smult_hi_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3882,13 +3914,13 @@ (define_expand "vec_widen_smult_lo_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 9cfde1a52ea..b3d648312f1 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ 
b/gcc/config/rs6000/rs6000.cc +@@ -23174,8 +23174,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglh_direct, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si +- : CODE_FOR_altivec_vmrglw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be ++ : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +@@ -23186,8 +23186,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghh_direct, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si +- : CODE_FOR_altivec_vmrghw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be ++ : CODE_FOR_altivec_vmrghw_direct_v4si_le, + {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}, + {OPTION_MASK_P8_VECTOR, + BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct +diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md +index f70d69ee4b9..b9a1bfb5c16 100644 +--- a/gcc/config/rs6000/vsx.md ++++ b/gcc/config/rs6000/vsx.md +@@ -4683,12 +4683,14 @@ (define_expand "vsx_xxmrghw_" + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? 
gen_altivec_vmrghw_direct_ +- : gen_altivec_vmrglw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +@@ -4703,12 +4705,14 @@ (define_expand "vsx_xxmrglw_" + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_ +- : gen_altivec_vmrghw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C +new file mode 100644 +index 00000000000..537207d2fe8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/powerpc/pr106069.C +@@ -0,0 +1,119 @@ ++/* { dg-options "-O -fno-tree-forwprop -maltivec" } */ ++/* { dg-require-effective-target vmx_hw } */ ++/* { dg-do run } */ ++ ++typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type; ++ ++union ++{ ++ native_simd_type V; ++ int R[4]; ++} store_le_vec; ++ ++struct S ++{ ++ S () = default; ++ S (unsigned B0) ++ { ++ native_simd_type val{B0}; ++ m_simd = val; ++ } ++ void store_le (unsigned int out[]) ++ { ++ store_le_vec.V = m_simd; ++ unsigned int x0 = store_le_vec.R[0]; ++ __builtin_memcpy (out, &x0, 4); ++ } ++ S rotl (unsigned int r) ++ { ++ native_simd_type rot{r}; ++ return __builtin_vec_rl (m_simd, rot); ++ } ++ void operator+= 
(S other) ++ { ++ m_simd = __builtin_vec_add (m_simd, other.m_simd); ++ } ++ void operator^= (S other) ++ { ++ m_simd = __builtin_vec_xor (m_simd, other.m_simd); ++ } ++ static void transpose (S &B0, S B1, S B2, S B3) ++ { ++ native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd); ++ native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd); ++ native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd); ++ native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd); ++ B0 = __builtin_vec_mergeh (T0, T1); ++ B3 = __builtin_vec_mergel (T2, T3); ++ } ++ S (native_simd_type x) : m_simd (x) {} ++ native_simd_type m_simd; ++}; ++ ++void ++foo (unsigned int output[], unsigned state[]) ++{ ++ S R00 = state[0]; ++ S R01 = state[0]; ++ S R02 = state[2]; ++ S R03 = state[0]; ++ S R05 = state[5]; ++ S R06 = state[6]; ++ S R07 = state[7]; ++ S R08 = state[8]; ++ S R09 = state[9]; ++ S R10 = state[10]; ++ S R11 = state[11]; ++ S R12 = state[12]; ++ S R13 = state[13]; ++ S R14 = state[4]; ++ S R15 = state[15]; ++ for (int r = 0; r != 10; ++r) ++ { ++ R09 += R13; ++ R11 += R15; ++ R05 ^= R09; ++ R06 ^= R10; ++ R07 ^= R11; ++ R07 = R07.rotl (7); ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 ^= R01; ++ R13 ^= R02; ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 = R12.rotl (8); ++ R13 = R13.rotl (8); ++ R10 += R15; ++ R11 += R12; ++ R08 += R13; ++ R09 += R14; ++ R05 ^= R10; ++ R06 ^= R11; ++ R07 ^= R08; ++ R05 = R05.rotl (7); ++ R06 = R06.rotl (7); ++ R07 = R07.rotl (7); ++ } ++ R00 += state[0]; ++ S::transpose (R00, R01, R02, R03); ++ R00.store_le (output); ++} ++ ++unsigned int res[1]; ++unsigned main_state[]{1634760805, 60878, 2036477234, 6, ++ 0, 825562964, 1471091955, 1346092787, ++ 506976774, 4197066702, 518848283, 118491664, ++ 0, 0, 0, 0}; ++int ++main () ++{ ++ foo (res, main_state); ++ if (res[0] != 0x41fcef98) ++ __builtin_abort (); ++ return 0; ++} +diff --git 
a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c +new file mode 100644 +index 00000000000..8955126b808 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target p9vector_hw } */ ++/* Force vectorization with -fno-vect-cost-model to have vector unpack ++ which exposes the issue in PR115355. */ ++/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */ ++ ++/* Verify it runs successfully. */ ++ ++__attribute__((noipa)) ++void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen) ++{ ++ #pragma GCC novector ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++__attribute__((noipa)) ++void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen) ++{ ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++unsigned long long vec1[100]; ++unsigned long long vec2[100]; ++ ++int main() ++{ ++ unsigned int l = 29; ++ setToIdentityGOOD (vec1, 29); ++ setToIdentityBAD (vec2, 29); ++ ++ if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0) ++ __builtin_abort (); ++ ++ return 0; ++} diff --git a/SOURCES/gcc13-vector-merge-2.patch b/SOURCES/gcc13-vector-merge-2.patch new file mode 100644 index 0000000..3a5cbfb --- /dev/null +++ b/SOURCES/gcc13-vector-merge-2.patch @@ -0,0 +1,240 @@ +commit ffdd377fc07cdc7b62669d354e23f30940eaaffe +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low char on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low char, which are altivec_vmrg[hl]b. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. 
So it's + mapped into vmrghb on BE while vmrglb on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. + Similar to 32-bit element case in commit log of r15-1504, this + 8-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent, once some optimization + like combine does some changes basing on it, it would cause + the unexpected consequence. The newly constructed test case + pr106069-1.c is a typical example for this issue. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghb expands + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ... + (altivec_vmrghb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghb_direct_le): New define_insn. + (altivec_vmrglb_direct): Rename to ... + (altivec_vmrglb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglb_direct_le): New define_insn. + (altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be + for BE and gen_altivec_vmrglb_direct_le for LE. + (altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be + for BE and gen_altivec_vmrghb_direct_le for LE. 
+ * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghb_direct by + CODE_FOR_altivec_vmrghb_direct_be for BE and + CODE_FOR_altivec_vmrghb_direct_le for LE. And replace + CODE_FOR_altivec_vmrglb_direct by + CODE_FOR_altivec_vmrglb_direct_be for BE and + CODE_FOR_altivec_vmrglb_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-1.c: New test. + + (cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 92e2e4a4090..47664204bc5 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct +- : gen_altivec_vmrglb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghb_direct" ++(define_insn "altivec_vmrghb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct" + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 8) 
(const_int 24) ++ (const_int 9) (const_int 25) ++ (const_int 10) (const_int 26) ++ (const_int 11) (const_int 27) ++ (const_int 12) (const_int 28) ++ (const_int 13) (const_int 29) ++ (const_int 14) (const_int 30) ++ (const_int 15) (const_int 31)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct +- : gen_altivec_vmrghb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglb_direct" ++(define_insn "altivec_vmrglb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct" + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 16) ++ (const_int 1) (const_int 17) ++ (const_int 2) (const_int 18) ++ (const_int 3) (const_int 19) ++ (const_int 4) (const_int 20) ++ (const_int 5) (const_int 21) ++ (const_int 6) (const_int 22) ++ (const_int 7) (const_int 23)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglb %0,%1,%2" + [(set_attr "type" 
"vecperm")]) + +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index b3d648312f1..10088033aa1 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -23166,8 +23166,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + CODE_FOR_altivec_vpkuwum_direct, + {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct +- : CODE_FOR_altivec_vmrglb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be ++ : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +@@ -23178,8 +23178,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +- : CODE_FOR_altivec_vmrghb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be ++ : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +new file mode 100644 +index 00000000000..4945d8fedfb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +@@ -0,0 +1,39 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 8-bit element size, ++ it will abort if the RTL pattern isn't expected. 
*/ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed char elem_6 (vector signed char a, vector signed char b) ++{ ++ vector signed char c = vec_mergeh (a,b); ++ return vec_extract (c, 6); ++} ++ ++__attribute__((noipa)) ++unsigned char elem_15 (vector unsigned char a, vector unsigned char b) ++{ ++ vector unsigned char c = vec_mergel (a,b); ++ return vec_extract (c, 15); ++} ++ ++int ++main () ++{ ++ vector unsigned char v1 ++ = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8}; ++ vector unsigned char v2 ++ = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6}; ++ signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2); ++ unsigned char x2 = elem_15 (v1, v2); ++ ++ if (x1 != 12 || x2 != 6) ++ __builtin_abort (); ++ ++ return 0; ++} ++ diff --git a/SOURCES/gcc13-vector-merge-3.patch b/SOURCES/gcc13-vector-merge-3.patch new file mode 100644 index 0000000..c6ee970 --- /dev/null +++ b/SOURCES/gcc13-vector-merge-3.patch @@ -0,0 +1,306 @@ +commit bab38d9271ce3f26cb64b8cb712351eb3fedd559 +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low short on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low short, which are altivec_vmrg[hl]h. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. So it's + mapped into vmrghh on BE while vmrglh on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. 
+ Similar to 32-bit element case in commit log of r15-1504, this + 16-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent, once some optimization + like combine does some changes basing on it, it would cause + the unexpected consequence. The newly constructed test case + pr106069-2.c is a typical example for this issue on element type + short. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghh expands + into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ... + (altivec_vmrghh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghh_direct_le): New define_insn. + (altivec_vmrglh_direct): Rename to ... + (altivec_vmrglh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglh_direct_le): New define_insn. + (altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be + for BE and gen_altivec_vmrglh_direct_le for LE. + (altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be + for BE and gen_altivec_vmrghh_direct_le for LE. + (vec_widen_umult_hi_v16qi): Adjust the call to + gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE + and by gen_altivec_vmrglh for LE. + (vec_widen_smult_hi_v16qi): Likewise. + (vec_widen_umult_lo_v16qi): Adjust the call to + gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE + and by gen_altivec_vmrghh for LE. + (vec_widen_smult_lo_v16qi): Likewise. 
+ * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghh_direct by + CODE_FOR_altivec_vmrghh_direct_be for BE and + CODE_FOR_altivec_vmrghh_direct_le for LE. And replace + CODE_FOR_altivec_vmrglh_direct by + CODE_FOR_altivec_vmrglh_direct_be for BE and + CODE_FOR_altivec_vmrglh_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-2.c: New test. + + (cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 47664204bc5..6557393a97c 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct +- : gen_altivec_vmrglh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghh_direct" ++(define_insn "altivec_vmrghh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") +- (vec_select:V8HI ++ (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) +@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct" + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 
"register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 4) (const_int 12) ++ (const_int 5) (const_int 13) ++ (const_int 6) (const_int 14) ++ (const_int 7) (const_int 15)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct +- : gen_altivec_vmrghh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglh_direct" ++(define_insn "altivec_vmrglh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_select:V8HI + (vec_concat:V16HI +@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct" + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 "register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 8) ++ (const_int 1) (const_int 9) ++ (const_int 2) (const_int 10) ++ (const_int 3) (const_int 11)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn 
(gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, 
vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 10088033aa1..76eb89ad529 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -23170,8 +23170,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +- : CODE_FOR_altivec_vmrglh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be ++ : CODE_FOR_altivec_vmrglh_direct_le, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be +@@ -23182,8 +23182,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +- : CODE_FOR_altivec_vmrghh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be ++ : CODE_FOR_altivec_vmrghh_direct_le, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? 
CODE_FOR_altivec_vmrglw_direct_v4si_be +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +new file mode 100644 +index 00000000000..283e3290fb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 16-bit element size, ++ it will abort if the RTL pattern isn't expected. */ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed short elem_2 (vector signed short a, vector signed short b) ++{ ++ vector signed short c = vec_mergeh (a,b); ++ return vec_extract (c, 2); ++} ++ ++__attribute__((noipa)) ++unsigned short elem_7 (vector unsigned short a, vector unsigned short b) ++{ ++ vector unsigned short c = vec_mergel (a,b); ++ return vec_extract (c, 7); ++} ++ ++int ++main () ++{ ++ vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11}; ++ vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99}; ++ signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2); ++ unsigned short x2 = elem_7 (v1, v2); ++ ++ if (x1 != 22 || x2 != 99) ++ __builtin_abort (); ++ ++ return 0; ++} ++ diff --git a/SOURCES/gcc13-znver5.patch b/SOURCES/gcc13-znver5.patch new file mode 100644 index 0000000..967d073 --- /dev/null +++ b/SOURCES/gcc13-znver5.patch @@ -0,0 +1,2246 @@ +From 9ae3d1ceadd90c134fc4365b36ceb552decb227f Mon Sep 17 00:00:00 2001 +From: Marek Polacek +Date: Thu, 30 May 2024 16:21:45 -0400 +Subject: [PATCH] Add AMD znver5 processor enablement with scheduler model + +2024-02-14 Jan Hubicka + Karthiban Anbazhagan + +gcc/ChangeLog: + * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver5. + * common/config/i386/i386-common.cc (processor_names): Add znver5. + (processor_alias_table): Likewise. + * common/config/i386/i386-cpuinfo.h (processor_types): Add new zen + family. + (processor_subtypes): Add znver5. 
+ * config.gcc (x86_64-*-* |...): Likewise. + * config/i386/driver-i386.cc (host_detect_local_cpu): Let + march=native detect znver5 CPUs. + * config/i386/i386-c.cc (ix86_target_macros_internal): Add + znver5. + * config/i386/i386-options.cc (m_ZNVER5): New definition. + (processor_cost_table): Add znver5. + * config/i386/i386.cc (ix86_reassociation_width): Likewise. + * config/i386/i386.h (processor_type): Add PROCESSOR_ZNVER5. + (PTA_ZNVER5): New definition. + * config/i386/i386.md (define_attr "cpu"): Add znver5. + (Scheduling descriptions): Add znver5.md. + * config/i386/x86-tune-costs.h (znver5_cost): New definition. + * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add znver5. + (ix86_adjust_cost): Likewise. + * config/i386/x86-tune.def (avx512_move_by_pieces): Add m_ZNVER5. + (avx512_store_by_pieces): Add m_ZNVER5. + * doc/extend.texi: Add znver5. + * doc/invoke.texi: Likewise. + * config/i386/znver4.md: Rename to zn4zn5.md; combine znver4 and znver5 schedulers. + +gcc/testsuite/ChangeLog: + * g++.target/i386/mv29.C: Handle znver5 arch. + * gcc.target/i386/funcspec-56.inc: Likewise.
+--- + gcc/common/config/i386/cpuinfo.h | 16 + + gcc/common/config/i386/i386-common.cc | 6 +- + gcc/common/config/i386/i386-cpuinfo.h | 2 + + gcc/config.gcc | 14 +- + gcc/config/i386/driver-i386.cc | 5 + + gcc/config/i386/i386-c.cc | 7 + + gcc/config/i386/i386-options.cc | 6 +- + gcc/config/i386/i386.cc | 3 +- + gcc/config/i386/i386.h | 3 + + gcc/config/i386/i386.md | 5 +- + gcc/config/i386/x86-tune-costs.h | 136 +++ + gcc/config/i386/x86-tune-sched.cc | 2 + + gcc/config/i386/x86-tune.def | 4 +- + gcc/config/i386/{znver4.md => zn4zn5.md} | 817 ++++++++++++++++-- + gcc/doc/extend.texi | 3 + + gcc/doc/invoke.texi | 10 + + gcc/testsuite/g++.target/i386/mv29.C | 6 + + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + + 18 files changed, 986 insertions(+), 61 deletions(-) + rename gcc/config/i386/{znver4.md => zn4zn5.md} (55%) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 441fae0cdc9..a2e28e47a7d 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -310,6 +310,22 @@ get_amd_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3; + } + break; ++ case 0x1a: ++ cpu_model->__cpu_type = AMDFAM1AH; ++ if (model <= 0x77) ++ { ++ cpu = "znver5"; ++ CHECK___builtin_cpu_is ("znver5"); ++ cpu_model->__cpu_subtype = AMDFAM1AH_ZNVER5; ++ } ++ else if (has_cpu_feature (cpu_model, cpu_features2, ++ FEATURE_AVX512VP2INTERSECT)) ++ { ++ cpu = "znver5"; ++ CHECK___builtin_cpu_is ("znver5"); ++ cpu_model->__cpu_subtype = AMDFAM1AH_ZNVER5; ++ } ++ break; + default: + break; + } +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index a8809889360..f3610155807 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1983,7 +1983,8 @@ const char *const processor_names[] = + "znver1", + "znver2", + "znver3", +- "znver4" ++ "znver4", ++ "znver5" + }; + + /* Guarantee that the array is aligned 
with enum processor_type. */ +@@ -2243,6 +2244,9 @@ const pta processor_alias_table[] = + {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4, + PTA_ZNVER4, + M_CPU_SUBTYPE (AMDFAM19H_ZNVER4), P_PROC_AVX512F}, ++ {"znver5", PROCESSOR_ZNVER5, CPU_ZNVER5, ++ PTA_ZNVER5, ++ M_CPU_SUBTYPE (AMDFAM1AH_ZNVER5), P_PROC_AVX512F}, + {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 254dfec70e5..77db97e36b3 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -62,6 +62,7 @@ enum processor_types + ZHAOXIN_FAM7H, + INTEL_SIERRAFOREST, + INTEL_GRANDRIDGE, ++ AMDFAM1AH, + CPU_TYPE_MAX, + BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX + }; +@@ -99,6 +100,7 @@ enum processor_subtypes + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, + INTEL_COREI7_GRANITERAPIDS_D, ++ AMDFAM1AH_ZNVER5, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c3b73d05eb7..ae332a88768 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -675,9 +675,9 @@ c7 esther" + # 64-bit x86 processors supported by --with-arch=. Each processor + # MUST be separated by exactly one space. 
+ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ +-bdver3 bdver4 znver1 znver2 znver3 znver4 btver1 btver2 k8 k8-sse3 opteron \ +-opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \ +-slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ ++bdver3 bdver4 znver1 znver2 znver3 znver4 znver5 btver1 btver2 k8 k8-sse3 \ ++opteron opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 \ ++atom slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ + silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ +@@ -3681,6 +3681,10 @@ case ${target} in + arch=znver4 + cpu=znver4 + ;; ++ znver5-*) ++ arch=znver5 ++ cpu=znver5 ++ ;; + bdver4-*) + arch=bdver4 + cpu=bdver4 +@@ -3814,6 +3818,10 @@ case ${target} in + arch=znver4 + cpu=znver4 + ;; ++ znver5-*) ++ arch=znver5 ++ cpu=znver5 ++ ;; + bdver4-*) + arch=bdver4 + cpu=bdver4 +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index 6a998f5dea5..f4eee496924 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -465,6 +465,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) + processor = PROCESSOR_GEODE; + else if (has_feature (FEATURE_MOVBE) && family == 22) + processor = PROCESSOR_BTVER2; ++ else if (has_feature (FEATURE_AVX512VP2INTERSECT)) ++ processor = PROCESSOR_ZNVER5; + else if (has_feature (FEATURE_AVX512F)) + processor = PROCESSOR_ZNVER4; + else if (has_feature (FEATURE_VAES)) +@@ -793,6 +795,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) + case PROCESSOR_ZNVER4: + cpu = "znver4"; + break; ++ case PROCESSOR_ZNVER5: ++ cpu = "znver5"; ++ break; + case PROCESSOR_BTVER1: + cpu = "btver1"; + break; +diff --git a/gcc/config/i386/i386-c.cc 
b/gcc/config/i386/i386-c.cc +index 5b886e8a760..bbefb66e194 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -136,6 +136,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__znver4"); + def_or_undef (parse_in, "__znver4__"); + break; ++ case PROCESSOR_ZNVER5: ++ def_or_undef (parse_in, "__znver5"); ++ def_or_undef (parse_in, "__znver5__"); ++ break; + case PROCESSOR_BTVER1: + def_or_undef (parse_in, "__btver1"); + def_or_undef (parse_in, "__btver1__"); +@@ -353,6 +357,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_ZNVER4: + def_or_undef (parse_in, "__tune_znver4__"); + break; ++ case PROCESSOR_ZNVER5: ++ def_or_undef (parse_in, "__tune_znver5__"); ++ break; + case PROCESSOR_BTVER1: + def_or_undef (parse_in, "__tune_btver1__"); + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index c1229475138..86102851d4d 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -165,11 +165,12 @@ along with GCC; see the file COPYING3. If not see + #define m_ZNVER2 (HOST_WIDE_INT_1U<integer move cost is 2. */ ++ ++ /* reg-reg moves are done by renaming and thus they are even cheaper than ++ 1 cycle. Because reg-reg move cost is 2 and following tables correspond ++ to doubles of latencies, we do not model this correctly. It does not ++ seem to make practical difference to bump prices up even more. */ ++ 6, /* cost for loading QImode using ++ movzbl. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {14, 14, 17}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {12, 12, 16}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. 
*/ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 2, 3, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ {6, 6, 10, 10, 12}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 12, 12}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 6, 8, /* SSE->integer and integer->SSE ++ moves. */ ++ 8, 8, /* mask->integer and integer->mask moves */ ++ {6, 6, 6}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost of storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ /* TODO: Lea with 3 components has cost 2. */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (3), /* SI. */ ++ COSTS_N_INSNS (3), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit ++ set. */ ++ {COSTS_N_INSNS (10), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (11), /* HI. */ ++ COSTS_N_INSNS (13), /* SI. */ ++ COSTS_N_INSNS (16), /* DI. */ ++ COSTS_N_INSNS (16)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 9, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers.
*/ ++ {6, 6, 10, 10, 12}, /* cost of loading SSE registers ++ in 32bit, 64bit, 128bit, 256bit and 512bit */ ++ {8, 8, 8, 12, 12}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit */ ++ {6, 6, 6, 6, 6}, /* cost of unaligned loads. */ ++ {8, 8, 8, 8, 8}, /* cost of unaligned stores. */ ++ 2, 2, 2, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ 6, /* cost of moving SSE register to integer. */ ++ /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops, ++ throughput 5. Approx 7 uops do not depend on vector size and every load ++ is 5 uops. */ ++ 14, 10, /* Gather load static, per_elt. */ ++ 14, 20, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 1024, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ /* New AMD processors never drop prefetches; if they cannot be performed ++ immediately, they are queued. We set number of simultaneous prefetches ++ to a large constant to reflect this (it probably is not a good idea not ++ to limit number of prefetches at all, as their execution also takes some ++ time). */ ++ 100, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (7), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (7), /* cost of FMUL instruction. */ ++ /* Latency of fdiv is 8-15. */ ++ COSTS_N_INSNS (15), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ /* Latency of fsqrt is 4-10. */ ++ COSTS_N_INSNS (25), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ ++ /* 9-13. 
*/ ++ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ ++ /* Zen can execute 4 integer operations per cycle. FP operations ++ take 3 cycles and it can execute 2 integer additions and 2 ++ multiplications thus reassociation may make sense up to width of 6. ++ SPEC2k6 benchmarks suggest ++ that 4 works better than 6 probably due to register pressure. ++ ++ Integer vector operations are taken by FP unit and execute 3 vector ++ plus/minus operations per cycle but only one multiply. This is adjusted ++ in ix86_reassociation_width. */ ++ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ ++ znver2_memcpy, ++ znver2_memset, ++ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ ++ "16", /* Loop alignment. */ ++ "16", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ ++}; ++ + /* skylake_cost should produce code tuned for Skylake familly of CPUs. */ + static stringop_algs skylake_memcpy[2] = { + {libcall, +diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc +index df4113d4eab..cbaba5f9e3c 100644 +--- a/gcc/config/i386/x86-tune-sched.cc ++++ b/gcc/config/i386/x86-tune-sched.cc +@@ -69,6 +69,7 @@ ix86_issue_rate (void) + case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: + case PROCESSOR_ZNVER4: ++ case PROCESSOR_ZNVER5: + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: +@@ -417,6 +418,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: + case PROCESSOR_ZNVER4: ++ case PROCESSOR_ZNVER5: + /* Stack engine allows to execute push&pop instructions in parall.
*/ + if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) + && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 0fd5bb4430e..48ca9167bf4 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -572,12 +572,12 @@ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", + /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4) ++ m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) + + /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4) ++ m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) + + /*****************************************************************************/ + /*****************************************************************************/ +diff --git a/gcc/config/i386/znver4.md b/gcc/config/i386/zn4zn5.md +similarity index 55% +rename from gcc/config/i386/znver4.md +rename to gcc/config/i386/zn4zn5.md +index 47802a7de2b..ba9cfbb5dfc 100644 +--- a/gcc/config/i386/znver4.md ++++ b/gcc/config/i386/zn4zn5.md +@@ -1,4 +1,4 @@ +-;; Copyright (C) 2012-2023 Free Software Foundation, Inc. ++;; Copyright (C) 2012-2024 Free Software Foundation, Inc. + ;; + ;; This file is part of GCC. + ;; +@@ -21,7 +21,7 @@ + (define_attr "znver4_decode" "direct,vector,double" + (const_string "direct")) + +-;; AMD znver4 Scheduling ++;; AMD znver4 and znver5 Scheduling + ;; Modeling automatons for zen decoders, integer execution pipes, + ;; AGU pipes, branch, floating point execution and fp store units. 
+ (define_automaton "znver4, znver4_ieu, znver4_idiv, znver4_fdiv, znver4_agu, znver4_fpu, znver4_fp_store") +@@ -44,32 +44,44 @@ (define_reservation "znver4-direct" "znver4-decode0|znver4-decode1|znver4-decode + (define_reservation "znver4-double" "znver4-direct") + + +-;; Integer unit 4 ALU pipes. ++;; Integer unit 4 ALU pipes in znver4 6 ALU pipes in znver5. + (define_cpu_unit "znver4-ieu0" "znver4_ieu") + (define_cpu_unit "znver4-ieu1" "znver4_ieu") + (define_cpu_unit "znver4-ieu2" "znver4_ieu") + (define_cpu_unit "znver4-ieu3" "znver4_ieu") ++(define_cpu_unit "znver5-ieu4" "znver4_ieu") ++(define_cpu_unit "znver5-ieu5" "znver4_ieu") ++ + ;; Znver4 has an additional branch unit. + (define_cpu_unit "znver4-bru0" "znver4_ieu") ++ + (define_reservation "znver4-ieu" "znver4-ieu0|znver4-ieu1|znver4-ieu2|znver4-ieu3") ++(define_reservation "znver5-ieu" "znver4-ieu0|znver4-ieu1|znver4-ieu2|znver4-ieu3|znver5-ieu4|znver5-ieu5") + +-;; 3 AGU pipes in znver4 ++;; 3 AGU pipes in znver4 and 4 AGU pipes in znver5 + (define_cpu_unit "znver4-agu0" "znver4_agu") + (define_cpu_unit "znver4-agu1" "znver4_agu") + (define_cpu_unit "znver4-agu2" "znver4_agu") ++(define_cpu_unit "znver5-agu3" "znver4_agu") ++ + (define_reservation "znver4-agu-reserve" "znver4-agu0|znver4-agu1|znver4-agu2") ++(define_reservation "znver5-agu-reserve" "znver4-agu0|znver4-agu1|znver4-agu2|znver5-agu3") + + ;; Load is 4 cycles. We do not model reservation of load unit. + (define_reservation "znver4-load" "znver4-agu-reserve") + (define_reservation "znver4-store" "znver4-agu-reserve") ++(define_reservation "znver5-load" "znver5-agu-reserve") ++(define_reservation "znver5-store" "znver5-agu-reserve") + + ;; vectorpath (microcoded) instructions are single issue instructions. + ;; So, they occupy all the integer units. ++;; This is used for both Znver4 and Znver5, since reserving extra units not used otherwise ++;; is harmless. 
+ (define_reservation "znver4-ivector" "znver4-ieu0+znver4-ieu1 +- +znver4-ieu2+znver4-ieu3+znver4-bru0 +- +znver4-agu0+znver4-agu1+znver4-agu2") ++ +znver4-ieu2+znver4-ieu3+znver5-ieu4+znver5-ieu5+znver4-bru0 ++ +znver4-agu0+znver4-agu1+znver4-agu2+znver5-agu3") + +-;; Floating point unit 4 FP pipes. ++;; Floating point unit 4 FP pipes in znver4 and znver5. + (define_cpu_unit "znver4-fpu0" "znver4_fpu") + (define_cpu_unit "znver4-fpu1" "znver4_fpu") + (define_cpu_unit "znver4-fpu2" "znver4_fpu") +@@ -77,10 +89,6 @@ (define_cpu_unit "znver4-fpu3" "znver4_fpu") + + (define_reservation "znver4-fpu" "znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") + +-(define_reservation "znver4-fvector" "znver4-fpu0+znver4-fpu1 +- +znver4-fpu2+znver4-fpu3 +- +znver4-agu0+znver4-agu1+znver4-agu2") +- + ;; DIV units + (define_cpu_unit "znver4-idiv" "znver4_idiv") + (define_cpu_unit "znver4-fdiv" "znver4_fdiv") +@@ -89,6 +97,19 @@ (define_cpu_unit "znver4-fdiv" "znver4_fdiv") + ;; throughput is limited to only one per cycle. + (define_cpu_unit "znver4-fp-store" "znver4_fp_store") + ++;; Floating point store unit 2 FP pipes in znver5. ++(define_cpu_unit "znver5-fp-store0" "znver4_fp_store") ++(define_cpu_unit "znver5-fp-store1" "znver4_fp_store") ++ ++;; This is used for both Znver4 and Znver5, since reserving extra units not used otherwise ++;; is harmless. 
++(define_reservation "znver4-fvector" "znver4-fpu0+znver4-fpu1 ++ +znver4-fpu2+znver4-fpu3+znver5-fp-store0+znver5-fp-store1 ++ +znver4-agu0+znver4-agu1+znver4-agu2+znver5-agu3") ++ ++(define_reservation "znver5-fp-store256" "znver5-fp-store0|znver5-fp-store1") ++(define_reservation "znver5-fp-store-512" "znver5-fp-store0+znver5-fp-store1") ++ + + ;; Integer Instructions + ;; Move instructions +@@ -100,6 +121,13 @@ (define_insn_reservation "znver4_imov_double" 1 + (eq_attr "memory" "none")))) + "znver4-double,znver4-ieu") + ++(define_insn_reservation "znver5_imov_double" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "double") ++ (and (eq_attr "type" "imov") ++ (eq_attr "memory" "none")))) ++ "znver4-double,znver5-ieu") ++ + (define_insn_reservation "znver4_imov_double_load" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "znver1_decode" "double") +@@ -107,6 +135,13 @@ (define_insn_reservation "znver4_imov_double_load" 5 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-ieu") + ++(define_insn_reservation "znver5_imov_double_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "double") ++ (and (eq_attr "type" "imov") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver5-ieu") ++ + ;; imov, imovx + (define_insn_reservation "znver4_imov" 1 + (and (eq_attr "cpu" "znver4") +@@ -114,12 +149,24 @@ (define_insn_reservation "znver4_imov" 1 + (eq_attr "memory" "none"))) + "znver4-direct,znver4-ieu") + ++(define_insn_reservation "znver5_imov" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver5-ieu") ++ + (define_insn_reservation "znver4_imov_load" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "imov,imovx") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu") + ++(define_insn_reservation "znver5_imov_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr 
"memory" "load"))) ++ "znver4-direct,znver5-load,znver5-ieu") ++ + ;; Push Instruction + (define_insn_reservation "znver4_push" 1 + (and (eq_attr "cpu" "znver4") +@@ -127,12 +174,24 @@ (define_insn_reservation "znver4_push" 1 + (eq_attr "memory" "store"))) + "znver4-direct,znver4-store") + ++(define_insn_reservation "znver5_push" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver5-store") ++ + (define_insn_reservation "znver4_push_mem" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "push") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-store") + ++(define_insn_reservation "znver5_push_mem" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "both"))) ++ "znver4-direct,znver5-load,znver5-store") ++ + ;; Pop instruction + (define_insn_reservation "znver4_pop" 4 + (and (eq_attr "cpu" "znver4") +@@ -140,16 +199,28 @@ (define_insn_reservation "znver4_pop" 4 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load") + ++(define_insn_reservation "znver5_pop" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load") ++ + (define_insn_reservation "znver4_pop_mem" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "pop") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-store") + ++(define_insn_reservation "znver5_pop_mem" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "both"))) ++ "znver4-direct,znver5-load,znver5-store") ++ + ;; Integer Instructions or General instructions + ;; Multiplications + (define_insn_reservation "znver4_imul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "imul") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-ieu1") +@@ -160,30 +231,36 @@ (define_insn_reservation "znver4_imul_load" 7 + (eq_attr "memory" "load"))) + 
"znver4-direct,znver4-load,znver4-ieu1") + ++(define_insn_reservation "znver5_imul_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu1") ++ + ;; Divisions + (define_insn_reservation "znver4_idiv_DI" 18 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none")))) + "znver4-double,znver4-idiv*10") + + (define_insn_reservation "znver4_idiv_SI" 12 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) + "znver4-double,znver4-idiv*6") + + (define_insn_reservation "znver4_idiv_HI" 10 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none")))) + "znver4-double,znver4-idiv*4") + + (define_insn_reservation "znver4_idiv_QI" 9 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) +@@ -196,6 +273,13 @@ (define_insn_reservation "znver4_idiv_DI_load" 22 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*10") + ++(define_insn_reservation "znver5_idiv_DI_load" 22 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*10") ++ + (define_insn_reservation "znver4_idiv_SI_load" 16 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "idiv") +@@ -203,6 +287,13 @@ (define_insn_reservation "znver4_idiv_SI_load" 16 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*6") + ++(define_insn_reservation "znver5_idiv_SI_load" 16 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ 
(eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*6") ++ + (define_insn_reservation "znver4_idiv_HI_load" 14 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "idiv") +@@ -210,6 +301,13 @@ (define_insn_reservation "znver4_idiv_HI_load" 14 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*4") + ++(define_insn_reservation "znver5_idiv_HI_load" 14 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*4") ++ + (define_insn_reservation "znver4_idiv_QI_load" 13 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "idiv") +@@ -217,6 +315,13 @@ (define_insn_reservation "znver4_idiv_QI_load" 13 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*4") + ++(define_insn_reservation "znver5_idiv_QI_load" 13 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*4") ++ + ;; INTEGER/GENERAL Instructions + (define_insn_reservation "znver4_insn" 1 + (and (eq_attr "cpu" "znver4") +@@ -224,14 +329,26 @@ (define_insn_reservation "znver4_insn" 1 + (eq_attr "memory" "none,unknown"))) + "znver4-direct,znver4-ieu") + ++(define_insn_reservation "znver5_insn" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") ++ (eq_attr "memory" "none,unknown"))) ++ "znver4-direct,znver5-ieu") ++ + (define_insn_reservation "znver4_insn_load" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu") + ++(define_insn_reservation "znver5_insn_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver5-ieu") ++ + 
(define_insn_reservation "znver4_insn2" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "icmov,setcc") + (eq_attr "memory" "none,unknown"))) + "znver4-direct,znver4-ieu0|znver4-ieu3") +@@ -242,8 +359,14 @@ (define_insn_reservation "znver4_insn2_load" 5 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu0|znver4-ieu3") + ++(define_insn_reservation "znver5_insn2_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "icmov,setcc") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu0|znver4-ieu3") ++ + (define_insn_reservation "znver4_rotate" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none,unknown"))) + "znver4-direct,znver4-ieu1|znver4-ieu2") +@@ -254,27 +377,51 @@ (define_insn_reservation "znver4_rotate_load" 5 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu1|znver4-ieu2") + ++(define_insn_reservation "znver5_rotate_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "rotate") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu1|znver4-ieu2") ++ + (define_insn_reservation "znver4_insn_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-ieu,znver4-store") + ++(define_insn_reservation "znver5_insn_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-ieu,znver5-store") ++ + (define_insn_reservation "znver4_insn2_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "icmov,setcc") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-ieu0|znver4-ieu3,znver4-store") + ++(define_insn_reservation "znver5_insn2_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "icmov,setcc") 
++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-ieu0|znver4-ieu3,znver5-store") ++ + (define_insn_reservation "znver4_rotate_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-ieu1|znver4-ieu2,znver4-store") + ++(define_insn_reservation "znver5_rotate_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "rotate") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-ieu1|znver4-ieu2,znver5-store") ++ + ;; alu1 instructions + (define_insn_reservation "znver4_alu1_vector" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "znver1_decode" "vector") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none,unknown")))) +@@ -287,15 +434,27 @@ (define_insn_reservation "znver4_alu1_vector_load" 7 + (eq_attr "memory" "load")))) + "znver4-vector,znver4-load,znver4-ivector*3") + ++(define_insn_reservation "znver5_alu1_vector_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "vector") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "load")))) ++ "znver4-vector,znver5-load,znver4-ivector*3") ++ + ;; Call Instruction + (define_insn_reservation "znver4_call" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "call,callv")) + "znver4-double,znver4-ieu0|znver4-bru0,znver4-store") + ++(define_insn_reservation "znver5_call" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "call,callv")) ++ "znver4-double,znver4-ieu0|znver4-bru0,znver5-store") ++ + ;; Branches + (define_insn_reservation "znver4_branch" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-ieu0|znver4-bru0") +@@ -306,8 +465,14 @@ (define_insn_reservation "znver4_branch_load" 5 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu0|znver4-bru0") + ++(define_insn_reservation "znver5_branch_load" 5 ++ (and (eq_attr "cpu" 
"znver5") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu0|znver4-bru0") ++ + (define_insn_reservation "znver4_branch_vector" 2 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "none,unknown"))) + "znver4-vector,znver4-ivector*2") +@@ -318,21 +483,36 @@ (define_insn_reservation "znver4_branch_vector_load" 6 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*2") + ++(define_insn_reservation "znver5_branch_vector_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*2") ++ + ;; LEA instruction with simple addressing + (define_insn_reservation "znver4_lea" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "lea")) + "znver4-direct,znver4-ieu") + ++(define_insn_reservation "znver5_lea" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "lea")) ++ "znver4-direct,znver5-ieu") + ;; Leave + (define_insn_reservation "znver4_leave" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "leave")) + "znver4-double,znver4-ieu,znver4-store") + ++(define_insn_reservation "znver5_leave" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "leave")) ++ "znver4-double,znver5-ieu,znver5-store") ++ + ;; STR and ISHIFT are microcoded. 
+ (define_insn_reservation "znver4_str" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "str") + (eq_attr "memory" "none"))) + "znver4-vector,znver4-ivector*3") +@@ -343,8 +523,14 @@ (define_insn_reservation "znver4_str_load" 7 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*3") + ++(define_insn_reservation "znver5_str_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "str") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*3") ++ + (define_insn_reservation "znver4_ishift" 2 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ishift") + (eq_attr "memory" "none"))) + "znver4-vector,znver4-ivector*2") +@@ -355,9 +541,15 @@ (define_insn_reservation "znver4_ishift_load" 6 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*2") + ++(define_insn_reservation "znver5_ishift_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ishift") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*2") ++ + ;; Other vector type + (define_insn_reservation "znver4_ieu_vector" 5 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "other,multi") + (eq_attr "memory" "none,unknown"))) + "znver4-vector,znver4-ivector*5") +@@ -368,15 +560,21 @@ (define_insn_reservation "znver4_ieu_vector_load" 9 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*5") + ++(define_insn_reservation "znver5_ieu_vector_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "other,multi") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*5") ++ + ;; Floating Point + ;; FP movs + (define_insn_reservation "znver4_fp_cmov" 4 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (eq_attr "type" "fcmov")) + "znver4-vector,znver4-fvector*3") + + (define_insn_reservation 
"znver4_fp_mov_direct" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (eq_attr "type" "fmov")) + "znver4-direct,znver4-fpu0|znver4-fpu1") + +@@ -388,6 +586,13 @@ (define_insn_reservation "znver4_fp_mov_direct_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_fp_mov_direct_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + ;;FST + (define_insn_reservation "znver4_fp_mov_direct_store" 6 + (and (eq_attr "cpu" "znver4") +@@ -396,6 +601,13 @@ (define_insn_reservation "znver4_fp_mov_direct_store" 6 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu0|znver4-fpu1,znver4-fp-store") + ++(define_insn_reservation "znver5_fp_mov_direct_store" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1,znver5-fp-store256") ++ + ;;FILD + (define_insn_reservation "znver4_fp_mov_double_load" 13 + (and (eq_attr "cpu" "znver4") +@@ -404,6 +616,13 @@ (define_insn_reservation "znver4_fp_mov_double_load" 13 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1") + ++(define_insn_reservation "znver5_fp_mov_double_load" 13 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1") ++ + ;;FIST + (define_insn_reservation "znver4_fp_mov_double_store" 7 + (and (eq_attr "cpu" "znver4") +@@ -412,9 +631,16 @@ (define_insn_reservation "znver4_fp_mov_double_store" 7 + (eq_attr "memory" "store")))) + "znver4-double,znver4-fpu1,znver4-fp-store") + ++(define_insn_reservation "znver5_fp_mov_double_store" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr 
"znver1_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "znver4-double,znver4-fpu1,znver5-fp-store256") ++ + ;; FSQRT + (define_insn_reservation "znver4_fsqrt" 22 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fpspc") + (and (eq_attr "mode" "XF") + (eq_attr "memory" "none")))) +@@ -422,20 +648,20 @@ (define_insn_reservation "znver4_fsqrt" 22 + + ;; FPSPC instructions + (define_insn_reservation "znver4_fp_spc" 6 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fpspc") + (eq_attr "memory" "none"))) + "znver4-vector,znver4-fvector*6") + + (define_insn_reservation "znver4_fp_insn_vector" 6 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "znver1_decode" "vector") + (eq_attr "type" "mmxcvt,sselog1,ssemov"))) + "znver4-vector,znver4-fvector*6") + + ;; FADD, FSUB, FMUL + (define_insn_reservation "znver4_fp_op_mul" 7 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fop,fmul") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0") +@@ -446,9 +672,14 @@ (define_insn_reservation "znver4_fp_op_mul_load" 12 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu0") + ++(define_insn_reservation "znver5_fp_op_mul_load" 12 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fop,fmul") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu0") + ;; FDIV + (define_insn_reservation "znver4_fp_div" 15 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fdiv*6") +@@ -459,6 +690,12 @@ (define_insn_reservation "znver4_fp_div_load" 20 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fdiv*6") + ++(define_insn_reservation "znver5_fp_div_load" 20 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fdiv") 
++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fdiv*6") ++ + (define_insn_reservation "znver4_fp_idiv_load" 24 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "fdiv") +@@ -466,15 +703,27 @@ (define_insn_reservation "znver4_fp_idiv_load" 24 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-fdiv*6") + ++(define_insn_reservation "znver5_fp_idiv_load" 24 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fdiv") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-fdiv*6") ++ + ;; FABS, FCHS + (define_insn_reservation "znver4_fp_fsgn" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "fsgn")) + "znver4-direct,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_fp_fsgn" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "fsgn")) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + ;; FCMP + (define_insn_reservation "znver4_fp_fcmp" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu1") +@@ -486,14 +735,21 @@ (define_insn_reservation "znver4_fp_fcmp_double" 4 + (eq_attr "memory" "none")))) + "znver4-double,znver4-fpu1,znver4-fpu2") + ++(define_insn_reservation "znver5_fp_fcmp_double" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fcmp") ++ (and (eq_attr "znver1_decode" "double") ++ (eq_attr "memory" "none")))) ++ "znver4-double,znver4-fpu1,znver5-fp-store256") ++ + ;; MMX, SSE, SSEn.n instructions + (define_insn_reservation "znver4_fp_mmx " 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (eq_attr "type" "mmx")) + "znver4-direct,znver4-fpu1|znver4-fpu2") + + (define_insn_reservation "znver4_mmx_add_cmp" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "mmxadd,mmxcmp") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu") +@@ -504,32 +760,62 @@ 
(define_insn_reservation "znver4_mmx_add_cmp_load" 6 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_mmx_add_cmp_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxadd,mmxcmp") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_mmx_insn" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_mmx_insn" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_mmx_insn_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_mmx_insn_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_mmx_mov" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-fp-store") + ++(define_insn_reservation "znver5_mmx_mov" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver5-fp-store256") ++ + (define_insn_reservation "znver4_mmx_mov_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-fp-store") + ++(define_insn_reservation "znver5_mmx_mov_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "both"))) ++ 
"znver4-direct,znver5-load,znver5-fp-store256") ++ + (define_insn_reservation "znver4_mmx_mul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0|znver4-fpu3") +@@ -540,9 +826,15 @@ (define_insn_reservation "znver4_mmx_mul_load" 8 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu3") + ++(define_insn_reservation "znver5_mmx_mul_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu3") ++ + ;; AVX instructions + (define_insn_reservation "znver4_sse_log" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sselog") + (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -555,6 +847,13 @@ (define_insn_reservation "znver4_sse_log_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_log_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_log1" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -562,6 +861,13 @@ (define_insn_reservation "znver4_sse_log1" 1 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_log1_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -569,20 
+875,39 @@ (define_insn_reservation "znver4_sse_log1_load" 6 + (eq_attr "memory" "both")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "both")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_comi" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "store"))) + "znver4-double,znver4-fpu2|znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "store"))) ++ "znver4-double,znver4-fpu2|znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_comi_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "both"))) + "znver4-double,znver4-load,znver4-fpu2|znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "both"))) ++ "znver4-double,znver5-load,znver4-fpu2|znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_test" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none")))) +@@ -595,8 +920,15 @@ (define_insn_reservation "znver4_sse_test_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_test_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation 
"znver4_sse_imul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseimul") + (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -609,8 +941,15 @@ (define_insn_reservation "znver4_sse_imul_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_sse_imul_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_mov" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -623,6 +962,13 @@ (define_insn_reservation "znver4_sse_mov_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_mov_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_mov_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -630,8 +976,15 @@ (define_insn_reservation "znver4_sse_mov_store" 1 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_mov_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_mov_fp" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" 
"V16SF,V8DF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -644,6 +997,13 @@ (define_insn_reservation "znver4_sse_mov_fp_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_mov_fp_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "V16SF,V8DF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_mov_fp_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -651,8 +1011,22 @@ (define_insn_reservation "znver4_sse_mov_fp_store" 1 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_mov_fp_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver5-fp-store256") ++ ++(define_insn_reservation "znver5_sse_mov_fp_store_512" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_add" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseadd") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -665,8 +1039,15 @@ (define_insn_reservation "znver4_sse_add_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3") + ++(define_insn_reservation "znver5_sse_add_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_add1" 4 +- (and (eq_attr "cpu" 
"znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseadd1") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -679,8 +1060,15 @@ (define_insn_reservation "znver4_sse_add1_load" 9 + (eq_attr "memory" "load")))) + "znver4-vector,znver4-load,znver4-fvector*2") + ++(define_insn_reservation "znver5_sse_add1_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd1") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-vector,znver5-load,znver4-fvector*2") ++ + (define_insn_reservation "znver4_sse_iadd" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -693,8 +1081,15 @@ (define_insn_reservation "znver4_sse_iadd_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_iadd_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_mul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssemul") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -707,15 +1102,22 @@ (define_insn_reservation "znver4_sse_mul_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_sse_mul_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemul") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_div_pd" 13 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" 
"znver4,znver5") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V4DF,V2DF,V1DF") + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fdiv*5") + + (define_insn_reservation "znver4_sse_div_ps" 10 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V8SF,V4SF,V2SF,SF") + (eq_attr "memory" "none")))) +@@ -728,6 +1130,13 @@ (define_insn_reservation "znver4_sse_div_pd_load" 18 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*5") + ++(define_insn_reservation "znver5_sse_div_pd_load" 18 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V4DF,V2DF,V1DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*5") ++ + (define_insn_reservation "znver4_sse_div_ps_load" 15 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -735,8 +1144,15 @@ (define_insn_reservation "znver4_sse_div_ps_load" 15 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*3") + ++(define_insn_reservation "znver5_sse_div_ps_load" 15 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V8SF,V4SF,V2SF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*3") ++ + (define_insn_reservation "znver4_sse_cmp_avx" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "vex") + (eq_attr "memory" "none")))) +@@ -749,20 +1165,39 @@ (define_insn_reservation "znver4_sse_cmp_avx_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_sse_cmp_avx_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "vex") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_comi_avx" 1 + (and 
(eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-fpu2+znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi_avx" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-fpu2+znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_comi_avx_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-fpu2+znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi_avx_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "both"))) ++ "znver4-direct,znver5-load,znver4-fpu2+znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_cvt" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -775,8 +1210,15 @@ (define_insn_reservation "znver4_sse_cvt_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3") + ++(define_insn_reservation "znver5_sse_cvt_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_icvt" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) +@@ -789,6 +1231,13 @@ (define_insn_reservation "znver4_sse_icvt_store" 4 + (eq_attr "memory" "store")))) + "znver4-double,znver4-fpu2|znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_icvt_store" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ 
(and (eq_attr "mode" "SI") ++ (eq_attr "memory" "store")))) ++ "znver4-double,znver4-fpu2|znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_shuf" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -796,6 +1245,13 @@ (define_insn_reservation "znver4_sse_shuf" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_shuf" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_shuf_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -803,8 +1259,15 @@ (define_insn_reservation "znver4_sse_shuf_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_shuf_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_ishuf" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseshuf") + (and (eq_attr "mode" "OI") + (eq_attr "memory" "none")))) +@@ -817,6 +1280,13 @@ (define_insn_reservation "znver4_sse_ishuf_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_ishuf_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + ;; AVX512 instructions + (define_insn_reservation "znver4_sse_log_evex" 1 + (and (eq_attr "cpu" "znver4") +@@ -825,6 +1295,13 @@ (define_insn_reservation "znver4_sse_log_evex" 1 + (eq_attr "memory" 
"none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_log_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_log_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog") +@@ -832,6 +1309,13 @@ (define_insn_reservation "znver4_sse_log_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_log_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_log1_evex" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -839,6 +1323,13 @@ (define_insn_reservation "znver4_sse_log1_evex" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_log1_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -846,6 +1337,13 @@ (define_insn_reservation "znver4_sse_log1_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" 
"load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_mul_evex" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemul") +@@ -853,6 +1351,13 @@ (define_insn_reservation "znver4_sse_mul_evex" 3 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_mul_evex" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemul") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_mul_evex_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemul") +@@ -860,6 +1365,13 @@ (define_insn_reservation "znver4_sse_mul_evex_load" 9 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_mul_evex_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemul") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_imul_evex" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseimul") +@@ -867,6 +1379,13 @@ (define_insn_reservation "znver4_sse_imul_evex" 3 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_imul_evex" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_imul_evex_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseimul") +@@ -874,6 +1393,13 @@ (define_insn_reservation "znver4_sse_imul_evex_load" 9 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_imul_evex_load" 9 ++ (and 
(eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_mov_evex" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -881,6 +1407,13 @@ (define_insn_reservation "znver4_sse_mov_evex" 4 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_mov_evex" 2 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_mov_evex_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -888,6 +1421,13 @@ (define_insn_reservation "znver4_sse_mov_evex_load" 10 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_mov_evex_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_mov_evex_store" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -895,6 +1435,13 @@ (define_insn_reservation "znver4_sse_mov_evex_store" 5 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_mov_evex_store" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_add_evex" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseadd") +@@ -902,6 +1449,13 @@ (define_insn_reservation "znver4_sse_add_evex" 3 + (eq_attr "memory" "none")))) + 
"znver4-direct,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_add_evex" 2 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_add_evex_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseadd") +@@ -909,6 +1463,13 @@ (define_insn_reservation "znver4_sse_add_evex_load" 9 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_add_evex_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_iadd_evex" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseiadd") +@@ -916,6 +1477,13 @@ (define_insn_reservation "znver4_sse_iadd_evex" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_iadd_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_iadd_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseiadd") +@@ -923,6 +1491,13 @@ (define_insn_reservation "znver4_sse_iadd_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_iadd_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation 
"znver4_sse_div_pd_evex" 13 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -930,6 +1505,13 @@ (define_insn_reservation "znver4_sse_div_pd_evex" 13 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fdiv*9") + ++(define_insn_reservation "znver5_sse_div_pd_evex" 13 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fdiv*9") ++ + (define_insn_reservation "znver4_sse_div_ps_evex" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -937,6 +1519,13 @@ (define_insn_reservation "znver4_sse_div_ps_evex" 10 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fdiv*6") + ++(define_insn_reservation "znver5_sse_div_ps_evex" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V16SF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fdiv*6") ++ + (define_insn_reservation "znver4_sse_div_pd_evex_load" 19 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -944,6 +1533,13 @@ (define_insn_reservation "znver4_sse_div_pd_evex_load" 19 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*9") + ++(define_insn_reservation "znver5_sse_div_pd_evex_load" 19 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*9") ++ + (define_insn_reservation "znver4_sse_div_ps_evex_load" 16 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -951,6 +1547,13 @@ (define_insn_reservation "znver4_sse_div_ps_evex_load" 16 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*6") + ++(define_insn_reservation "znver5_sse_div_ps_evex_load" 16 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V16SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*6") ++ + 
(define_insn_reservation "znver4_sse_cmp_avx128" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -959,6 +1562,14 @@ (define_insn_reservation "znver4_sse_cmp_avx128" 3 + (eq_attr "memory" "none"))))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx128" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx128_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -967,6 +1578,14 @@ (define_insn_reservation "znver4_sse_cmp_avx128_load" 9 + (eq_attr "memory" "load"))))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx128_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx256" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -975,6 +1594,14 @@ (define_insn_reservation "znver4_sse_cmp_avx256" 4 + (eq_attr "memory" "none"))))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx256" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx256_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -983,6 +1610,14 @@ (define_insn_reservation "znver4_sse_cmp_avx256_load" 10 + (eq_attr "memory" "load"))))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + 
++(define_insn_reservation "znver5_sse_cmp_avx256_load" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx512" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -991,6 +1626,14 @@ (define_insn_reservation "znver4_sse_cmp_avx512" 5 + (eq_attr "memory" "none"))))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx512" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx512_load" 11 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -999,6 +1642,14 @@ (define_insn_reservation "znver4_sse_cmp_avx512_load" 11 + (eq_attr "memory" "load"))))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx512_load" 11 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cvt_evex" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecvt") +@@ -1006,6 +1657,13 @@ (define_insn_reservation "znver4_sse_cvt_evex" 6 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_cvt_evex" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fpu2|znver4-fpu3") ++ + 
(define_insn_reservation "znver4_sse_cvt_evex_load" 12 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecvt") +@@ -1013,6 +1671,13 @@ (define_insn_reservation "znver4_sse_cvt_evex_load" 12 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_cvt_evex_load" 12 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_shuf_evex" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1020,6 +1685,13 @@ (define_insn_reservation "znver4_sse_shuf_evex" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_shuf_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_shuf_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1027,6 +1699,13 @@ (define_insn_reservation "znver4_sse_shuf_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_shuf_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_ishuf_evex" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1034,6 +1713,13 @@ (define_insn_reservation "znver4_sse_ishuf_evex" 4 + (eq_attr "memory" "none")))) + 
"znver4-direct,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_ishuf_evex" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_ishuf_evex_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1041,18 +1727,37 @@ (define_insn_reservation "znver4_sse_ishuf_evex_load" 10 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_ishuf_evex_load" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_muladd" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_muladd" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_muladd_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_muladd_load" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + ;; AVX512 mask instructions + + (define_insn_reservation "znver4_sse_mskmov" 2 +@@ -1061,8 +1766,20 @@ (define_insn_reservation "znver4_sse_mskmov" 2 + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_mskmov" 2 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mskmov") ++ 
(eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_msklog" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "msklog") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu2*2|znver4-fpu3*2") ++ ++(define_insn_reservation "znver5_sse_msklog" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "msklog") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu3") +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index d6fcd611339..5c6ce67b7d1 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -22060,6 +22060,9 @@ AMD Family 19h Zen version 3. + + @item znver4 + AMD Family 19h Zen version 4. ++ ++@item znver5 ++AMD Family 1ah Zen version 5. + @end table + + Here is an example: +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 792ce283bb9..9bec6eb01bd 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -32660,6 +32660,16 @@ WBNOINVD, PKU, VPCLMULQDQ, VAES, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD, + AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, AVX512VNNI, + AVX512BITALG, AVX512VPOPCNTDQ, GFNI and 64-bit instruction set extensions.) + ++@item znver5 ++AMD Family 1ah core based CPUs with x86-64 instruction set support. (This ++supersets BMI, BMI2, CLWB, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED, ++MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, ++SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID, ++WBNOINVD, PKU, VPCLMULQDQ, VAES, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD, ++AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, AVX512VNNI, ++AVX512BITALG, AVX512VPOPCNTDQ, GFNI, AVXVNNI, MOVDIRI, MOVDIR64B, ++AVX512VP2INTERSECT, PREFETCHI and 64-bit instruction set extensions.) ++ + @item btver1 + CPUs based on AMD Family 14h cores with x86-64 instruction set support. 
(This + supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit +diff --git a/gcc/testsuite/g++.target/i386/mv29.C b/gcc/testsuite/g++.target/i386/mv29.C +index a8dd8ac4803..ab229534edd 100644 +--- a/gcc/testsuite/g++.target/i386/mv29.C ++++ b/gcc/testsuite/g++.target/i386/mv29.C +@@ -53,6 +53,10 @@ int __attribute__ ((target("arch=znver4"))) foo () { + return 10; + } + ++int __attribute__ ((target("arch=znver5"))) foo () { ++ return 11; ++} ++ + int main () + { + int val = foo (); +@@ -77,6 +81,8 @@ int main () + assert (val == 9); + else if (__builtin_cpu_is ("znver4")) + assert (val == 10); ++ else if (__builtin_cpu_is ("znver5")) ++ assert (val == 11); + else + assert (val == 0); + +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index f466962c36c..6bfdcdade82 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -221,6 +221,7 @@ extern void test_arch_znver1 (void) __attribute__((__target__("arch= + extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2"))); + extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3"))); + extern void test_arch_znver4 (void) __attribute__((__target__("arch=znver4"))); ++extern void test_arch_znver5 (void) __attribute__((__target__("arch=znver5"))); + + extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); + extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); +@@ -245,6 +246,7 @@ extern void test_tune_znver1 (void) __attribute__((__target__("tune= + extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2"))); + extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3"))); + extern void test_tune_znver4 (void) __attribute__((__target__("tune=znver4"))); ++extern void test_tune_znver5 (void) __attribute__((__target__("tune=znver5"))); + + extern void test_fpmath_sse (void) 
__attribute__((__target__("sse2,fpmath=sse"))); + extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387"))); +-- +2.45.1 + diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec index e2a7258..f5d4827 100644 --- a/SPECS/gcc.spec +++ b/SPECS/gcc.spec @@ -5,14 +5,14 @@ BuildRequires: scl-utils-build %{?scl:%global __strip %%{_scl_root}/usr/bin/strip} %{?scl:%global __objdump %%{_scl_root}/usr/bin/objdump} %{?scl:%scl_package gcc} -%global DATE 20230614 -%global gitrev 0d7019741b037c7e9c4e57d6de3bce6bb2ed8026 -%global gcc_version 13.1.1 +%global DATE 20240611 +%global gitrev 03b1a31f9807251f378fcecb29c4669eed357eb2 +%global gcc_version 13.3.1 %global gcc_major 13 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 4 -%global nvptx_tools_gitrev 93e00909ceb9cbbc104f0fcba56c0361ffb3ca4b +%global gcc_release 2 +%global nvptx_tools_gitrev aa3404ad5a496cda5d79a50bedb1344fd63e8763 %global newlib_cygwin_gitrev 9e09d6ed83cce4777a5950412647ccc603040409 %global mpc_version 1.0.3 %global isl_version 0.24 @@ -146,10 +146,10 @@ BuildRequires: scl-utils-build %else %global build_annobin_plugin 0 %endif -Summary: GCC version 13 +Summary: GCC version %{gcc_major} Name: %{?scl_prefix}gcc Version: %{gcc_version} -Release: %{gcc_release}.3%{?dist} +Release: %{gcc_release}.1%{?dist} # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. 
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -329,6 +329,11 @@ Patch8: gcc13-no-add-needed.patch Patch9: gcc13-Wno-format-security.patch Patch10: gcc13-rh1574936.patch Patch11: gcc13-d-shared-libphobos.patch +Patch12: gcc13-znver5.patch +Patch13: gcc13-pr107071.patch +Patch14: gcc13-vector-merge-1.patch +Patch15: gcc13-vector-merge-2.patch +Patch16: gcc13-vector-merge-3.patch Patch50: isl-rh2155127.patch @@ -358,9 +363,7 @@ Patch3016: 0019-xfails.patch Patch3017: 0020-more-fixes.patch Patch3018: 0021-libstdc++-disable-tests.patch Patch3019: 0022-libstdc++-revert-behavior.patch -Patch3020: gcc13-testsuite-no-ssp.patch Patch3021: gcc13-testsuite-p10.patch -Patch3022: gcc13-testsuite-plugin.patch Patch3023: gcc13-testsuite-dwarf.patch %if 0%{?rhel} == 9 @@ -392,10 +395,11 @@ Patch3023: gcc13-testsuite-dwarf.patch %endif %description -The %{?scl_prefix}gcc%{!?scl:13} package contains the GNU Compiler Collection version 10. +The %{?scl_prefix}gcc%{!?scl:13} package contains the GNU Compiler Collection +version %{gcc_major}. %package -n libgcc -Summary: GCC version 13 shared support library +Summary: GCC version %{gcc_major} shared support library Autoreq: false %description -n libgcc @@ -403,7 +407,7 @@ This package contains GCC shared support library which is needed e.g. for exception handling support. %package c++ -Summary: C++ support for GCC version 13 +Summary: C++ support for GCC version %{gcc_major} Requires: %{?scl_prefix}gcc%{!?scl:13} = %{version}-%{release} Requires: libstdc++ Requires: %{?scl_prefix}libstdc++%{!?scl:13}-devel = %{version}-%{release} @@ -411,8 +415,8 @@ Autoreq: true %description c++ This package adds C++ support to the GNU Compiler Collection -version 13. It includes support for most of the current C++ specification -and a lot of support for the upcoming C++ specification. +version %{gcc_major}. 
It includes support for most of the current C++ +specification and a lot of support for the upcoming C++ specification. %package -n libstdc++ Summary: GNU Standard C++ Library @@ -442,7 +446,7 @@ Manual, doxygen generated API information and Frequently Asked Questions for the GNU standard C++ library. %package gfortran -Summary: Fortran support for GCC 13 +Summary: Fortran support for GCC %{gcc_major} Requires: %{?scl_prefix}gcc%{!?scl:13} = %{version}-%{release} %if 0%{?rhel} > 7 Requires: libgfortran >= 8.1.1 @@ -465,18 +469,18 @@ programs with the GNU Compiler Collection. %package gdb-plugin -Summary: GCC 13 plugin for GDB +Summary: GCC %{gcc_major} plugin for GDB Requires: %{?scl_prefix}gcc%{!?scl:13} = %{version}-%{release} %description gdb-plugin -This package contains GCC 13 plugin for GDB C expression evaluation. +This package contains GCC %{gcc_major} plugin for GDB C expression evaluation. %package -n %{?scl_prefix}libgccjit Summary: Library for embedding GCC inside programs and libraries Requires: %{?scl_prefix}gcc%{!?scl:13} = %{version}-%{release} %description -n %{?scl_prefix}libgccjit -This package contains shared library with GCC 13 JIT front-end. +This package contains shared library with GCC %{gcc_major} JIT front-end. %package -n %{?scl_prefix}libgccjit-devel Summary: Support for embedding GCC inside programs and libraries @@ -486,7 +490,7 @@ Requires: %{?scl_prefix}libgccjit = %{version}-%{release} #Requires: %%{?scl_prefix}libgccjit-docs = %%{version}-%%{release} %description -n %{?scl_prefix}libgccjit-devel -This package contains header files for GCC 13 JIT front end. +This package contains header files for GCC %{gcc_major} JIT front end. 
%package -n %{?scl_prefix}libgccjit-docs Summary: Documentation for embedding GCC inside programs and libraries @@ -500,10 +504,10 @@ Requires(post): /sbin/install-info Requires(preun): /sbin/install-info %description -n %{?scl_prefix}libgccjit-docs -This package contains documentation for GCC 13 JIT front-end. +This package contains documentation for GCC %{gcc_major} JIT front-end. %package -n libquadmath -Summary: GCC 13 __float128 shared support library +Summary: GCC %{gcc_major} __float128 shared support library Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -512,7 +516,7 @@ This package contains GCC shared support library which is needed for __float128 math support and for Fortran REAL*16 support. %package -n %{?scl_prefix}libquadmath-devel -Summary: GCC 13 __float128 support +Summary: GCC %{gcc_major} __float128 support Group: Development/Libraries %if 0%{!?scl:1} Requires: %{?scl_prefix}libquadmath%{_isa} = %{version}-%{release} @@ -553,7 +557,7 @@ Requires: libmpc-devel >= 0.8.1 %description plugin-devel This package contains header files and other support files -for compiling GCC 13 plugins. The GCC plugin ABI is currently +for compiling GCC %{gcc_major} plugins. The GCC plugin ABI is currently not stable, so plugins must be rebuilt any time GCC is updated. %package -n libatomic @@ -575,10 +579,10 @@ Requires: libatomic%{_isa} >= 4.8.0 This package contains GNU Atomic static libraries. %package -n libasan8 -Summary: The Address Sanitizer runtime library from GCC 13 +Summary: The Address Sanitizer runtime library from GCC %{gcc_major} %description -n libasan8 -This package contains the Address Sanitizer library from GCC 13 +This package contains the Address Sanitizer library from GCC %{gcc_major} which is used for -fsanitize=address instrumented programs. %package -n %{?scl_prefix}libasan-devel @@ -695,6 +699,11 @@ so that there cannot be any synchronization problems. 
%patch -P10 -p0 -b .rh1574936~ %endif %patch -P11 -p0 -b .d-shared-libphobos~ +%patch -P12 -p1 -b .znver5~ +%patch -P13 -p1 -b .pr107071~ +%patch -P14 -p1 -b .vector-merge-1~ +%patch -P15 -p1 -b .vector-merge-2~ +%patch -P16 -p1 -b .vector-merge-3~ %if 0%{?rhel} >= 6 %patch -P100 -p1 -b .fortran-fdec-duplicates~ @@ -740,9 +749,7 @@ rm -f libphobos/testsuite/libphobos.gc/forkgc2.d %if 0%{?rhel} <= 7 %patch -P3019 -p1 -b .dts-test-19~ %endif -%patch -P3020 -p1 -b .dts-test-20~ %patch -P3021 -p1 -b .dts-test-21~ -%patch -P3022 -p1 -b .dts-test-22~ %patch -P3023 -p1 -b .dts-test-23~ find gcc/testsuite -name \*.pr96939~ | xargs rm -f @@ -927,7 +934,7 @@ CONFIGURE_OPTS="\ %endif --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \ --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only \ - --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=%{_datadir}/zoneinfo \ + --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=%{_root_datadir}/zoneinfo \ %ifnarch %{mips} --with-linker-hash-style=gnu \ %endif @@ -1181,7 +1188,8 @@ CC="`$ANNOBIN_FLAGS --build-cc`" CXX="`$ANNOBIN_FLAGS --build-cxx`" \ CFLAGS="$ANNOBIN_CFLAGS1 $ANNOBIN_CFLAGS2 $ANNOBIN_LDFLAGS" \ CXXFLAGS="$ANNOBIN_CFLAGS1 `$ANNOBIN_FLAGS --build-includes` $ANNOBIN_CFLAGS2 $ANNOBIN_LDFLAGS" \ ./configure --with-gcc-plugin-dir=%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin \ - --without-annocheck --without-tests --without-docs --disable-rpath --without-debuginfod + --without-annocheck --without-tests --without-docs --disable-rpath --without-debuginfod \ + --without-clang-plugin --without-llvm-plugin make cd ../.. 
%endif @@ -1826,7 +1834,7 @@ ln -sf ../../../%{multilib_32_arch}-%{_vendor}-%{_target_os}%{?_gnu}/%{gcc_major %if 0%{?_enable_debug_packages} mkdir -p $RPM_BUILD_ROOT%{?scl:%{_root_prefix}}%{!?scl:%{_prefix}}/lib/debug%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major} adirs="$FULLPATH" -if [ $FULLLPATH -ne $FULLPATH ]; then +if [ "$FULLLPATH" != "$FULLPATH" ]; then adirs="$adirs $FULLLPATH" fi for f in `find $adirs -maxdepth 1 -a \ @@ -1946,7 +1954,7 @@ rm -f %{buildroot}%{_prefix}/%{_lib}/libssp* rm -f %{buildroot}%{_prefix}/%{_lib}/libvtv* || : rm -f %{buildroot}/lib/cpp rm -f %{buildroot}/%{_lib}/libgcc_s* -rm -f %{buildroot}%{_prefix}/bin/{f95,gccbug,gnatgcc*} +rm -f %{buildroot}%{_prefix}/bin/{gccbug,gnatgcc*} rm -f %{buildroot}%{_prefix}/bin/%{gcc_target_platform}-gfortran %if 0%{!?scl:1} rm -f %{buildroot}%{_prefix}/bin/{*c++*,cc,cpp} @@ -2618,6 +2626,7 @@ fi %files gfortran %{_prefix}/bin/gfortran +%{_prefix}/bin/f95 %if 0%{?scl:1} %{_mandir}/man1/gfortran.1* %{_infodir}/gfortran* @@ -2906,6 +2915,130 @@ fi %endif %changelog +* Fri Jul 12 2024 Marek Polacek 13.3.1-2.1 +- fix wrong RTL patterns for vector merge high/low word on LE (RHEL-45191) + +* Tue Jun 11 2024 Marek Polacek 13.3.1-2 +- update from releases/gcc-13 branch + - PRs ada/114398, ada/114708, c/114493, c++/111529, c++/113598, + fortran/110415, fortran/114827, fortran/115150, libstdc++/114940, + libstdc++/115269, middle-end/108789, rtl-optimization/114902, + rtl-optimization/115092, target/113281, target/113719, target/115297, + target/115317, target/115324, tree-optimization/115192, + tree-optimization/115307, tree-optimization/115337 +- fix a shell condition (RHEL-40722) +- backport a fix for modes_1.f90 (RHEL-40234) +- fix up pointer types to may_alias structures (PR c/114493, RHEL-39736) + +* Mon Jun 3 2024 Marek Polacek 13.3.1-1 +- update from releases/gcc-13 branch + - GCC 13.3 release + - PRs analyzer/104042, analyzer/108171, analyzer/109251, analyzer/109577, + analyzer/110014, 
analyzer/110112, analyzer/110700, analyzer/110882, + analyzer/111289, analyzer/112790, analyzer/112889, analyzer/112969, + analyzer/113253, analyzer/113333, analyzer/114408, analyzer/114473, + bootstrap/106472, bootstrap/114369, c/112571, c/114780, c++/89224, + c++/97990, c++/100667, c++/103825, c++/110006, c++/111284, c++/112769, + c++/113141, c++/113966, c++/114303, c++/114377, c++/114537, + c++/114561, c++/114562, c++/114572, c++/114580, c++/114634, + c++/114691, c++/114709, debug/112718, driver/111700, fortran/36337, + fortran/50410, fortran/55978, fortran/89462, fortran/93678, + fortran/95374, fortran/101135, fortran/102003, fortran/103707, + fortran/103715, fortran/103716, fortran/104352, fortran/106987, + fortran/106999, fortran/107426, fortran/110987, fortran/112407, + fortran/113799, fortran/113866, fortran/113885, fortran/113956, + fortran/114001, fortran/114474, fortran/114535, fortran/114739, + fortran/114825, fortran/115039, gcov-profile/114115, + gcov-profile/114715, ipa/92606, ipa/108007, ipa/111571, ipa/112616, + ipa/113359, ipa/113907, ipa/113964, jit/110466, libgcc/111731, + libquadmath/114533, libstdc++/66146, libstdc++/93672, + libstdc++/104606, libstdc++/107800, libstdc++/108976, + libstdc++/110050, libstdc++/110054, libstdc++/113841, + libstdc++/114147, libstdc++/114316, libstdc++/114359, + libstdc++/114367, libstdc++/114401, libstdc++/114750, + libstdc++/114803, libstdc++/114863, libstdc++/115063, lto/114655, + middle-end/110027, middle-end/111151, middle-end/111632, + middle-end/111683, middle-end/112684, middle-end/112732, + middle-end/113396, middle-end/113622, middle-end/114070, + middle-end/114348, middle-end/114552, middle-end/114599, + middle-end/114734, middle-end/114753, middle-end/114907, + rtl-optimization/54052, rtl-optimization/114415, + rtl-optimization/114768, rtl-optimization/114924, sanitizer/97696, + sanitizer/114687, sanitizer/114743, sanitizer/114956, + sanitizer/115172, target/88309, target/101865, target/105522, + 
target/110621, target/111234, target/111600, target/111610, + target/111822, target/112397, target/113095, target/113233, + target/113950, target/114049, target/114130, target/114160, + target/114172, target/114175, target/114272, target/114747, + target/114752, target/114794, target/114837, target/114848, + target/114981, testsuite/111066, testsuite/112297, testsuite/114034, + testsuite/114036, testsuite/114662, tree-optimization/91838, + tree-optimization/109925, tree-optimization/110838, + tree-optimization/111009, tree-optimization/111268, + tree-optimization/111407, tree-optimization/111736, + tree-optimization/111882, tree-optimization/112281, + tree-optimization/112303, tree-optimization/112793, + tree-optimization/112961, tree-optimization/112991, + tree-optimization/113552, tree-optimization/113630, + tree-optimization/113670, tree-optimization/113831, + tree-optimization/113910, tree-optimization/114027, + tree-optimization/114115, tree-optimization/114121, + tree-optimization/114203, tree-optimization/114231, + tree-optimization/114246, tree-optimization/114375, + tree-optimization/114396, tree-optimization/114485, + tree-optimization/114566, tree-optimization/114672, + tree-optimization/114733, tree-optimization/114736, + tree-optimization/114749, tree-optimization/114787, + tree-optimization/114799, tree-optimization/114876, + tree-optimization/114965, tree-optimization/115143, + tree-optimization/115152, tree-optimization/115154 +- add --without-clang-plugin --without-llvm-plugin to annobin configure + options + +* Mon Dec 11 2023 Marek Polacek 13.2.1-6.2 +- use the system dir in --with-libstdcxx-zoneinfo (RHEL-20522) + +* Mon Dec 11 2023 Marek Polacek 13.2.1-6.1 +- add f95 (RHEL-17656) + +* Wed Dec 6 2023 Marek Polacek 13.2.1-6 +- update from releases/gcc-13 branch + - PRs c++/33799, c++/102191, c++/111703, c++/112269, c++/112301, c++/112633, + c/112339, fortran/111880, fortran/112764, libgomp/111413, + libstdc++/112348, libstdc++/112491, 
libstdc++/112607, + middle-end/111497, target/53372, target/110411, target/111408, + target/111815, target/111828, target/112672, tree-optimization/111137, + tree-optimization/111465, tree-optimization/111967, + tree-optimization/112496 +- add -fno-stack-protector to aarch64 tests (RHEL-16940) + +* Mon Nov 13 2023 Marek Polacek 13.2.1-5 +- update from releases/gcc-13 branch + - PRs c++/89038, c/111884, d/110712, d/112270, fortran/67740, fortran/97245, + fortran/111837, fortran/112316, libbacktrace/111315, + libbacktrace/112263, libstdc++/110944, libstdc++/111172, + libstdc++/111936, libstdc++/112089, libstdc++/112314, + middle-end/111253, middle-end/111818, modula2/111756, modula2/112110, + target/101177, target/110170, target/111001, target/111366, + target/111367, target/111380, target/111935, target/112443, + tree-optimization/111397, tree-optimization/111445, + tree-optimization/111489, tree-optimization/111583, + tree-optimization/111614, tree-optimization/111622, + tree-optimization/111694, tree-optimization/111764, + tree-optimization/111820, tree-optimization/111833, + tree-optimization/111917 + - fix aarch64 RA ICE (#2241139, PR target/111528) +- fix ia32 doubleword rotates (#2238781, PR target/110792) + +* Thu Nov 9 2023 Marek Polacek 13.2.1-4 +- update from releases/gcc-13 branch + - PRs ada/110488, ada/111434, c++/99631, c++/111471, c++/111485, c++/111493, + c++/111512, fortran/68155, fortran/92586, fortran/111674, + libstdc++/108046, libstdc++/111050, libstdc++/111102, + libstdc++/111511, middle-end/111699, modula2/111510, target/111121, + target/111411, tree-optimization/110315, tree-optimization/110386, + tree-optimization/111331, tree-optimization/111519 + * Thu Jul 6 2023 Marek Polacek 13.1.1-4.3 - fix utf-1.C with -gdwarf-4 (#2217506)