From b02c7dc4db9e49eca28789c43532a47d915ceb5c Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Thu, 5 Sep 2024 03:34:02 +0300 Subject: [PATCH] import gcc-toolset-12-gcc-12.2.1-7.7.el9_4 --- SOURCES/gcc12-vector-merge-1.patch | 522 +++++++++++++++++++++++++++++ SOURCES/gcc12-vector-merge-2.patch | 240 +++++++++++++ SOURCES/gcc12-vector-merge-3.patch | 306 +++++++++++++++++ SPECS/gcc.spec | 11 +- 4 files changed, 1078 insertions(+), 1 deletion(-) create mode 100644 SOURCES/gcc12-vector-merge-1.patch create mode 100644 SOURCES/gcc12-vector-merge-2.patch create mode 100644 SOURCES/gcc12-vector-merge-3.patch diff --git a/SOURCES/gcc12-vector-merge-1.patch b/SOURCES/gcc12-vector-merge-1.patch new file mode 100644 index 0000000..f3d6ff9 --- /dev/null +++ b/SOURCES/gcc12-vector-merge-1.patch @@ -0,0 +1,522 @@ +commit 96ef3367067219c8e3eb88c0474a1090cc7749b4 +Author: Kewen Lin +Date: Thu Jun 20 20:23:56 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low word on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low word, which are altivec_vmrg[hl]w, + vsx_xxmrg[hl]w_<mode>. These defines are mainly for + built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, + __builtin_vsx_xxmrghw_4si and some internal gen function + needs. These functions should consider endianness, taking + vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges + the first halves (in element order) of two vectors", it does + note it's in element order. So it's mapped into vmrghw on + BE while vmrglw on LE respectively. 
Although the mapped + insns are different, as discussed in PR106069, the RTL + pattern should still be the same; this held before + commit r12-4496, when define_expand altivec_vmrghw got expanded + into: + + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on both BE and LE. But commit r12-4496 changed it to + expand into: + + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on BE, and + + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + + on LE. Although the mapped insns are still vmrghw on BE and + vmrglw on LE, the associated RTL pattern is completely + wrong and inconsistent with the mapped insn. If optimization + passes leave this pattern alone, even if its pattern doesn't + represent its mapped insn, it's still fine; that's why simple + testing on the built-in functions doesn't expose this issue. But once some + optimization pass such as combine makes changes based + on this wrong pattern, because the pattern doesn't match the + semantics that the expanded insn is intended to represent, + it causes unexpected results. + + So this patch fixes the wrong RTL pattern and ensures the + associated RTL patterns become the same as before, so that they + have the same semantics as their mapped insns. 
With the + proposed patch, expanders like altivec_vmrghw expand + into altivec_vmrghw_direct_v4si_be or altivec_vmrglw_direct_v4si_le + depending on endianness; "direct" shows which + insn will be generated, while _be and _le distinguish the + different RTL patterns per endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghw_direct_<mode>): Rename + to ... + (altivec_vmrghw_direct_<mode>_be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrghw_direct_<mode>_le): New define_insn. + (altivec_vmrglw_direct_<mode>): Rename to ... + (altivec_vmrglw_direct_<mode>_be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrglw_direct_<mode>_le): New define_insn. + (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be + for BE and gen_altivec_vmrglw_direct_v4si_le for LE. + (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be + for BE and gen_altivec_vmrghw_direct_v4si_le for LE. + (vec_widen_umult_hi_v8hi): Adjust the call to + gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE + and by gen_altivec_vmrglw for LE. + (vec_widen_smult_hi_v8hi): Likewise. + (vec_widen_umult_lo_v8hi): Adjust the call to + gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE + and by gen_altivec_vmrghw for LE. + (vec_widen_smult_lo_v8hi): Likewise. + * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghw_direct_v4si by + CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace + CODE_FOR_altivec_vmrglw_direct_v4si by + CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. + * config/rs6000/vsx.md (vsx_xxmrghw_<mode>): Adjust by calling + gen_altivec_vmrghw_direct_v4si_be for BE and + gen_altivec_vmrglw_direct_v4si_le for LE. 
+ (vsx_xxmrglw_): Adjust by calling + gen_altivec_vmrglw_direct_v4si_be for BE and + gen_altivec_vmrghw_direct_v4si_le for LE. + + gcc/testsuite/ChangeLog: + + * g++.target/powerpc/pr106069.C: New test. + * gcc.target/powerpc/pr115355.c: New test. + + (cherry picked from commit 52c112800d9f44457c4832309a48c00945811313) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 3849db5ca3c..0c408a9e839 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1212,16 +1212,18 @@ (define_expand "altivec_vmrghw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_v4si +- : gen_altivec_vmrglw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghw_direct_" ++(define_insn "altivec_vmrghw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1229,7 +1231,21 @@ (define_insn "altivec_vmrghw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrghw %x0,%x1,%x2 ++ vmrghw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 2) (const_int 6) ++ (const_int 3) (const_int 7)])))] ++ 
"TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrghw %x0,%x1,%x2 + vmrghw %0,%1,%2" +@@ -1318,16 +1334,18 @@ (define_expand "altivec_vmrglw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_v4si +- : gen_altivec_vmrghw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglw_direct_" ++(define_insn "altivec_vmrglw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1335,7 +1353,21 @@ (define_insn "altivec_vmrglw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrglw %x0,%x1,%x2 ++ vmrglw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 0) (const_int 4) ++ (const_int 1) (const_int 5)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrglw %x0,%x1,%x2 + vmrglw %0,%1,%2" +@@ -3807,13 +3839,13 @@ (define_expand "vec_widen_umult_hi_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + 
emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3832,13 +3864,13 @@ (define_expand "vec_widen_umult_lo_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3857,13 +3889,13 @@ (define_expand "vec_widen_smult_hi_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3882,13 +3914,13 @@ (define_expand "vec_widen_smult_lo_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn 
(gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index f5db6436dfa..23b553131a9 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -22979,8 +22979,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglh_direct, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si +- : CODE_FOR_altivec_vmrglw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be ++ : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +@@ -22991,8 +22991,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghh_direct, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si +- : CODE_FOR_altivec_vmrghw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be ++ : CODE_FOR_altivec_vmrghw_direct_v4si_le, + {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}, + {OPTION_MASK_P8_VECTOR, + BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct +diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md +index e16f893c073..226a1049917 100644 +--- a/gcc/config/rs6000/vsx.md ++++ b/gcc/config/rs6000/vsx.md +@@ -4694,12 +4694,14 @@ (define_expand "vsx_xxmrghw_" + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? 
gen_altivec_vmrghw_direct_ +- : gen_altivec_vmrglw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +@@ -4714,12 +4716,14 @@ (define_expand "vsx_xxmrglw_" + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_ +- : gen_altivec_vmrghw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C +new file mode 100644 +index 00000000000..537207d2fe8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/powerpc/pr106069.C +@@ -0,0 +1,119 @@ ++/* { dg-options "-O -fno-tree-forwprop -maltivec" } */ ++/* { dg-require-effective-target vmx_hw } */ ++/* { dg-do run } */ ++ ++typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type; ++ ++union ++{ ++ native_simd_type V; ++ int R[4]; ++} store_le_vec; ++ ++struct S ++{ ++ S () = default; ++ S (unsigned B0) ++ { ++ native_simd_type val{B0}; ++ m_simd = val; ++ } ++ void store_le (unsigned int out[]) ++ { ++ store_le_vec.V = m_simd; ++ unsigned int x0 = store_le_vec.R[0]; ++ __builtin_memcpy (out, &x0, 4); ++ } ++ S rotl (unsigned int r) ++ { ++ native_simd_type rot{r}; ++ return __builtin_vec_rl (m_simd, rot); ++ } ++ void operator+= 
(S other) ++ { ++ m_simd = __builtin_vec_add (m_simd, other.m_simd); ++ } ++ void operator^= (S other) ++ { ++ m_simd = __builtin_vec_xor (m_simd, other.m_simd); ++ } ++ static void transpose (S &B0, S B1, S B2, S B3) ++ { ++ native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd); ++ native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd); ++ native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd); ++ native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd); ++ B0 = __builtin_vec_mergeh (T0, T1); ++ B3 = __builtin_vec_mergel (T2, T3); ++ } ++ S (native_simd_type x) : m_simd (x) {} ++ native_simd_type m_simd; ++}; ++ ++void ++foo (unsigned int output[], unsigned state[]) ++{ ++ S R00 = state[0]; ++ S R01 = state[0]; ++ S R02 = state[2]; ++ S R03 = state[0]; ++ S R05 = state[5]; ++ S R06 = state[6]; ++ S R07 = state[7]; ++ S R08 = state[8]; ++ S R09 = state[9]; ++ S R10 = state[10]; ++ S R11 = state[11]; ++ S R12 = state[12]; ++ S R13 = state[13]; ++ S R14 = state[4]; ++ S R15 = state[15]; ++ for (int r = 0; r != 10; ++r) ++ { ++ R09 += R13; ++ R11 += R15; ++ R05 ^= R09; ++ R06 ^= R10; ++ R07 ^= R11; ++ R07 = R07.rotl (7); ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 ^= R01; ++ R13 ^= R02; ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 = R12.rotl (8); ++ R13 = R13.rotl (8); ++ R10 += R15; ++ R11 += R12; ++ R08 += R13; ++ R09 += R14; ++ R05 ^= R10; ++ R06 ^= R11; ++ R07 ^= R08; ++ R05 = R05.rotl (7); ++ R06 = R06.rotl (7); ++ R07 = R07.rotl (7); ++ } ++ R00 += state[0]; ++ S::transpose (R00, R01, R02, R03); ++ R00.store_le (output); ++} ++ ++unsigned int res[1]; ++unsigned main_state[]{1634760805, 60878, 2036477234, 6, ++ 0, 825562964, 1471091955, 1346092787, ++ 506976774, 4197066702, 518848283, 118491664, ++ 0, 0, 0, 0}; ++int ++main () ++{ ++ foo (res, main_state); ++ if (res[0] != 0x41fcef98) ++ __builtin_abort (); ++ return 0; ++} +diff --git 
a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c +new file mode 100644 +index 00000000000..8955126b808 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target p9vector_hw } */ ++/* Force vectorization with -fno-vect-cost-model to have vector unpack ++ which exposes the issue in PR115355. */ ++/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */ ++ ++/* Verify it runs successfully. */ ++ ++__attribute__((noipa)) ++void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen) ++{ ++ #pragma GCC novector ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++__attribute__((noipa)) ++void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen) ++{ ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++unsigned long long vec1[100]; ++unsigned long long vec2[100]; ++ ++int main() ++{ ++ unsigned int l = 29; ++ setToIdentityGOOD (vec1, 29); ++ setToIdentityBAD (vec2, 29); ++ ++ if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0) ++ __builtin_abort (); ++ ++ return 0; ++} diff --git a/SOURCES/gcc12-vector-merge-2.patch b/SOURCES/gcc12-vector-merge-2.patch new file mode 100644 index 0000000..974f6e1 --- /dev/null +++ b/SOURCES/gcc12-vector-merge-2.patch @@ -0,0 +1,240 @@ +commit 13f0528c782c3732052973a5d340769af8182c8f +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low char on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low char, which are altivec_vmrg[hl]b. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. 
So it's + mapped into vmrghb on BE while vmrglb on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. + Similar to 32-bit element case in commit log of r15-1504, this + 8-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent, once some optimization + like combine does some changes basing on it, it would cause + the unexpected consequence. The newly constructed test case + pr106069-1.c is a typical example for this issue. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghb expands + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ... + (altivec_vmrghb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghb_direct_le): New define_insn. + (altivec_vmrglb_direct): Rename to ... + (altivec_vmrglb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglb_direct_le): New define_insn. + (altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be + for BE and gen_altivec_vmrglb_direct_le for LE. + (altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be + for BE and gen_altivec_vmrghb_direct_le for LE. 
+ * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghb_direct by + CODE_FOR_altivec_vmrghb_direct_be for BE and + CODE_FOR_altivec_vmrghb_direct_le for LE. And replace + CODE_FOR_altivec_vmrglb_direct by + CODE_FOR_altivec_vmrglb_direct_be for BE and + CODE_FOR_altivec_vmrglb_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-1.c: New test. + + (cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 0c408a9e839..b8baae679c4 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct +- : gen_altivec_vmrglb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghb_direct" ++(define_insn "altivec_vmrghb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct" + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 8) 
(const_int 24) ++ (const_int 9) (const_int 25) ++ (const_int 10) (const_int 26) ++ (const_int 11) (const_int 27) ++ (const_int 12) (const_int 28) ++ (const_int 13) (const_int 29) ++ (const_int 14) (const_int 30) ++ (const_int 15) (const_int 31)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct +- : gen_altivec_vmrghb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglb_direct" ++(define_insn "altivec_vmrglb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct" + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 16) ++ (const_int 1) (const_int 17) ++ (const_int 2) (const_int 18) ++ (const_int 3) (const_int 19) ++ (const_int 4) (const_int 20) ++ (const_int 5) (const_int 21) ++ (const_int 6) (const_int 22) ++ (const_int 7) (const_int 23)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglb %0,%1,%2" + [(set_attr "type" 
"vecperm")]) + +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 23b553131a9..e8ce629182b 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -22971,8 +22971,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + CODE_FOR_altivec_vpkuwum_direct, + {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct +- : CODE_FOR_altivec_vmrglb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be ++ : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +@@ -22983,8 +22983,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +- : CODE_FOR_altivec_vmrghb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be ++ : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +new file mode 100644 +index 00000000000..4945d8fedfb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +@@ -0,0 +1,39 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 8-bit element size, ++ it will abort if the RTL pattern isn't expected. 
*/ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed char elem_6 (vector signed char a, vector signed char b) ++{ ++ vector signed char c = vec_mergeh (a,b); ++ return vec_extract (c, 6); ++} ++ ++__attribute__((noipa)) ++unsigned char elem_15 (vector unsigned char a, vector unsigned char b) ++{ ++ vector unsigned char c = vec_mergel (a,b); ++ return vec_extract (c, 15); ++} ++ ++int ++main () ++{ ++ vector unsigned char v1 ++ = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8}; ++ vector unsigned char v2 ++ = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6}; ++ signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2); ++ unsigned char x2 = elem_15 (v1, v2); ++ ++ if (x1 != 12 || x2 != 6) ++ __builtin_abort (); ++ ++ return 0; ++} ++ diff --git a/SOURCES/gcc12-vector-merge-3.patch b/SOURCES/gcc12-vector-merge-3.patch new file mode 100644 index 0000000..0360b1d --- /dev/null +++ b/SOURCES/gcc12-vector-merge-3.patch @@ -0,0 +1,306 @@ +commit ca6eea0eb33de8b2e23e0bef3466575bb14ab63f +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low short on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low short, which are altivec_vmrg[hl]h. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. So it's + mapped into vmrghh on BE while vmrglh on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. 
+ Similar to 32-bit element case in commit log of r15-1504, this + 16-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent, once some optimization + like combine does some changes basing on it, it would cause + the unexpected consequence. The newly constructed test case + pr106069-2.c is a typical example for this issue on element type + short. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghh expands + into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ... + (altivec_vmrghh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghh_direct_le): New define_insn. + (altivec_vmrglh_direct): Rename to ... + (altivec_vmrglh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglh_direct_le): New define_insn. + (altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be + for BE and gen_altivec_vmrglh_direct_le for LE. + (altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be + for BE and gen_altivec_vmrghh_direct_le for LE. + (vec_widen_umult_hi_v16qi): Adjust the call to + gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE + and by gen_altivec_vmrglh for LE. + (vec_widen_smult_hi_v16qi): Likewise. + (vec_widen_umult_lo_v16qi): Adjust the call to + gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE + and by gen_altivec_vmrghh for LE. + (vec_widen_smult_lo_v16qi): Likewise. 
+ * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghh_direct by + CODE_FOR_altivec_vmrghh_direct_be for BE and + CODE_FOR_altivec_vmrghh_direct_le for LE. And replace + CODE_FOR_altivec_vmrglh_direct by + CODE_FOR_altivec_vmrglh_direct_be for BE and + CODE_FOR_altivec_vmrglh_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-2.c: New test. + + (cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index b8baae679c4..50689e418ed 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct +- : gen_altivec_vmrglh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghh_direct" ++(define_insn "altivec_vmrghh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") +- (vec_select:V8HI ++ (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) +@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct" + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 
"register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 4) (const_int 12) ++ (const_int 5) (const_int 13) ++ (const_int 6) (const_int 14) ++ (const_int 7) (const_int 15)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct +- : gen_altivec_vmrghh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglh_direct" ++(define_insn "altivec_vmrglh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_select:V8HI + (vec_concat:V16HI +@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct" + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 "register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 8) ++ (const_int 1) (const_int 9) ++ (const_int 2) (const_int 10) ++ (const_int 3) (const_int 11)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn 
(gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, 
vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index e8ce629182b..34be43c9f84 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -22975,8 +22975,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +- : CODE_FOR_altivec_vmrglh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be ++ : CODE_FOR_altivec_vmrglh_direct_le, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be +@@ -22987,8 +22987,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +- : CODE_FOR_altivec_vmrghh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be ++ : CODE_FOR_altivec_vmrghh_direct_le, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? 
CODE_FOR_altivec_vmrglw_direct_v4si_be +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +new file mode 100644 +index 00000000000..283e3290fb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 16-bit element size, ++ it will abort if the RTL pattern isn't expected. */ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed short elem_2 (vector signed short a, vector signed short b) ++{ ++ vector signed short c = vec_mergeh (a,b); ++ return vec_extract (c, 2); ++} ++ ++__attribute__((noipa)) ++unsigned short elem_7 (vector unsigned short a, vector unsigned short b) ++{ ++ vector unsigned short c = vec_mergel (a,b); ++ return vec_extract (c, 7); ++} ++ ++int ++main () ++{ ++ vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11}; ++ vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99}; ++ signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2); ++ unsigned short x2 = elem_7 (v1, v2); ++ ++ if (x1 != 22 || x2 != 99) ++ __builtin_abort (); ++ ++ return 0; ++} ++ diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec index d9942ba..73e9958 100644 --- a/SPECS/gcc.spec +++ b/SPECS/gcc.spec @@ -147,7 +147,7 @@ Summary: GCC version 12 Name: %{?scl_prefix}gcc Version: %{gcc_version} -Release: %{gcc_release}.6%{?dist} +Release: %{gcc_release}.7%{?dist} # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. 
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -350,6 +350,9 @@ Patch12: gcc12-pr107468.patch Patch15: gcc12-static-libquadmath.patch Patch16: gcc12-FMA-chains.patch Patch17: gcc12-pr113960.patch +Patch18: gcc12-vector-merge-1.patch +Patch19: gcc12-vector-merge-2.patch +Patch20: gcc12-vector-merge-3.patch Patch100: gcc12-fortran-fdec-duplicates.patch Patch101: gcc12-fortran-flogical-as-integer.patch @@ -731,6 +734,9 @@ so that there cannot be any synchronization problems. %patch15 -p0 -b .static-libquadmath~ %patch16 -p1 -b .fma~ %patch17 -p1 -b .pr113960~ +%patch18 -p1 -b .vector-merge-1~ +%patch19 -p1 -b .vector-merge-2~ +%patch20 -p1 -b .vector-merge-3~ %if 0%{?rhel} >= 6 %patch100 -p1 -b .fortran-fdec-duplicates~ @@ -2990,6 +2996,9 @@ fi %endif %changelog +* Thu Jul 11 2024 Marek Polacek 12.2.1-7.7 +- fix wrong RTL patterns for vector merge high/low word on LE (RHEL-44850) + * Wed Apr 3 2024 Marek Polacek 12.2.1-7.6 - bump NVR (RHEL-30832)