307 lines
12 KiB
307 lines
12 KiB
commit bab38d9271ce3f26cb64b8cb712351eb3fedd559
|
|
Author: Kewen Lin <linkw@linux.ibm.com>
|
|
Date: Wed Jun 26 02:16:17 2024 -0500
|
|
|
|
rs6000: Fix wrong RTL patterns for vector merge high/low short on LE
|
|
|
|
Commit r12-4496 changes some define_expands and define_insns
|
|
for vector merge high/low short, which are altivec_vmrg[hl]h.
|
|
These defines are mainly for built-in function vec_merge{h,l}
|
|
and some internal gen function needs. These functions should
|
|
consider endianness. Taking vec_mergeh as an example, as PVIPR
|
|
defines, vec_mergeh "Merges the first halves (in element order)
|
|
of two vectors", it does note it's in element order. So it's
|
|
mapped to vmrghh on BE and to vmrglh on LE.
|
|
Although the mapped insns are different, as the discussion in
|
|
PR106069, the RTL pattern should still be the same. It was
|
|
consistent before commit r12-4496, but was changed into
|
|
different patterns on BE and LE starting from commit r12-4496.
|
|
Similar to 32-bit element case in commit log of r15-1504, this
|
|
16-bit element pattern on LE doesn't actually match what the
|
|
underlying insn is intended to represent. Once some optimization
|
|
like combine makes changes based on it, it can cause
|
|
unexpected consequences. The newly constructed test case
|
|
pr106069-2.c is a typical example for this issue on element type
|
|
short.
|
|
|
|
So this patch is to fix the wrong RTL pattern, ensure the
|
|
associated RTL patterns become the same as before which can
|
|
have the same semantic as their mapped insns. With the
|
|
proposed patch, the expanders like altivec_vmrghh expands
|
|
into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le
|
|
depending on endianness, "direct" can easily show which
|
|
insn would be generated; _be and _le are mainly for the
|
|
different RTL patterns depending on endianness.
|
|
|
|
Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
|
|
|
|
PR target/106069
|
|
PR target/115355
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ...
|
|
(altivec_vmrghh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
|
(altivec_vmrghh_direct_le): New define_insn.
|
|
(altivec_vmrglh_direct): Rename to ...
|
|
(altivec_vmrglh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
|
(altivec_vmrglh_direct_le): New define_insn.
|
|
(altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be
|
|
for BE and gen_altivec_vmrglh_direct_le for LE.
|
|
(altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be
|
|
for BE and gen_altivec_vmrghh_direct_le for LE.
|
|
(vec_widen_umult_hi_v16qi): Adjust the call to
|
|
gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE
|
|
and by gen_altivec_vmrglh for LE.
|
|
(vec_widen_smult_hi_v16qi): Likewise.
|
|
(vec_widen_umult_lo_v16qi): Adjust the call to
|
|
gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE
|
|
and by gen_altivec_vmrghh for LE.
|
|
(vec_widen_smult_lo_v16qi): Likewise.
|
|
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
|
|
CODE_FOR_altivec_vmrghh_direct by
|
|
CODE_FOR_altivec_vmrghh_direct_be for BE and
|
|
CODE_FOR_altivec_vmrghh_direct_le for LE. And replace
|
|
CODE_FOR_altivec_vmrglh_direct by
|
|
CODE_FOR_altivec_vmrglh_direct_be for BE and
|
|
CODE_FOR_altivec_vmrglh_direct_le for LE.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/powerpc/pr106069-2.c: New test.
|
|
|
|
(cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1)
|
|
|
|
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
|
index 47664204bc5..6557393a97c 100644
|
|
--- a/gcc/config/rs6000/altivec.md
|
|
+++ b/gcc/config/rs6000/altivec.md
|
|
@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh"
|
|
(use (match_operand:V8HI 2 "register_operand"))]
|
|
"TARGET_ALTIVEC"
|
|
{
|
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct
|
|
- : gen_altivec_vmrglh_direct;
|
|
- if (!BYTES_BIG_ENDIAN)
|
|
- std::swap (operands[1], operands[2]);
|
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
|
+ if (BYTES_BIG_ENDIAN)
|
|
+ emit_insn (
|
|
+ gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2]));
|
|
+ else
|
|
+ emit_insn (
|
|
+ gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1]));
|
|
DONE;
|
|
})
|
|
|
|
-(define_insn "altivec_vmrghh_direct"
|
|
+(define_insn "altivec_vmrghh_direct_be"
|
|
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
|
- (vec_select:V8HI
|
|
+ (vec_select:V8HI
|
|
(vec_concat:V16HI
|
|
(match_operand:V8HI 1 "register_operand" "v")
|
|
(match_operand:V8HI 2 "register_operand" "v"))
|
|
@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct"
|
|
(const_int 1) (const_int 9)
|
|
(const_int 2) (const_int 10)
|
|
(const_int 3) (const_int 11)])))]
|
|
- "TARGET_ALTIVEC"
|
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
|
+ "vmrghh %0,%1,%2"
|
|
+ [(set_attr "type" "vecperm")])
|
|
+
|
|
+(define_insn "altivec_vmrghh_direct_le"
|
|
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
|
|
+ (vec_select:V8HI
|
|
+ (vec_concat:V16HI
|
|
+ (match_operand:V8HI 2 "register_operand" "v")
|
|
+ (match_operand:V8HI 1 "register_operand" "v"))
|
|
+ (parallel [(const_int 4) (const_int 12)
|
|
+ (const_int 5) (const_int 13)
|
|
+ (const_int 6) (const_int 14)
|
|
+ (const_int 7) (const_int 15)])))]
|
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
|
"vmrghh %0,%1,%2"
|
|
[(set_attr "type" "vecperm")])
|
|
|
|
@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh"
|
|
(use (match_operand:V8HI 2 "register_operand"))]
|
|
"TARGET_ALTIVEC"
|
|
{
|
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct
|
|
- : gen_altivec_vmrghh_direct;
|
|
- if (!BYTES_BIG_ENDIAN)
|
|
- std::swap (operands[1], operands[2]);
|
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
|
+ if (BYTES_BIG_ENDIAN)
|
|
+ emit_insn (
|
|
+ gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2]));
|
|
+ else
|
|
+ emit_insn (
|
|
+ gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1]));
|
|
DONE;
|
|
})
|
|
|
|
-(define_insn "altivec_vmrglh_direct"
|
|
+(define_insn "altivec_vmrglh_direct_be"
|
|
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
|
(vec_select:V8HI
|
|
(vec_concat:V16HI
|
|
@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct"
|
|
(const_int 5) (const_int 13)
|
|
(const_int 6) (const_int 14)
|
|
(const_int 7) (const_int 15)])))]
|
|
- "TARGET_ALTIVEC"
|
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
|
+ "vmrglh %0,%1,%2"
|
|
+ [(set_attr "type" "vecperm")])
|
|
+
|
|
+(define_insn "altivec_vmrglh_direct_le"
|
|
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
|
|
+ (vec_select:V8HI
|
|
+ (vec_concat:V16HI
|
|
+ (match_operand:V8HI 2 "register_operand" "v")
|
|
+ (match_operand:V8HI 1 "register_operand" "v"))
|
|
+ (parallel [(const_int 0) (const_int 8)
|
|
+ (const_int 1) (const_int 9)
|
|
+ (const_int 2) (const_int 10)
|
|
+ (const_int 3) (const_int 11)])))]
|
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
|
"vmrglh %0,%1,%2"
|
|
[(set_attr "type" "vecperm")])
|
|
|
|
@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi"
|
|
{
|
|
emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
|
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
|
}
|
|
else
|
|
{
|
|
emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
|
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
|
}
|
|
DONE;
|
|
})
|
|
@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi"
|
|
{
|
|
emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
|
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
|
}
|
|
else
|
|
{
|
|
emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
|
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
|
}
|
|
DONE;
|
|
})
|
|
@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi"
|
|
{
|
|
emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
|
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
|
}
|
|
else
|
|
{
|
|
emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
|
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
|
}
|
|
DONE;
|
|
})
|
|
@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi"
|
|
{
|
|
emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
|
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
|
}
|
|
else
|
|
{
|
|
emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
|
|
emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
|
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
|
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
|
}
|
|
DONE;
|
|
})
|
|
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
|
|
index 10088033aa1..76eb89ad529 100644
|
|
--- a/gcc/config/rs6000/rs6000.cc
|
|
+++ b/gcc/config/rs6000/rs6000.cc
|
|
@@ -23170,8 +23170,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
|
: CODE_FOR_altivec_vmrglb_direct_le,
|
|
{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
|
|
{OPTION_MASK_ALTIVEC,
|
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
|
|
- : CODE_FOR_altivec_vmrglh_direct,
|
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be
|
|
+ : CODE_FOR_altivec_vmrglh_direct_le,
|
|
{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
|
|
{OPTION_MASK_ALTIVEC,
|
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
|
|
@@ -23182,8 +23182,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
|
: CODE_FOR_altivec_vmrghb_direct_le,
|
|
{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
|
|
{OPTION_MASK_ALTIVEC,
|
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
|
|
- : CODE_FOR_altivec_vmrghh_direct,
|
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be
|
|
+ : CODE_FOR_altivec_vmrghh_direct_le,
|
|
{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
|
|
{OPTION_MASK_ALTIVEC,
|
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
|
|
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c
|
|
new file mode 100644
|
|
index 00000000000..283e3290fb3
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c
|
|
@@ -0,0 +1,37 @@
|
|
+/* { dg-do run } */
|
|
+/* { dg-options "-O2" } */
|
|
+/* { dg-require-effective-target vmx_hw } */
|
|
+
|
|
+/* Test vector merge for 16-bit element size,
|
|
+ it will abort if the RTL pattern isn't expected. */
|
|
+
|
|
+#include "altivec.h"
|
|
+
|
|
+__attribute__((noipa))
|
|
+signed short elem_2 (vector signed short a, vector signed short b)
|
|
+{
|
|
+ vector signed short c = vec_mergeh (a,b);
|
|
+ return vec_extract (c, 2);
|
|
+}
|
|
+
|
|
+__attribute__((noipa))
|
|
+unsigned short elem_7 (vector unsigned short a, vector unsigned short b)
|
|
+{
|
|
+ vector unsigned short c = vec_mergel (a,b);
|
|
+ return vec_extract (c, 7);
|
|
+}
|
|
+
|
|
+int
|
|
+main ()
|
|
+{
|
|
+ vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11};
|
|
+ vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99};
|
|
+ signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2);
|
|
+ unsigned short x2 = elem_7 (v1, v2);
|
|
+
|
|
+ if (x1 != 22 || x2 != 99)
|
|
+ __builtin_abort ();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|