Commit 0cbc5fc8 authored by sje's avatar sje
Browse files

2011-02-04 Richard Henderson <rth@redhat.com>

	    Steve Ellcey  <sje@cup.hp.com>

	PR target/46997
	* config/ia64/predicates.md (mux1_brcst_element): New.
	* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
	* config/ia64/ia64.c (ia64_unpack_assemble): New.
	(ia64_unpack_sign): New.
	(ia64_expand_unpack): Rewrite using new routines.
	(ia64_expand_widen_sum): Ditto.
	(ia64_expand_dot_prod_v8qi): Ditto.
	* config/ia64/vect.md (mulv8qi3): Rewrite to use new
	routines, add endian check.
	(pmpy2_even): Rename from pmpy2_r, add endian check.
	(pmpy2_odd): Rename from pmpy2_l, add endian check.
	(vec_widen_smult_lo_v4hi): Rewrite using new routines.
	(vec_widen_smult_hi_v4hi): Ditto.
	(vec_widen_umult_lo_v4hi): Ditto.
	(vec_widen_umult_hi_v4hi): Ditto.
	(mulv2si3): Change endian checks.
	(sdot_prodv4hi): Rewrite with new calls.
	(udot_prodv4hi): New.
	(vec_pack_ssat_v4hi): Add endian check.
	(vec_pack_usat_v4hi): Ditto.
	(vec_pack_ssat_v2si): Ditto.
	(max1_even): Rename from max1_r, add endian check.
	(max1_odd): Rename from max1_l, add endian check.
	(*mux1_rev): Format change.
	(*mux1_mix): Ditto.
	(*mux1_shuf): Ditto.
	(*mux1_alt): Ditto.
	(*mux1_brcst_v8qi): Use new predicate.
	(vec_extract_evenv8qi): Remove endian check.
	(vec_extract_oddv8qi): Ditto.
	(vec_interleave_lowv4hi): Format change.
	(vec_interleave_highv4hi): Ditto.
	(mix2_even): Rename from mix2_r, add endian check.
	(mix2_odd): Rename from mix2_l, add endian check.
	(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
	(vec_extract_evenodd_helper): Format change.
	(vec_extract_evenv4hi): Remove endian check.
	(vec_extract_oddv4hi): Remove endian check.
	(vec_interleave_lowv2si): Format change.
	(vec_interleave_highv2si): Format change.
	(vec_initv2si): Remove endian check.
	(vecinit_v2si): Add endian check.
	(reduc_splus_v2sf): Add endian check.
	(reduc_smax_v2sf): Ditto.
	(reduc_smin_v2sf): Ditto.
	(vec_initv2sf): Remove endian check.
	(fpack): Add endian check.
	(fswap): Add endian check.
	(vec_interleave_highv2sf): Add endian check.
	(vec_interleave_lowv2sf): Add endian check.
	(fmix_lr): Add endian check.
	(vec_setv2sf): Format change.
	(*vec_extractv2sf_0_be): Use shift to extract operand.
	(*vec_extractv2sf_1_be): New.
	(vec_pack_trunc_v4hi): Add endian check.
	(vec_pack_trunc_v2si): Format change.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@169840 138bc75d-0d04-0410-961f-82ee72b054a4
parent 797db64b
2011-02-04 Richard Henderson <rth@redhat.com>
Steve Ellcey <sje@cup.hp.com>
PR target/46997
* config/ia64/predicates.md (mux1_brcst_element): New.
* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
* config/ia64/ia64.c (ia64_unpack_assemble): New.
(ia64_unpack_sign): New.
(ia64_expand_unpack): Rewrite using new routines.
(ia64_expand_widen_sum): Ditto.
(ia64_expand_dot_prod_v8qi): Ditto.
* config/ia64/vect.md (mulv8qi3): Rewrite to use new
routines, add endian check.
(pmpy2_even): Rename from pmpy2_r, add endian check.
(pmpy2_odd): Rename from pmpy2_l, add endian check.
(vec_widen_smult_lo_v4hi): Rewrite using new routines.
(vec_widen_smult_hi_v4hi): Ditto.
(vec_widen_umult_lo_v4hi): Ditto.
(vec_widen_umult_hi_v4hi): Ditto.
(mulv2si3): Change endian checks.
(sdot_prodv4hi): Rewrite with new calls.
(udot_prodv4hi): New.
(vec_pack_ssat_v4hi): Add endian check.
(vec_pack_usat_v4hi): Ditto.
(vec_pack_ssat_v2si): Ditto.
(max1_even): Rename from max1_r, add endian check.
(max1_odd): Rename from max1_l, add endian check.
(*mux1_rev): Format change.
(*mux1_mix): Ditto.
(*mux1_shuf): Ditto.
(*mux1_alt): Ditto.
(*mux1_brcst_v8qi): Use new predicate.
(vec_extract_evenv8qi): Remove endian check.
(vec_extract_oddv8qi): Ditto.
(vec_interleave_lowv4hi): Format change.
(vec_interleave_highv4hi): Ditto.
(mix2_even): Rename from mix2_r, add endian check.
(mix2_odd): Rename from mix2_l, add endian check.
(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
(vec_extract_evenodd_helper): Format change.
(vec_extract_evenv4hi): Remove endian check.
(vec_extract_oddv4hi): Remove endian check.
(vec_interleave_lowv2si): Format change.
(vec_interleave_highv2si): Format change.
(vec_initv2si): Remove endian check.
(vecinit_v2si): Add endian check.
(reduc_splus_v2sf): Add endian check.
(reduc_smax_v2sf): Ditto.
(reduc_smin_v2sf): Ditto.
(vec_initv2sf): Remove endian check.
(fpack): Add endian check.
(fswap): Add endian check.
(vec_interleave_highv2sf): Add endian check.
(vec_interleave_lowv2sf): Add endian check.
(fmix_lr): Add endian check.
(vec_setv2sf): Format change.
(*vec_extractv2sf_0_be): Use shift to extract operand.
(*vec_extractv2sf_1_be): New.
(vec_pack_trunc_v4hi): Add endian check.
(vec_pack_trunc_v2si): Format change.
2011-02-04 Jakub Jelinek <jakub@redhat.com> 2011-02-04 Jakub Jelinek <jakub@redhat.com>
PR inline-asm/23200 PR inline-asm/23200
......
...@@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]); ...@@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
extern void ia64_expand_compare (rtx *, rtx *, rtx *); extern void ia64_expand_compare (rtx *, rtx *, rtx *);
extern void ia64_expand_vecint_cmov (rtx[]); extern void ia64_expand_vecint_cmov (rtx[]);
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
extern void ia64_expand_unpack (rtx [], bool, bool); extern void ia64_expand_unpack (rtx [], bool, bool);
extern void ia64_expand_widen_sum (rtx[], bool); extern void ia64_expand_widen_sum (rtx[], bool);
extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);
extern void ia64_expand_dot_prod_v8qi (rtx[], bool); extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_expand_call (rtx, rtx, rtx, int);
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
......
...@@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode, ...@@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
return true; return true;
} }
/* Emit an integral vector unpack operation. */ /* The vectors LO and HI each contain N halves of a double-wide vector.
Reassemble either the first N/2 or the second N/2 elements. */
void void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{ {
enum machine_mode mode = GET_MODE (operands[1]); enum machine_mode mode = GET_MODE (lo);
rtx (*gen) (rtx, rtx, rtx); rtx (*gen) (rtx, rtx, rtx);
rtx x; rtx x;
...@@ -1993,110 +1994,66 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) ...@@ -1993,110 +1994,66 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
gcc_unreachable (); gcc_unreachable ();
} }
/* Fill in x with the sign extension of each element in op1. */ x = gen_lowpart (mode, out);
if (unsignedp)
x = CONST0_RTX (mode);
else
{
bool neg;
x = gen_reg_rtx (mode);
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
CONST0_RTX (mode));
gcc_assert (!neg);
}
if (TARGET_BIG_ENDIAN) if (TARGET_BIG_ENDIAN)
emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1])); x = gen (x, hi, lo);
else else
emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x)); x = gen (x, lo, hi);
emit_insn (x);
} }
/* Emit an integral vector widening sum operation. */ /* Return a vector of the sign-extension of VEC. */
void static rtx
ia64_expand_widen_sum (rtx operands[3], bool unsignedp) ia64_unpack_sign (rtx vec, bool unsignedp)
{ {
rtx l, h, x, s; enum machine_mode mode = GET_MODE (vec);
enum machine_mode wmode, mode; rtx zero = CONST0_RTX (mode);
rtx (*unpack_l) (rtx, rtx, rtx);
rtx (*unpack_h) (rtx, rtx, rtx);
rtx (*plus) (rtx, rtx, rtx);
wmode = GET_MODE (operands[0]);
mode = GET_MODE (operands[1]);
switch (mode)
{
case V8QImode:
unpack_l = gen_vec_interleave_lowv8qi;
unpack_h = gen_vec_interleave_highv8qi;
plus = gen_addv4hi3;
break;
case V4HImode:
unpack_l = gen_vec_interleave_lowv4hi;
unpack_h = gen_vec_interleave_highv4hi;
plus = gen_addv2si3;
break;
default:
gcc_unreachable ();
}
/* Fill in x with the sign extension of each element in op1. */
if (unsignedp) if (unsignedp)
x = CONST0_RTX (mode); return zero;
else else
{ {
rtx sign = gen_reg_rtx (mode);
bool neg; bool neg;
x = gen_reg_rtx (mode); neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
CONST0_RTX (mode));
gcc_assert (!neg); gcc_assert (!neg);
return sign;
} }
}
l = gen_reg_rtx (wmode); /* Emit an integral vector unpack operation. */
h = gen_reg_rtx (wmode);
s = gen_reg_rtx (wmode);
if (TARGET_BIG_ENDIAN) void
{ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1])); {
emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1])); rtx sign = ia64_unpack_sign (operands[1], unsignedp);
} ia64_unpack_assemble (operands[0], operands[1], sign, highp);
else
{
emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
}
emit_insn (plus (s, l, operands[2]));
emit_insn (plus (operands[0], h, s));
} }
/* Emit an integral vector widening sum operation. */
void void
ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp) ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{ {
rtx l = gen_reg_rtx (V4HImode); enum machine_mode wmode;
rtx h = gen_reg_rtx (V4HImode); rtx l, h, t, sign;
rtx (*mulhigh)(rtx, rtx, rtx, rtx);
rtx (*interl)(rtx, rtx, rtx);
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); sign = ia64_unpack_sign (operands[1], unsignedp);
wmode = GET_MODE (operands[0]);
l = gen_reg_rtx (wmode);
h = gen_reg_rtx (wmode);
/* For signed, pmpy2.r would appear to more closely match this operation. ia64_unpack_assemble (l, operands[1], sign, false);
However, the vectorizer is more likely to use the LO and HI patterns ia64_unpack_assemble (h, operands[1], sign, true);
in pairs. At which point, with this formulation, the first two insns
of each can be CSEd. */
mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi; t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
if (TARGET_BIG_ENDIAN) t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l)); if (t != operands[0])
else emit_move_insn (operands[0], t);
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
} }
/* Emit a signed or unsigned V8QI dot product operation. */ /* Emit a signed or unsigned V8QI dot product operation. */
...@@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp) ...@@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
void void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp) ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{ {
rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3; rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
rtx p1, p2, p3, p4, s1, s2, s3;
/* Fill in x1 and x2 with the sign extension of each element. */ op1 = operands[1];
if (unsignedp) op2 = operands[2];
x1 = x2 = CONST0_RTX (V8QImode); sn1 = ia64_unpack_sign (op1, unsignedp);
else sn2 = ia64_unpack_sign (op2, unsignedp);
{
bool neg;
x1 = gen_reg_rtx (V8QImode);
x2 = gen_reg_rtx (V8QImode);
neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
CONST0_RTX (V8QImode));
gcc_assert (!neg);
neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
CONST0_RTX (V8QImode));
gcc_assert (!neg);
}
l1 = gen_reg_rtx (V4HImode); l1 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode); l2 = gen_reg_rtx (V4HImode);
h1 = gen_reg_rtx (V4HImode); h1 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode); h2 = gen_reg_rtx (V4HImode);
ia64_unpack_assemble (l1, op1, sn1, false);
if (TARGET_BIG_ENDIAN) ia64_unpack_assemble (l2, op2, sn2, false);
{ ia64_unpack_assemble (h1, op1, sn1, true);
emit_insn (gen_vec_interleave_lowv8qi ia64_unpack_assemble (h2, op2, sn2, true);
(gen_lowpart (V8QImode, l1), x1, operands[1]));
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l2), x2, operands[2]));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h1), x1, operands[1]));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h2), x2, operands[2]));
}
else
{
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l1), operands[1], x1));
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l2), operands[2], x2));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h1), operands[1], x1));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h2), operands[2], x2));
}
p1 = gen_reg_rtx (V2SImode); p1 = gen_reg_rtx (V2SImode);
p2 = gen_reg_rtx (V2SImode); p2 = gen_reg_rtx (V2SImode);
p3 = gen_reg_rtx (V2SImode); p3 = gen_reg_rtx (V2SImode);
p4 = gen_reg_rtx (V2SImode); p4 = gen_reg_rtx (V2SImode);
emit_insn (gen_pmpy2_r (p1, l1, l2)); emit_insn (gen_pmpy2_even (p1, l1, l2));
emit_insn (gen_pmpy2_l (p2, l1, l2)); emit_insn (gen_pmpy2_even (p2, h1, h2));
emit_insn (gen_pmpy2_r (p3, h1, h2)); emit_insn (gen_pmpy2_odd (p3, l1, l2));
emit_insn (gen_pmpy2_l (p4, h1, h2)); emit_insn (gen_pmpy2_odd (p4, h1, h2));
s1 = gen_reg_rtx (V2SImode); s1 = gen_reg_rtx (V2SImode);
s2 = gen_reg_rtx (V2SImode); s2 = gen_reg_rtx (V2SImode);
......
...@@ -624,3 +624,7 @@ ...@@ -624,3 +624,7 @@
return REG_P (op) && REG_POINTER (op); return REG_P (op) && REG_POINTER (op);
}) })
;; True if this is the right-most vector element; for mux1 @brcst.
;; Accepts only a const_int operand whose value is 7 when
;; TARGET_BIG_ENDIAN is set and 0 otherwise.
(define_predicate "mux1_brcst_element"
(and (match_code "const_int")
(match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment