Commit 0cbc5fc8 authored by sje
Browse files

2011-02-04 Richard Henderson <rth@redhat.com>

	    Steve Ellcey  <sje@cup.hp.com>

	PR target/46997
	* config/ia64/predicates.md (mux1_brcst_element): New.
	* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
	* config/ia64/ia64.c (ia64_unpack_assemble): New.
	(ia64_unpack_sign): New.
	(ia64_expand_unpack): Rewrite using new routines.
	(ia64_expand_widen_sum): Ditto.
	(ia64_expand_dot_prod_v8qi): Ditto.
	* config/ia64/vect.md (mulv8qi3): Rewrite to use new
	routines, add endian check.
	(pmpy2_even): Rename from pmpy2_r, add endian check.
	(pmpy2_odd): Rename from pmpy2_l, add endian check.
	(vec_widen_smult_lo_v4hi): Rewrite using new routines.
	(vec_widen_smult_hi_v4hi): Ditto.
	(vec_widen_umult_lo_v4hi): Ditto.
	(vec_widen_umult_hi_v4hi): Ditto.
	(mulv2si3): Change endian checks.
	(sdot_prodv4hi): Rewrite with new calls.
	(udot_prodv4hi): New.
	(vec_pack_ssat_v4hi): Add endian check.
	(vec_pack_usat_v4hi): Ditto.
	(vec_pack_ssat_v2si): Ditto.
	(max1_even): Rename from max1_r, add endian check.
	(max1_odd): Rename from max1_l, add endian check.
	(*mux1_rev): Format change.
	(*mux1_mix): Ditto.
	(*mux1_shuf): Ditto.
	(*mux1_alt): Ditto.
	(*mux1_brcst_v8qi): Use new predicate.
	(vec_extract_evenv8qi): Remove endian check.
	(vec_extract_oddv8qi): Ditto.
	(vec_interleave_lowv4hi): Format change.
	(vec_interleave_highv4hi): Ditto.
	(mix2_even): Rename from mix2_r, add endian check.
	(mix2_odd): Rename from mix2_l, add endian check.
	(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
	(vec_extract_evenodd_helper): Format change.
	(vec_extract_evenv4hi): Remove endian check.
	(vec_extract_oddv4hi): Remove endian check.
	(vec_interleave_lowv2si): Format change.
	(vec_interleave_highv2si): Format change.
	(vec_initv2si): Remove endian check.
	(vecinit_v2si): Add endian check.
	(reduc_splus_v2sf): Add endian check.
	(reduc_smax_v2sf): Ditto.
	(reduc_smin_v2sf): Ditto.
	(vec_initv2sf): Remove endian check.
	(fpack): Add endian check.
	(fswap): Add endian check.
	(vec_interleave_highv2sf): Add endian check.
	(vec_interleave_lowv2sf): Add endian check.
	(fmix_lr): Add endian check.
	(vec_setv2sf): Format change.
	(*vec_extractv2sf_0_be): Use shift to extract operand.
	(*vec_extractv2sf_1_be): New.
	(vec_pack_trunc_v4hi): Add endian check.
	(vec_pack_trunc_v2si): Format change.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@169840 138bc75d-0d04-0410-961f-82ee72b054a4
parent 797db64b
2011-02-04 Richard Henderson <rth@redhat.com>
Steve Ellcey <sje@cup.hp.com>
PR target/46997
* config/ia64/predicates.md (mux1_brcst_element): New.
* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
* config/ia64/ia64.c (ia64_unpack_assemble): New.
(ia64_unpack_sign): New.
(ia64_expand_unpack): Rewrite using new routines.
(ia64_expand_widen_sum): Ditto.
(ia64_expand_dot_prod_v8qi): Ditto.
* config/ia64/vect.md (mulv8qi3): Rewrite to use new
routines, add endian check.
(pmpy2_even): Rename from pmpy2_r, add endian check.
(pmpy2_odd): Rename from pmpy2_l, add endian check.
(vec_widen_smult_lo_v4hi): Rewrite using new routines.
(vec_widen_smult_hi_v4hi): Ditto.
(vec_widen_umult_lo_v4hi): Ditto.
(vec_widen_umult_hi_v4hi): Ditto.
(mulv2si3): Change endian checks.
(sdot_prodv4hi): Rewrite with new calls.
(udot_prodv4hi): New.
(vec_pack_ssat_v4hi): Add endian check.
(vec_pack_usat_v4hi): Ditto.
(vec_pack_ssat_v2si): Ditto.
(max1_even): Rename from max1_r, add endian check.
(max1_odd): Rename from max1_l, add endian check.
(*mux1_rev): Format change.
(*mux1_mix): Ditto.
(*mux1_shuf): Ditto.
(*mux1_alt): Ditto.
(*mux1_brcst_v8qi): Use new predicate.
(vec_extract_evenv8qi): Remove endian check.
(vec_extract_oddv8qi): Ditto.
(vec_interleave_lowv4hi): Format change.
(vec_interleave_highv4hi): Ditto.
(mix2_even): Rename from mix2_r, add endian check.
(mix2_odd): Rename from mix2_l, add endian check.
(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
(vec_extract_evenodd_helper): Format change.
(vec_extract_evenv4hi): Remove endian check.
(vec_extract_oddv4hi): Remove endian check.
(vec_interleave_lowv2si): Format change.
(vec_interleave_highv2si): Format change.
(vec_initv2si): Remove endian check.
(vecinit_v2si): Add endian check.
(reduc_splus_v2sf): Add endian check.
(reduc_smax_v2sf): Ditto.
(reduc_smin_v2sf): Ditto.
(vec_initv2sf): Remove endian check.
(fpack): Add endian check.
(fswap): Add endian check.
(vec_interleave_highv2sf): Add endian check.
(vec_interleave_lowv2sf): Add endian check.
(fmix_lr): Add endian check.
(vec_setv2sf): Format change.
(*vec_extractv2sf_0_be): Use shift to extract operand.
(*vec_extractv2sf_1_be): New.
(vec_pack_trunc_v4hi): Add endian check.
(vec_pack_trunc_v2si): Format change.
2011-02-04 Jakub Jelinek <jakub@redhat.com>
PR inline-asm/23200
......
......@@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
extern void ia64_expand_compare (rtx *, rtx *, rtx *);
extern void ia64_expand_vecint_cmov (rtx[]);
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
extern void ia64_expand_unpack (rtx [], bool, bool);
extern void ia64_expand_widen_sum (rtx[], bool);
extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);
extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
extern void ia64_expand_call (rtx, rtx, rtx, int);
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
......
......@@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
return true;
}
/* Emit an integral vector unpack operation. */
/* The vectors LO and HI each contain N halves of a double-wide vector.
Reassemble either the first N/2 or the second N/2 elements. */
void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
enum machine_mode mode = GET_MODE (operands[1]);
enum machine_mode mode = GET_MODE (lo);
rtx (*gen) (rtx, rtx, rtx);
rtx x;
......@@ -1993,110 +1994,66 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
gcc_unreachable ();
}
/* Fill in x with the sign extension of each element in op1. */
if (unsignedp)
x = CONST0_RTX (mode);
else
{
bool neg;
x = gen_reg_rtx (mode);
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
CONST0_RTX (mode));
gcc_assert (!neg);
}
x = gen_lowpart (mode, out);
if (TARGET_BIG_ENDIAN)
emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1]));
x = gen (x, hi, lo);
else
emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
x = gen (x, lo, hi);
emit_insn (x);
}
/* Emit an integral vector widening sum operation. */
/* Return a vector of the sign-extension of VEC. */
void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
rtx l, h, x, s;
enum machine_mode wmode, mode;
rtx (*unpack_l) (rtx, rtx, rtx);
rtx (*unpack_h) (rtx, rtx, rtx);
rtx (*plus) (rtx, rtx, rtx);
wmode = GET_MODE (operands[0]);
mode = GET_MODE (operands[1]);
enum machine_mode mode = GET_MODE (vec);
rtx zero = CONST0_RTX (mode);
switch (mode)
{
case V8QImode:
unpack_l = gen_vec_interleave_lowv8qi;
unpack_h = gen_vec_interleave_highv8qi;
plus = gen_addv4hi3;
break;
case V4HImode:
unpack_l = gen_vec_interleave_lowv4hi;
unpack_h = gen_vec_interleave_highv4hi;
plus = gen_addv2si3;
break;
default:
gcc_unreachable ();
}
/* Fill in x with the sign extension of each element in op1. */
if (unsignedp)
x = CONST0_RTX (mode);
return zero;
else
{
rtx sign = gen_reg_rtx (mode);
bool neg;
x = gen_reg_rtx (mode);
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
CONST0_RTX (mode));
neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
gcc_assert (!neg);
return sign;
}
}
l = gen_reg_rtx (wmode);
h = gen_reg_rtx (wmode);
s = gen_reg_rtx (wmode);
/* Emit an integral vector unpack operation. */
if (TARGET_BIG_ENDIAN)
{
emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1]));
emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1]));
}
else
{
emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
}
emit_insn (plus (s, l, operands[2]));
emit_insn (plus (operands[0], h, s));
void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx dest = operands[0];
  rtx src = operands[1];

  /* Build the extension vector for SRC (via ia64_unpack_sign, which
     honours UNSIGNEDP), then let ia64_unpack_assemble combine SRC with
     it into DEST; HIGHP is forwarded to pick which half is assembled.  */
  ia64_unpack_assemble (dest, src, ia64_unpack_sign (src, unsignedp), highp);
}
/* Emit an integral vector widening sum operation. */
void
ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
rtx (*mulhigh)(rtx, rtx, rtx, rtx);
rtx (*interl)(rtx, rtx, rtx);
enum machine_mode wmode;
rtx l, h, t, sign;
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
sign = ia64_unpack_sign (operands[1], unsignedp);
wmode = GET_MODE (operands[0]);
l = gen_reg_rtx (wmode);
h = gen_reg_rtx (wmode);
/* For signed, pmpy2.r would appear to more closely match this operation.
However, the vectorizer is more likely to use the LO and HI patterns
in pairs. At which point, with this formulation, the first two insns
of each can be CSEd. */
mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
ia64_unpack_assemble (l, operands[1], sign, false);
ia64_unpack_assemble (h, operands[1], sign, true);
interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
if (TARGET_BIG_ENDIAN)
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l));
else
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
if (t != operands[0])
emit_move_insn (operands[0], t);
}
/* Emit a signed or unsigned V8QI dot product operation. */
......@@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{
rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
rtx p1, p2, p3, p4, s1, s2, s3;
/* Fill in x1 and x2 with the sign extension of each element. */
if (unsignedp)
x1 = x2 = CONST0_RTX (V8QImode);
else
{
bool neg;
x1 = gen_reg_rtx (V8QImode);
x2 = gen_reg_rtx (V8QImode);
neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
CONST0_RTX (V8QImode));
gcc_assert (!neg);
neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
CONST0_RTX (V8QImode));
gcc_assert (!neg);
}
op1 = operands[1];
op2 = operands[2];
sn1 = ia64_unpack_sign (op1, unsignedp);
sn2 = ia64_unpack_sign (op2, unsignedp);
l1 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
h1 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode);
if (TARGET_BIG_ENDIAN)
{
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l1), x1, operands[1]));
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l2), x2, operands[2]));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h1), x1, operands[1]));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h2), x2, operands[2]));
}
else
{
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l1), operands[1], x1));
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l2), operands[2], x2));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h1), operands[1], x1));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h2), operands[2], x2));
}
ia64_unpack_assemble (l1, op1, sn1, false);
ia64_unpack_assemble (l2, op2, sn2, false);
ia64_unpack_assemble (h1, op1, sn1, true);
ia64_unpack_assemble (h2, op2, sn2, true);
p1 = gen_reg_rtx (V2SImode);
p2 = gen_reg_rtx (V2SImode);
p3 = gen_reg_rtx (V2SImode);
p4 = gen_reg_rtx (V2SImode);
emit_insn (gen_pmpy2_r (p1, l1, l2));
emit_insn (gen_pmpy2_l (p2, l1, l2));
emit_insn (gen_pmpy2_r (p3, h1, h2));
emit_insn (gen_pmpy2_l (p4, h1, h2));
emit_insn (gen_pmpy2_even (p1, l1, l2));
emit_insn (gen_pmpy2_even (p2, h1, h2));
emit_insn (gen_pmpy2_odd (p3, l1, l2));
emit_insn (gen_pmpy2_odd (p4, h1, h2));
s1 = gen_reg_rtx (V2SImode);
s2 = gen_reg_rtx (V2SImode);
......
......@@ -624,3 +624,7 @@
return REG_P (op) && REG_POINTER (op);
})
;; True if OP is a CONST_INT naming the right-most vector element, for
;; mux1 @brcst.  The accepted index depends on the target byte order:
;; 7 when TARGET_BIG_ENDIAN, 0 otherwise.
(define_predicate "mux1_brcst_element"
(and (match_code "const_int")
(match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment