Commit 221e9a92 authored by dorit's avatar dorit
Browse files

* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info
        as argument instead of struct loop.
        (nested_in_vect_loop_p): New function.
        (vect_relevant): Add enum values vect_used_in_outer_by_reduction and
        vect_used_in_outer.
        (is_loop_header_bb_p): New. Used to differentiate loop-header phis
        from other phis in the loop.
        (destroy_loop_vec_info): Add additional argument to declaration.

        * tree-vectorizer.c (supportable_widening_operation): Also check if
        nested_in_vect_loop_p (don't allow changing the order in this case).
        (vect_is_simple_reduction): Takes a loop_vec_info as argument instead
        of struct loop. Call nested_in_vect_loop_p and don't require
        flag_unsafe_math_optimizations if it returns true.
        (new_stmt_vec_info): When setting def_type for phis differentiate
        loop-header phis from other phis.
        (bb_in_loop_p): New function.
        (new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just
        update their loop_vinfo.  Order of BB traversal now matters - call
        dfs_enumerate_from with bb_in_loop_p.
        (destroy_loop_vec_info): Takes additional argument to control whether
        stmt_vinfo of the loop stmts should be destroyed as well.
        (vect_is_simple_reduction): Allow the "non-reduction" use of a
        reduction stmt to be defined by a non loop-header phi.
        (vectorize_loops): Call destroy_loop_vec_info with additional argument.

        * tree-vect-transform.c (vectorizable_reduction): Call
        nested_in_vect_loop_p. Check for multitypes in the inner-loop.
        (vectorizable_call): Likewise.
        (vectorizable_conversion): Likewise.
        (vectorizable_operation): Likewise.
        (vectorizable_type_promotion): Likewise.
        (vectorizable_type_demotion): Likewise.
        (vectorizable_store): Likewise.
        (vectorizable_live_operation): Likewise.
        (vectorizable_reduction): Likewise. Also pass loop_info to
        vect_is_simple_reduction instead of loop.
        (vect_init_vector): Call nested_in_vect_loop_p.
        (get_initial_def_for_reduction): Likewise.
        (vect_create_epilog_for_reduction): Likewise.
        (vect_init_vector): Check which loop to work with, in case there's an
        inner-loop.
        (get_initial_def_for_induction): Extend to handle outer-loop
        vectorization. Fix indentation.
        (vect_get_vec_def_for_operand): Support phis in the case vect_loop_def.
        In the case vect_induction_def get the vector def from the induction
        phi node, instead of calling get_initial_def_for_induction.
        (get_initial_def_for_reduction): Extend to handle outer-loop
        vectorization.
        (vect_create_epilog_for_reduction): Extend to handle outer-loop
        vectorization.
        (vect_transform_loop): Change assert to just skip this case.  Add a
        dump printout.
        (vect_finish_stmt_generation): Add a couple asserts.

        (vect_estimate_min_profitable_iters): Multiply
        cost of inner-loop stmts (in outer-loop vectorization) by estimated
        inner-loop bound.
        (vect_model_reduction_cost): Don't add reduction epilogue cost in case
        this is an inner-loop reduction in outer-loop vectorization.

        * tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function.
        Same code as what used to be vect_analyze_scalar_cycles, only with
        additional argument loop, and loop_info passed to
        vect_is_simple_reduction instead of loop.
        (vect_analyze_scalar_cycles): Code factored out into
        vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest.
        Updated documentation.
        (analyze_operations): Check for inner-loop loop-closed exit-phis during
        outer-loop vectorization that are live or not used in the outer-loop,
        because this requires special handling.
        (vect_enhance_data_refs_alignment): Don't consider versioning for
        nested-loops.
        (vect_analyze_data_refs): Check that there are no datarefs in the
        inner-loop.
        (vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer
        and vect_used_in_outer_by_reduction cases.
        (process_use): Also consider the case of outer-loop stmt defining an
        inner-loop stmt and vice versa.
        (vect_analyze_loop_1): New function.
        (vect_analyze_loop_form): Extend, to allow a restricted form of nested
        loops.  Call vect_analyze_loop_1.
        (vect_analyze_loop): Skip (inner-)loops within outer-loops that have
        been vectorized.  Call destroy_loop_vec_info with additional argument.

        * tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow
        in the inner-loop when doing outer-loop vectorization. Add
        documentation and printout.
        (vect_recog_dot_prod_pattern): Likewise. Also add check for
        GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop).



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127623 138bc75d-0d04-0410-961f-82ee72b054a4
parent 8787bd6a
No related merge requests found
...@@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes; int nbbs = loop->num_nodes;
int byte_misalign; int byte_misalign;
int innerloop_iters, factor;
/* Cost model disabled. */ /* Cost model disabled. */
if (!flag_vect_cost_model) if (!flag_vect_cost_model)
...@@ -152,11 +153,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -152,11 +153,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
TODO: Consider assigning different costs to different scalar TODO: Consider assigning different costs to different scalar
statements. */ statements. */
/* FORNOW. */
if (loop->inner)
innerloop_iters = 50; /* FIXME */
for (i = 0; i < nbbs; i++) for (i = 0; i < nbbs; i++)
{ {
block_stmt_iterator si; block_stmt_iterator si;
basic_block bb = bbs[i]; basic_block bb = bbs[i];
if (bb->loop_father == loop->inner)
factor = innerloop_iters;
else
factor = 1;
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{ {
tree stmt = bsi_stmt (si); tree stmt = bsi_stmt (si);
...@@ -164,8 +174,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -164,8 +174,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!STMT_VINFO_RELEVANT_P (stmt_info) if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info)) && !STMT_VINFO_LIVE_P (stmt_info))
continue; continue;
scalar_single_iter_cost += cost_for_stmt (stmt); scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info); vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
some of the "outside" costs are generated inside the outer-loop. */
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info); vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
} }
} }
...@@ -1071,6 +1083,9 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type) ...@@ -1071,6 +1083,9 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type)
tree new_temp; tree new_temp;
basic_block new_bb; basic_block new_bb;
if (nested_in_vect_loop_p (loop, stmt))
loop = loop->inner;
new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_"); new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
add_referenced_var (new_var); add_referenced_var (new_var);
...@@ -1096,6 +1111,7 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type) ...@@ -1096,6 +1111,7 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type)
/* Function get_initial_def_for_induction /* Function get_initial_def_for_induction
Input: Input:
STMT - a stmt that performs an induction operation in the loop.
IV_PHI - the initial value of the induction variable IV_PHI - the initial value of the induction variable
Output: Output:
...@@ -1114,8 +1130,8 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1114,8 +1130,8 @@ get_initial_def_for_induction (tree iv_phi)
tree vectype = get_vectype_for_scalar_type (scalar_type); tree vectype = get_vectype_for_scalar_type (scalar_type);
int nunits = TYPE_VECTOR_SUBPARTS (vectype); int nunits = TYPE_VECTOR_SUBPARTS (vectype);
edge pe = loop_preheader_edge (loop); edge pe = loop_preheader_edge (loop);
struct loop *iv_loop;
basic_block new_bb; basic_block new_bb;
block_stmt_iterator bsi;
tree vec, vec_init, vec_step, t; tree vec, vec_init, vec_step, t;
tree access_fn; tree access_fn;
tree new_var; tree new_var;
...@@ -1129,8 +1145,13 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1129,8 +1145,13 @@ get_initial_def_for_induction (tree iv_phi)
int ncopies = vf / nunits; int ncopies = vf / nunits;
tree expr; tree expr;
stmt_vec_info phi_info = vinfo_for_stmt (iv_phi); stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
bool nested_in_vect_loop = false;
tree stmts; tree stmts;
tree stmt = NULL_TREE; imm_use_iterator imm_iter;
use_operand_p use_p;
tree exit_phi;
edge latch_e;
tree loop_arg;
block_stmt_iterator si; block_stmt_iterator si;
basic_block bb = bb_for_stmt (iv_phi); basic_block bb = bb_for_stmt (iv_phi);
...@@ -1139,65 +1160,107 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1139,65 +1160,107 @@ get_initial_def_for_induction (tree iv_phi)
/* Find the first insertion point in the BB. */ /* Find the first insertion point in the BB. */
si = bsi_after_labels (bb); si = bsi_after_labels (bb);
stmt = bsi_stmt (si);
access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi)); if (INTEGRAL_TYPE_P (scalar_type))
step_expr = build_int_cst (scalar_type, 0);
else
step_expr = build_real (scalar_type, dconst0);
/* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
if (nested_in_vect_loop_p (loop, iv_phi))
{
nested_in_vect_loop = true;
iv_loop = loop->inner;
}
else
iv_loop = loop;
gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
latch_e = loop_latch_edge (iv_loop);
loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
gcc_assert (access_fn); gcc_assert (access_fn);
ok = vect_is_simple_iv_evolution (loop->num, access_fn, ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
&init_expr, &step_expr); &init_expr, &step_expr);
gcc_assert (ok); gcc_assert (ok);
pe = loop_preheader_edge (iv_loop);
/* Create the vector that holds the initial_value of the induction. */ /* Create the vector that holds the initial_value of the induction. */
new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_"); if (nested_in_vect_loop)
add_referenced_var (new_var);
new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
if (stmts)
{ {
new_bb = bsi_insert_on_edge_immediate (pe, stmts); /* iv_loop is nested in the loop to be vectorized. init_expr had already
gcc_assert (!new_bb); been created during vectorization of previous stmts; We obtain it from
the STMT_VINFO_VEC_STMT of the defining stmt. */
tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
} }
else
t = NULL_TREE;
t = tree_cons (NULL_TREE, new_name, t);
for (i = 1; i < nunits; i++)
{ {
tree tmp; /* iv_loop is the loop to be vectorized. Create:
vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
add_referenced_var (new_var);
/* Create: new_name = new_name + step_expr */ new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr); if (stmts)
init_stmt = build_gimple_modify_stmt (new_var, tmp); {
new_name = make_ssa_name (new_var, init_stmt); new_bb = bsi_insert_on_edge_immediate (pe, stmts);
GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name; gcc_assert (!new_bb);
}
new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); t = NULL_TREE;
gcc_assert (!new_bb); t = tree_cons (NULL_TREE, init_expr, t);
for (i = 1; i < nunits; i++)
{
tree tmp;
if (vect_print_dump_info (REPORT_DETAILS)) /* Create: new_name_i = new_name + step_expr */
{ tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
fprintf (vect_dump, "created new init_stmt: "); init_stmt = build_gimple_modify_stmt (new_var, tmp);
print_generic_expr (vect_dump, init_stmt, TDF_SLIM); new_name = make_ssa_name (new_var, init_stmt);
} GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
t = tree_cons (NULL_TREE, new_name, t);
new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
gcc_assert (!new_bb);
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "created new init_stmt: ");
print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
}
t = tree_cons (NULL_TREE, new_name, t);
}
/* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
vec = build_constructor_from_list (vectype, nreverse (t));
vec_init = vect_init_vector (iv_phi, vec, vectype);
} }
vec = build_constructor_from_list (vectype, nreverse (t));
vec_init = vect_init_vector (stmt, vec, vectype);
/* Create the vector that holds the step of the induction. */ /* Create the vector that holds the step of the induction. */
expr = build_int_cst (scalar_type, vf); if (nested_in_vect_loop)
new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr); /* iv_loop is nested in the loop to be vectorized. Generate:
vec_step = [S, S, S, S] */
new_name = step_expr;
else
{
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
expr = build_int_cst (scalar_type, vf);
new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
}
t = NULL_TREE; t = NULL_TREE;
for (i = 0; i < nunits; i++) for (i = 0; i < nunits; i++)
t = tree_cons (NULL_TREE, unshare_expr (new_name), t); t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
vec = build_constructor_from_list (vectype, t); vec = build_constructor_from_list (vectype, t);
vec_step = vect_init_vector (stmt, vec, vectype); vec_step = vect_init_vector (iv_phi, vec, vectype);
/* Create the following def-use cycle: /* Create the following def-use cycle:
loop prolog: loop prolog:
vec_init = [X, X+S, X+2*S, X+3*S] vec_init = ...
vec_step = [VF*S, VF*S, VF*S, VF*S] vec_step = ...
loop: loop:
vec_iv = PHI <vec_init, vec_loop> vec_iv = PHI <vec_init, vec_loop>
... ...
...@@ -1208,7 +1271,7 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1208,7 +1271,7 @@ get_initial_def_for_induction (tree iv_phi)
/* Create the induction-phi that defines the induction-operand. */ /* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
add_referenced_var (vec_dest); add_referenced_var (vec_dest);
induction_phi = create_phi_node (vec_dest, loop->header); induction_phi = create_phi_node (vec_dest, iv_loop->header);
set_stmt_info (get_stmt_ann (induction_phi), set_stmt_info (get_stmt_ann (induction_phi),
new_stmt_vec_info (induction_phi, loop_vinfo)); new_stmt_vec_info (induction_phi, loop_vinfo));
induc_def = PHI_RESULT (induction_phi); induc_def = PHI_RESULT (induction_phi);
...@@ -1219,15 +1282,16 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1219,15 +1282,16 @@ get_initial_def_for_induction (tree iv_phi)
induc_def, vec_step)); induc_def, vec_step));
vec_def = make_ssa_name (vec_dest, new_stmt); vec_def = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def; GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
bsi = bsi_for_stmt (stmt); bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
vect_finish_stmt_generation (stmt, new_stmt, &bsi); set_stmt_info (get_stmt_ann (new_stmt),
new_stmt_vec_info (new_stmt, loop_vinfo));
/* Set the arguments of the phi node: */ /* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop)); add_phi_arg (induction_phi, vec_init, pe);
add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop)); add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
/* In case the vectorization factor (VF) is bigger than the number /* In case that vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. For more details see documentation vector stmt by a factor VF/nunits. For more details see documentation
...@@ -1236,6 +1300,8 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1236,6 +1300,8 @@ get_initial_def_for_induction (tree iv_phi)
if (ncopies > 1) if (ncopies > 1)
{ {
stmt_vec_info prev_stmt_vinfo; stmt_vec_info prev_stmt_vinfo;
/* FORNOW. This restriction should be relaxed. */
gcc_assert (!nested_in_vect_loop);
/* Create the vector that holds the step of the induction. */ /* Create the vector that holds the step of the induction. */
expr = build_int_cst (scalar_type, nunits); expr = build_int_cst (scalar_type, nunits);
...@@ -1244,7 +1310,7 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1244,7 +1310,7 @@ get_initial_def_for_induction (tree iv_phi)
for (i = 0; i < nunits; i++) for (i = 0; i < nunits; i++)
t = tree_cons (NULL_TREE, unshare_expr (new_name), t); t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
vec = build_constructor_from_list (vectype, t); vec = build_constructor_from_list (vectype, t);
vec_step = vect_init_vector (stmt, vec, vectype); vec_step = vect_init_vector (iv_phi, vec, vectype);
vec_def = induc_def; vec_def = induc_def;
prev_stmt_vinfo = vinfo_for_stmt (induction_phi); prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
...@@ -1252,19 +1318,50 @@ get_initial_def_for_induction (tree iv_phi) ...@@ -1252,19 +1318,50 @@ get_initial_def_for_induction (tree iv_phi)
{ {
tree tmp; tree tmp;
/* vec_i = vec_prev + vec_{step*nunits} */ /* vec_i = vec_prev + vec_step */
tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step); tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp); new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
vec_def = make_ssa_name (vec_dest, new_stmt); vec_def = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def; GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
bsi = bsi_for_stmt (stmt); bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
vect_finish_stmt_generation (stmt, new_stmt, &bsi); set_stmt_info (get_stmt_ann (new_stmt),
new_stmt_vec_info (new_stmt, loop_vinfo));
STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt; STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
prev_stmt_vinfo = vinfo_for_stmt (new_stmt); prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
} }
} }
if (nested_in_vect_loop)
{
/* Find the loop-closed exit-phi of the induction, and record
the final vector of induction results: */
exit_phi = NULL;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
{
if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
{
exit_phi = USE_STMT (use_p);
break;
}
}
if (exit_phi)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
/* FORNOW. Currently not supporting the case that an inner-loop induction
is not used in the outer-loop (i.e. only outside the outer-loop). */
gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
&& !STMT_VINFO_LIVE_P (stmt_vinfo));
STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "vector of inductions after inner-loop:");
print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
}
}
}
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
fprintf (vect_dump, "transform induction: created def-use cycle:"); fprintf (vect_dump, "transform induction: created def-use cycle:");
...@@ -1300,7 +1397,6 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) ...@@ -1300,7 +1397,6 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype); int nunits = TYPE_VECTOR_SUBPARTS (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vec_inv; tree vec_inv;
tree vec_cst; tree vec_cst;
tree t = NULL_TREE; tree t = NULL_TREE;
...@@ -1386,14 +1482,20 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) ...@@ -1386,14 +1482,20 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
def_stmt_info = vinfo_for_stmt (def_stmt); def_stmt_info = vinfo_for_stmt (def_stmt);
vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
gcc_assert (vec_stmt); gcc_assert (vec_stmt);
vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0); if (TREE_CODE (vec_stmt) == PHI_NODE)
vec_oprnd = PHI_RESULT (vec_stmt);
else
vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
return vec_oprnd; return vec_oprnd;
} }
/* Case 4: operand is defined by a loop header phi - reduction */ /* Case 4: operand is defined by a loop header phi - reduction */
case vect_reduction_def: case vect_reduction_def:
{ {
struct loop *loop;
gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
loop = (bb_for_stmt (def_stmt))->loop_father;
/* Get the def before the loop */ /* Get the def before the loop */
op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
...@@ -1405,8 +1507,12 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) ...@@ -1405,8 +1507,12 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{ {
gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
/* Get the def before the loop */ /* Get the def from the vectorized stmt. */
return get_initial_def_for_induction (def_stmt); def_stmt_info = vinfo_for_stmt (def_stmt);
vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
vec_oprnd = PHI_RESULT (vec_stmt);
return vec_oprnd;
} }
default: default:
...@@ -1487,7 +1593,6 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) ...@@ -1487,7 +1593,6 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
gcc_assert (vec_stmt_for_operand); gcc_assert (vec_stmt_for_operand);
vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0); vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
return vec_oprnd; return vec_oprnd;
} }
...@@ -1503,7 +1608,11 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, ...@@ -1503,7 +1608,11 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt,
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
gcc_assert (stmt == bsi_stmt (*bsi));
gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
set_stmt_info (get_stmt_ann (vec_stmt), set_stmt_info (get_stmt_ann (vec_stmt),
new_stmt_vec_info (vec_stmt, loop_vinfo)); new_stmt_vec_info (vec_stmt, loop_vinfo));
...@@ -1571,6 +1680,8 @@ static tree ...@@ -1571,6 +1680,8 @@ static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
{ {
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype); int nunits = TYPE_VECTOR_SUBPARTS (vectype);
enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)); enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
...@@ -1581,8 +1692,14 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) ...@@ -1581,8 +1692,14 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
tree t = NULL_TREE; tree t = NULL_TREE;
int i; int i;
tree vector_type; tree vector_type;
bool nested_in_vect_loop = false;
gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
if (nested_in_vect_loop_p (loop, stmt))
nested_in_vect_loop = true;
else
gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL); vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
switch (code) switch (code)
...@@ -1590,7 +1707,10 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) ...@@ -1590,7 +1707,10 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
case WIDEN_SUM_EXPR: case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR: case DOT_PROD_EXPR:
case PLUS_EXPR: case PLUS_EXPR:
*adjustment_def = init_val; if (nested_in_vect_loop)
*adjustment_def = vecdef;
else
*adjustment_def = init_val;
/* Create a vector of zeros for init_def. */ /* Create a vector of zeros for init_def. */
if (INTEGRAL_TYPE_P (type)) if (INTEGRAL_TYPE_P (type))
def_for_init = build_int_cst (type, 0); def_for_init = build_int_cst (type, 0);
...@@ -1679,24 +1799,31 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, ...@@ -1679,24 +1799,31 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
tree new_phi; tree new_phi;
block_stmt_iterator exit_bsi; block_stmt_iterator exit_bsi;
tree vec_dest; tree vec_dest;
tree new_temp; tree new_temp = NULL_TREE;
tree new_name; tree new_name;
tree epilog_stmt; tree epilog_stmt = NULL_TREE;
tree new_scalar_dest, exit_phi; tree new_scalar_dest, exit_phi, new_dest;
tree bitsize, bitpos, bytesize; tree bitsize, bitpos, bytesize;
enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)); enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
tree scalar_initial_def; tree adjustment_def;
tree vec_initial_def; tree vec_initial_def;
tree orig_name; tree orig_name;
imm_use_iterator imm_iter; imm_use_iterator imm_iter;
use_operand_p use_p; use_operand_p use_p;
bool extract_scalar_result; bool extract_scalar_result = false;
tree reduction_op; tree reduction_op, expr;
tree orig_stmt; tree orig_stmt;
tree use_stmt; tree use_stmt;
tree operation = GIMPLE_STMT_OPERAND (stmt, 1); tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
bool nested_in_vect_loop = false;
int op_type; int op_type;
if (nested_in_vect_loop_p (loop, stmt))
{
loop = loop->inner;
nested_in_vect_loop = true;
}
op_type = TREE_OPERAND_LENGTH (operation); op_type = TREE_OPERAND_LENGTH (operation);
reduction_op = TREE_OPERAND (operation, op_type-1); reduction_op = TREE_OPERAND (operation, op_type-1);
vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
...@@ -1709,7 +1836,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, ...@@ -1709,7 +1836,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
the scalar def before the loop, that defines the initial value the scalar def before the loop, that defines the initial value
of the reduction variable. */ of the reduction variable. */
vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
&scalar_initial_def); &adjustment_def);
add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
/* 1.2 set the loop-latch arg for the reduction-phi: */ /* 1.2 set the loop-latch arg for the reduction-phi: */
...@@ -1788,6 +1915,15 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, ...@@ -1788,6 +1915,15 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
bitsize = TYPE_SIZE (scalar_type); bitsize = TYPE_SIZE (scalar_type);
bytesize = TYPE_SIZE_UNIT (scalar_type); bytesize = TYPE_SIZE_UNIT (scalar_type);
/* In case this is a reduction in an inner-loop while vectorizing an outer
loop - we don't need to extract a single scalar result at the end of the
inner-loop. The final vector of partial results will be used in the
vectorized outer-loop, or reduced to a scalar result at the end of the
outer-loop. */
if (nested_in_vect_loop)
goto vect_finalize_reduction;
/* 2.3 Create the reduction code, using one of the three schemes described /* 2.3 Create the reduction code, using one of the three schemes described
above. */ above. */
...@@ -1934,6 +2070,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, ...@@ -1934,6 +2070,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
{ {
tree rhs; tree rhs;
gcc_assert (!nested_in_vect_loop);
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "extract scalar result"); fprintf (vect_dump, "extract scalar result");
...@@ -1952,25 +2089,42 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, ...@@ -1952,25 +2089,42 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT); bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
} }
/* 2.4 Adjust the final result by the initial value of the reduction vect_finalize_reduction:
/* 2.5 Adjust the final result by the initial value of the reduction
variable. (When such adjustment is not needed, then variable. (When such adjustment is not needed, then
'scalar_initial_def' is zero). 'adjustment_def' is zero). For example, if code is PLUS we create:
new_temp = loop_exit_def + adjustment_def */
Create: if (adjustment_def)
s_out4 = scalar_expr <s_out3, scalar_initial_def> */
if (scalar_initial_def)
{ {
tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def); if (nested_in_vect_loop)
epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp); {
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
new_dest = vect_create_destination_var (scalar_dest, vectype);
}
else
{
gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
expr = build2 (code, scalar_type, new_temp, adjustment_def);
new_dest = vect_create_destination_var (scalar_dest, scalar_type);
}
epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
new_temp = make_ssa_name (new_dest, epilog_stmt);
GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp; GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
#if 0
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
#else
bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT); bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
#endif
} }
/* 2.6 Replace uses of s_out0 with uses of s_out3 */
/* Find the loop-closed-use at the loop exit of the original scalar result. /* 2.6 Handle the loop-exit phi */
/* Replace uses of s_out0 with uses of s_out3:
Find the loop-closed-use at the loop exit of the original scalar result.
(The reduction result is expected to have two immediate uses - one at the (The reduction result is expected to have two immediate uses - one at the
latch block, and one at the loop exit). */ latch block, and one at the loop exit). */
exit_phi = NULL; exit_phi = NULL;
...@@ -1984,6 +2138,29 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, ...@@ -1984,6 +2138,29 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
} }
/* We expect to have found an exit_phi because of loop-closed-ssa form. */ /* We expect to have found an exit_phi because of loop-closed-ssa form. */
gcc_assert (exit_phi); gcc_assert (exit_phi);
if (nested_in_vect_loop)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
/* FORNOW. Currently not supporting the case that an inner-loop reduction
is not used in the outer-loop (but only outside the outer-loop). */
gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
&& !STMT_VINFO_LIVE_P (stmt_vinfo));
epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
set_stmt_info (get_stmt_ann (epilog_stmt),
new_stmt_vec_info (epilog_stmt, loop_vinfo));
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "vector of partial results after inner-loop:");
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
return;
}
/* Replace the uses: */ /* Replace the uses: */
orig_name = PHI_RESULT (exit_phi); orig_name = PHI_RESULT (exit_phi);
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
...@@ -2065,15 +2242,30 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -2065,15 +2242,30 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree new_stmt = NULL_TREE; tree new_stmt = NULL_TREE;
int j; int j;
if (nested_in_vect_loop_p (loop, stmt))
{
loop = loop->inner;
/* FORNOW. This restriction should be relaxed. */
if (ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
}
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* 1. Is vectorizable reduction? */ /* 1. Is vectorizable reduction? */
/* Not supportable if the reduction variable is used in the loop. */ /* Not supportable if the reduction variable is used in the loop. */
if (STMT_VINFO_RELEVANT_P (stmt_info)) if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
return false; return false;
if (!STMT_VINFO_LIVE_P (stmt_info)) /* Reductions that are not used even in an enclosing outer-loop,
are expected to be "live" (used out of the loop). */
if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
&& !STMT_VINFO_LIVE_P (stmt_info))
return false; return false;
/* Make sure it was already recognized as a reduction computation. */ /* Make sure it was already recognized as a reduction computation. */
...@@ -2130,9 +2322,9 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -2130,9 +2322,9 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
gcc_assert (dt == vect_reduction_def); gcc_assert (dt == vect_reduction_def);
gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
if (orig_stmt) if (orig_stmt)
gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt)); gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
else else
gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt)); gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
return false; return false;
...@@ -2357,6 +2549,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -2357,6 +2549,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
int nunits_in; int nunits_in;
int nunits_out; int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
tree new_stmt; tree new_stmt;
...@@ -2466,6 +2659,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -2466,6 +2659,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
needs to be generated. */ needs to be generated. */
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!vec_stmt) /* transformation not required. */ if (!vec_stmt) /* transformation not required. */
{ {
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
...@@ -2480,6 +2681,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -2480,6 +2681,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation."); fprintf (vect_dump, "transform operation.");
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
/* Handle def. */ /* Handle def. */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out); vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
...@@ -2671,6 +2880,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, ...@@ -2671,6 +2880,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree decl1 = NULL_TREE, decl2 = NULL_TREE;
tree new_temp; tree new_temp;
...@@ -2752,6 +2962,14 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, ...@@ -2752,6 +2962,14 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
needs to be generated. */ needs to be generated. */
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
/* Check the operands of the operation. */ /* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
{ {
...@@ -3093,6 +3311,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -3093,6 +3311,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code; enum tree_code code;
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree new_temp; tree new_temp;
...@@ -3111,6 +3330,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -3111,6 +3330,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
int j; int j;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info)) if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false; return false;
...@@ -3373,6 +3599,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, ...@@ -3373,6 +3599,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
tree vec_oprnd0=NULL, vec_oprnd1=NULL; tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK; enum tree_code code, code1 = ERROR_MARK;
tree new_temp; tree new_temp;
tree def, def_stmt; tree def, def_stmt;
...@@ -3425,6 +3652,13 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, ...@@ -3425,6 +3652,13 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0))) && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
...@@ -3522,6 +3756,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, ...@@ -3522,6 +3756,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
tree vec_oprnd0=NULL, vec_oprnd1=NULL; tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree decl1 = NULL_TREE, decl2 = NULL_TREE;
int op_type; int op_type;
...@@ -3575,6 +3810,13 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, ...@@ -3575,6 +3810,13 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0))) && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
...@@ -3867,6 +4109,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -3867,6 +4109,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree dummy; tree dummy;
enum dr_alignment_support alignment_support_cheme; enum dr_alignment_support alignment_support_cheme;
...@@ -3882,6 +4125,13 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -3882,6 +4125,13 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
unsigned int group_size, i; unsigned int group_size, i;
VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
gcc_assert (ncopies >= 1); gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info)) if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false; return false;
...@@ -4517,6 +4767,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -4517,6 +4767,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
bool strided_load = false; bool strided_load = false;
tree first_stmt; tree first_stmt;
gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info)) if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false; return false;
...@@ -4812,6 +5071,7 @@ vectorizable_live_operation (tree stmt, ...@@ -4812,6 +5071,7 @@ vectorizable_live_operation (tree stmt,
tree operation; tree operation;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int i; int i;
int op_type; int op_type;
tree op; tree op;
...@@ -4829,6 +5089,10 @@ vectorizable_live_operation (tree stmt, ...@@ -4829,6 +5089,10 @@ vectorizable_live_operation (tree stmt,
if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME) if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
return false; return false;
/* FORNOW. CHECKME. */
if (nested_in_vect_loop_p (loop, stmt))
return false;
operation = GIMPLE_STMT_OPERAND (stmt, 1); operation = GIMPLE_STMT_OPERAND (stmt, 1);
op_type = TREE_OPERAND_LENGTH (operation); op_type = TREE_OPERAND_LENGTH (operation);
...@@ -6124,8 +6388,18 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -6124,8 +6388,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)
fprintf (vect_dump, "------>vectorizing statement: "); fprintf (vect_dump, "------>vectorizing statement: ");
print_generic_expr (vect_dump, stmt, TDF_SLIM); print_generic_expr (vect_dump, stmt, TDF_SLIM);
} }
stmt_info = vinfo_for_stmt (stmt); stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
/* vector stmts created in the outer-loop during vectorization of
stmts in an inner-loop may not have a stmt_info, and do not
need to be vectorized. */
if (!stmt_info)
{
bsi_next (&si);
continue;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info) if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info)) && !STMT_VINFO_LIVE_P (stmt_info))
{ {
...@@ -6197,4 +6471,6 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -6197,4 +6471,6 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
fprintf (vect_dump, "LOOP VECTORIZED."); fprintf (vect_dump, "LOOP VECTORIZED.");
if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
} }
...@@ -1345,7 +1345,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) ...@@ -1345,7 +1345,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
STMT_VINFO_IN_PATTERN_P (res) = false; STMT_VINFO_IN_PATTERN_P (res) = false;
STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_RELATED_STMT (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL;
if (TREE_CODE (stmt) == PHI_NODE) if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt)))
STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
else else
STMT_VINFO_DEF_TYPE (res) = vect_loop_def; STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
...@@ -1364,6 +1364,20 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) ...@@ -1364,6 +1364,20 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
} }
/* Function bb_in_loop_p

   Predicate passed to dfs_enumerate_from when collecting the basic blocks
   of a loop.  DATA is the loop being traversed (a struct loop *); the
   result is true exactly when BB lies inside that loop.  */
static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  struct loop *enclosing_loop = (struct loop *)data;

  return flow_bb_inside_loop_p (enclosing_loop, bb) ? true : false;
}
/* Function new_loop_vec_info. /* Function new_loop_vec_info.
Create and initialize a new loop_vec_info struct for LOOP, as well as Create and initialize a new loop_vec_info struct for LOOP, as well as
...@@ -1375,37 +1389,76 @@ new_loop_vec_info (struct loop *loop) ...@@ -1375,37 +1389,76 @@ new_loop_vec_info (struct loop *loop)
loop_vec_info res; loop_vec_info res;
basic_block *bbs; basic_block *bbs;
block_stmt_iterator si; block_stmt_iterator si;
unsigned int i; unsigned int i, nbbs;
res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
LOOP_VINFO_LOOP (res) = loop;
bbs = get_loop_body (loop); bbs = get_loop_body (loop);
/* Create stmt_info for all stmts in the loop. */ /* Create/Update stmt_info for all stmts in the loop. */
for (i = 0; i < loop->num_nodes; i++) for (i = 0; i < loop->num_nodes; i++)
{ {
basic_block bb = bbs[i]; basic_block bb = bbs[i];
tree phi; tree phi;
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) /* BBs in a nested inner-loop will have been already processed (because
{ we will have called vect_analyze_loop_form for any nested inner-loop).
stmt_ann_t ann = get_stmt_ann (phi); Therefore, for stmts in an inner-loop we just want to update the
set_stmt_info (ann, new_stmt_vec_info (phi, res)); STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
} loop_info of the outer-loop we are currently considering to vectorize
(instead of the loop_info of the inner-loop).
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) For stmts in other BBs we need to create a stmt_info from scratch. */
if (bb->loop_father != loop)
{ {
tree stmt = bsi_stmt (si); /* Inner-loop bb. */
stmt_ann_t ann; gcc_assert (loop->inner && bb->loop_father == loop->inner);
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
{
stmt_vec_info stmt_info = vinfo_for_stmt (phi);
loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
STMT_VINFO_LOOP_VINFO (stmt_info) = res;
}
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
tree stmt = bsi_stmt (si);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
STMT_VINFO_LOOP_VINFO (stmt_info) = res;
}
}
else
{
/* bb in current nest. */
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
{
stmt_ann_t ann = get_stmt_ann (phi);
set_stmt_info (ann, new_stmt_vec_info (phi, res));
}
ann = stmt_ann (stmt); for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
set_stmt_info (ann, new_stmt_vec_info (stmt, res)); {
tree stmt = bsi_stmt (si);
stmt_ann_t ann = stmt_ann (stmt);
set_stmt_info (ann, new_stmt_vec_info (stmt, res));
}
} }
} }
LOOP_VINFO_LOOP (res) = loop; /* CHECKME: We want to visit all BBs before their successors (except for
latch blocks, for which this assertion wouldn't hold). In the simple
case of the loop forms we allow, a dfs order of the BBs would be the same
as reversed postorder traversal, so we are safe. */
free (bbs);
bbs = XCNEWVEC (basic_block, loop->num_nodes);
nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
bbs, loop->num_nodes, loop);
gcc_assert (nbbs == loop->num_nodes);
LOOP_VINFO_BBS (res) = bbs; LOOP_VINFO_BBS (res) = bbs;
LOOP_VINFO_EXIT_COND (res) = NULL;
LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0;
...@@ -1430,7 +1483,7 @@ new_loop_vec_info (struct loop *loop) ...@@ -1430,7 +1483,7 @@ new_loop_vec_info (struct loop *loop)
stmts in the loop. */ stmts in the loop. */
void void
destroy_loop_vec_info (loop_vec_info loop_vinfo) destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{ {
struct loop *loop; struct loop *loop;
basic_block *bbs; basic_block *bbs;
...@@ -1446,6 +1499,18 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo) ...@@ -1446,6 +1499,18 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)
bbs = LOOP_VINFO_BBS (loop_vinfo); bbs = LOOP_VINFO_BBS (loop_vinfo);
nbbs = loop->num_nodes; nbbs = loop->num_nodes;
if (!clean_stmts)
{
free (LOOP_VINFO_BBS (loop_vinfo));
free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
free (loop_vinfo);
loop->aux = NULL;
return;
}
for (j = 0; j < nbbs; j++) for (j = 0; j < nbbs; j++)
{ {
basic_block bb = bbs[j]; basic_block bb = bbs[j];
...@@ -1597,7 +1662,6 @@ vect_supportable_dr_alignment (struct data_reference *dr) ...@@ -1597,7 +1662,6 @@ vect_supportable_dr_alignment (struct data_reference *dr)
return dr_aligned; return dr_aligned;
/* Possibly unaligned access. */ /* Possibly unaligned access. */
if (DR_IS_READ (dr)) if (DR_IS_READ (dr))
{ {
if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing
...@@ -1718,8 +1782,6 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt, ...@@ -1718,8 +1782,6 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
{ {
case PHI_NODE: case PHI_NODE:
*def = PHI_RESULT (*def_stmt); *def = PHI_RESULT (*def_stmt);
gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
|| *dt == vect_invariant_def);
break; break;
case GIMPLE_MODIFY_STMT: case GIMPLE_MODIFY_STMT:
...@@ -1760,6 +1822,8 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype, ...@@ -1760,6 +1822,8 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
enum tree_code *code1, enum tree_code *code2) enum tree_code *code1, enum tree_code *code2)
{ {
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
bool ordered_p; bool ordered_p;
enum machine_mode vec_mode; enum machine_mode vec_mode;
enum insn_code icode1, icode2; enum insn_code icode1, icode2;
...@@ -1782,9 +1846,15 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype, ...@@ -1782,9 +1846,15 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
Some targets can take advantage of this and generate more efficient code. Some targets can take advantage of this and generate more efficient code.
For example, targets like Altivec, that support widen_mult using a sequence For example, targets like Altivec, that support widen_mult using a sequence
of {mult_even,mult_odd} generate the following vectors: of {mult_even,mult_odd} generate the following vectors:
vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */ vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
When vectorizing outer-loops, we execute the inner-loop sequentially
(each vectorized inner-loop iteration contributes to VF outer-loop
iterations in parallel). We therefore don't allow to change the order
of the computation in the inner-loop during outer-loop vectorization. */
if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction) if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
&& !nested_in_vect_loop_p (vect_loop, stmt))
ordered_p = false; ordered_p = false;
else else
ordered_p = true; ordered_p = true;
...@@ -2008,8 +2078,10 @@ reduction_code_for_scalar_code (enum tree_code code, ...@@ -2008,8 +2078,10 @@ reduction_code_for_scalar_code (enum tree_code code,
Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */ Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */
tree tree
vect_is_simple_reduction (struct loop *loop, tree phi) vect_is_simple_reduction (loop_vec_info loop_info, tree phi)
{ {
struct loop *loop = (bb_for_stmt (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
edge latch_e = loop_latch_edge (loop); edge latch_e = loop_latch_edge (loop);
tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
tree def_stmt, def1, def2; tree def_stmt, def1, def2;
...@@ -2022,6 +2094,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi) ...@@ -2022,6 +2094,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
imm_use_iterator imm_iter; imm_use_iterator imm_iter;
use_operand_p use_p; use_operand_p use_p;
gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));
name = PHI_RESULT (phi); name = PHI_RESULT (phi);
nloop_uses = 0; nloop_uses = 0;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
...@@ -2133,8 +2207,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi) ...@@ -2133,8 +2207,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
return NULL_TREE; return NULL_TREE;
} }
/* Generally, when vectorizing a reduction we change the order of the
computation. This may change the behavior of the program in some
cases, so we need to check that this is ok. One exception is when
vectorizing an outer-loop: the inner-loop is executed sequentially,
and therefore vectorizing reductions in the inner-loop during
outer-loop vectorization is safe. */
/* CHECKME: check for !flag_finite_math_only too? */ /* CHECKME: check for !flag_finite_math_only too? */
if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations) if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations
&& !nested_in_vect_loop_p (vect_loop, def_stmt))
{ {
/* Changing the order of operations changes the semantics. */ /* Changing the order of operations changes the semantics. */
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
...@@ -2144,7 +2226,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi) ...@@ -2144,7 +2226,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
} }
return NULL_TREE; return NULL_TREE;
} }
else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
&& !nested_in_vect_loop_p (vect_loop, def_stmt))
{ {
/* Changing the order of operations changes the semantics. */ /* Changing the order of operations changes the semantics. */
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
...@@ -2183,13 +2266,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi) ...@@ -2183,13 +2266,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
/* Check that one def is the reduction def, defined by PHI, /* Check that one def is the reduction def, defined by PHI,
the other def is either defined in the loop by a GIMPLE_MODIFY_STMT, the other def is either defined in the loop ("vect_loop_def"),
or it's an induction (defined by some phi node). */ or it's an induction (defined by a loop-header phi-node). */
if (def2 == phi if (def2 == phi
&& flow_bb_inside_loop_p (loop, bb_for_stmt (def1)) && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
&& (TREE_CODE (def1) == GIMPLE_MODIFY_STMT && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def)) || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
|| (TREE_CODE (def1) == PHI_NODE
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
&& !is_loop_header_bb_p (bb_for_stmt (def1)))))
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
...@@ -2201,7 +2287,10 @@ vect_is_simple_reduction (struct loop *loop, tree phi) ...@@ -2201,7 +2287,10 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
else if (def1 == phi else if (def1 == phi
&& flow_bb_inside_loop_p (loop, bb_for_stmt (def2)) && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
&& (TREE_CODE (def2) == GIMPLE_MODIFY_STMT && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def)) || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
|| (TREE_CODE (def2) == PHI_NODE
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
&& !is_loop_header_bb_p (bb_for_stmt (def2)))))
{ {
/* Swap operands (just for simplicity - so that the rest of the code /* Swap operands (just for simplicity - so that the rest of the code
can assume that the reduction variable is always the last (second) can assume that the reduction variable is always the last (second)
...@@ -2340,7 +2429,7 @@ vectorize_loops (void) ...@@ -2340,7 +2429,7 @@ vectorize_loops (void)
if (!loop) if (!loop)
continue; continue;
loop_vinfo = loop->aux; loop_vinfo = loop->aux;
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
loop->aux = NULL; loop->aux = NULL;
} }
......
...@@ -92,9 +92,6 @@ typedef struct _loop_vec_info { ...@@ -92,9 +92,6 @@ typedef struct _loop_vec_info {
/* The loop basic blocks. */ /* The loop basic blocks. */
basic_block *bbs; basic_block *bbs;
/* The loop exit_condition. */
tree exit_cond;
/* Number of iterations. */ /* Number of iterations. */
tree num_iters; tree num_iters;
...@@ -148,7 +145,6 @@ typedef struct _loop_vec_info { ...@@ -148,7 +145,6 @@ typedef struct _loop_vec_info {
/* Access Functions. */ /* Access Functions. */
#define LOOP_VINFO_LOOP(L) (L)->loop #define LOOP_VINFO_LOOP(L) (L)->loop
#define LOOP_VINFO_BBS(L) (L)->bbs #define LOOP_VINFO_BBS(L) (L)->bbs
#define LOOP_VINFO_EXIT_COND(L) (L)->exit_cond
#define LOOP_VINFO_NITERS(L) (L)->num_iters #define LOOP_VINFO_NITERS(L) (L)->num_iters
#define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
...@@ -170,6 +166,19 @@ typedef struct _loop_vec_info { ...@@ -170,6 +166,19 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_NITERS_KNOWN_P(L) \ #define LOOP_VINFO_NITERS_KNOWN_P(L) \
NITERS_KNOWN_P((L)->num_iters) NITERS_KNOWN_P((L)->num_iters)
static inline loop_vec_info
loop_vec_info_for_loop (struct loop *loop)
{
return (loop_vec_info) loop->aux;
}
/* Return true if LOOP has an inner loop and the basic block holding STMT
   has that inner loop as its loop_father.  Return false if LOOP has no
   inner loop, or if STMT's block belongs to some other loop.  */
static inline bool
nested_in_vect_loop_p (struct loop *loop, tree stmt)
{
  struct loop *inner = loop->inner;

  /* Without an inner loop nothing can be nested; STMT's block is not
     looked up in that case.  */
  if (!inner)
    return false;

  return bb_for_stmt (stmt)->loop_father == inner;
}
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
/* Info on vectorized defs. */ /* Info on vectorized defs. */
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
...@@ -185,12 +194,15 @@ enum stmt_vec_info_type { ...@@ -185,12 +194,15 @@ enum stmt_vec_info_type {
induc_vec_info_type, induc_vec_info_type,
type_promotion_vec_info_type, type_promotion_vec_info_type,
type_demotion_vec_info_type, type_demotion_vec_info_type,
type_conversion_vec_info_type type_conversion_vec_info_type,
loop_exit_ctrl_vec_info_type
}; };
/* Indicates whether/how a variable is used in the loop. */ /* Indicates whether/how a variable is used in the loop. */
enum vect_relevant { enum vect_relevant {
vect_unused_in_loop = 0, vect_unused_in_loop = 0,
vect_used_in_outer_by_reduction,
vect_used_in_outer,
/* defs that feed computations that end up (only) in a reduction. These /* defs that feed computations that end up (only) in a reduction. These
defs may be used by non-reduction stmts, but eventually, any defs may be used by non-reduction stmts, but eventually, any
...@@ -408,6 +420,15 @@ is_pattern_stmt_p (stmt_vec_info stmt_info) ...@@ -408,6 +420,15 @@ is_pattern_stmt_p (stmt_vec_info stmt_info)
return false; return false;
} }
/* Return true if BB is the header block of the loop it belongs to
   (BB->loop_father), false otherwise.  Used to tell loop-header phis
   apart from other phis in the loop.  A block that is not a header is
   asserted to have exactly one predecessor edge.  */
static inline bool
is_loop_header_bb_p (basic_block bb)
{
  bool at_header = (bb->loop_father->header == bb);

  if (!at_header)
    {
      /* Only checked on the non-header path.  */
      gcc_assert (EDGE_COUNT (bb->preds) == 1);
    }

  return at_header;
}
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
/* Info on data references alignment. */ /* Info on data references alignment. */
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
...@@ -467,7 +488,7 @@ extern tree get_vectype_for_scalar_type (tree); ...@@ -467,7 +488,7 @@ extern tree get_vectype_for_scalar_type (tree);
extern bool vect_is_simple_use (tree, loop_vec_info, tree *, tree *, extern bool vect_is_simple_use (tree, loop_vec_info, tree *, tree *,
enum vect_def_type *); enum vect_def_type *);
extern bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *); extern bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *);
extern tree vect_is_simple_reduction (struct loop *, tree); extern tree vect_is_simple_reduction (loop_vec_info, tree);
extern bool vect_can_force_dr_alignment_p (tree, unsigned int); extern bool vect_can_force_dr_alignment_p (tree, unsigned int);
extern enum dr_alignment_support vect_supportable_dr_alignment extern enum dr_alignment_support vect_supportable_dr_alignment
(struct data_reference *); (struct data_reference *);
...@@ -479,7 +500,7 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree, ...@@ -479,7 +500,7 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
/* Creation and deletion of loop and stmt info structs. */ /* Creation and deletion of loop and stmt info structs. */
extern loop_vec_info new_loop_vec_info (struct loop *loop); extern loop_vec_info new_loop_vec_info (struct loop *loop);
extern void destroy_loop_vec_info (loop_vec_info); extern void destroy_loop_vec_info (loop_vec_info, bool);
extern stmt_vec_info new_stmt_vec_info (tree stmt, loop_vec_info); extern stmt_vec_info new_stmt_vec_info (tree stmt, loop_vec_info);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment