Commit 221e9a92 authored by dorit's avatar dorit
Browse files

* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info

        as argument instead of struct loop.
        (nested_in_vect_loop_p): New function.
        (vect_relevant): Add enum values vect_used_in_outer_by_reduction and
        vect_used_in_outer.
        (is_loop_header_bb_p): New. Used to differentiate loop-header phis
        from other phis in the loop.
        (destroy_loop_vec_info): Add additional argument to declaration.

        * tree-vectorizer.c (supportable_widening_operation): Also check if
        nested_in_vect_loop_p (don't allow changing the order in this case).
        (vect_is_simple_reduction): Takes a loop_vec_info as argument instead
        of struct loop. Call nested_in_vect_loop_p and don't require
        flag_unsafe_math_optimizations if it returns true.
        (new_stmt_vec_info): When setting def_type for phis differentiate
        loop-header phis from other phis.
        (bb_in_loop_p): New function.
        (new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just
        update their loop_vinfo.  Order of BB traversal now matters - call
        dfs_enumerate_from with bb_in_loop_p.
        (destroy_loop_vec_info): Takes additional argument to control whether
        stmt_vinfo of the loop stmts should be destroyed as well.
        (vect_is_simple_reduction): Allow the "non-reduction" use of a
        reduction stmt to be defined by a non loop-header phi.
        (vectorize_loops): Call destroy_loop_vec_info with additional argument.

        * tree-vect-transform.c (vectorizable_reduction): Call
        nested_in_vect_loop_p. Check for multitypes in the inner-loop.
        (vectorizable_call): Likewise.
        (vectorizable_conversion): Likewise.
        (vectorizable_operation): Likewise.
        (vectorizable_type_promotion): Likewise.
        (vectorizable_type_demotion): Likewise.
        (vectorizable_store): Likewise.
        (vectorizable_live_operation): Likewise.
        (vectorizable_reduction): Likewise. Also pass loop_info to
        vect_is_simple_reduction instead of loop.
        (vect_init_vector): Call nested_in_vect_loop_p.
        (get_initial_def_for_reduction): Likewise.
        (vect_create_epilog_for_reduction): Likewise.
        (vect_init_vector): Check which loop to work with, in case there's an
        inner-loop.
        (get_initial_def_for_inducion): Extend to handle outer-loop
        vectorization. Fix indentation.
        (vect_get_vec_def_for_operand): Support phis in the case vect_loop_def.
        In the case vect_induction_def get the vector def from the induction
        phi node, instead of calling get_initial_def_for_inducion.
        (get_initial_def_for_reduction): Extend to handle outer-loop
        vectorization.
        (vect_create_epilog_for_reduction): Extend to handle outer-loop
        vectorization.
        (vect_transform_loop): Change assert to just skip this case.  Add a
        dump printout.
        (vect_finish_stmt_generation): Add a couple asserts.

        (vect_estimate_min_profitable_iters): Multiply
        cost of inner-loop stmts (in outer-loop vectorization) by estimated
        inner-loop bound.
        (vect_model_reduction_cost): Don't add reduction epilogue cost in case
        this is an inner-loop reduction in outer-loop vectorization.

        * tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function.
        Same code as what used to be vect_analyze_scalar_cycles, only with
        additional argument loop, and loop_info passed to
        vect_is_simple_reduction instead of loop.
        (vect_analyze_scalar_cycles): Code factored out into
        vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest.
        Updated documentation.
        (analyze_operations): Check for inner-loop loop-closed exit-phis during
        outer-loop vectorization that are live or not used in the outerloop,
        because this requires special handling.
        (vect_enhance_data_refs_alignment): Don't consider versioning for
        nested-loops.
        (vect_analyze_data_refs): Check that there are no datarefs in the
        inner-loop.
        (vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer
        and vect_used_in_outer_by_reduction cases.
        (process_use): Also consider the case of outer-loop stmt defining an
        inner-loop stmt and vice versa.
        (vect_analyze_loop_1): New function.
        (vect_analyze_loop_form): Extend, to allow a restricted form of nested
        loops.  Call vect_analyze_loop_1.
        (vect_analyze_loop): Skip (inner-)loops within outer-loops that have
        been vectorized.  Call destroy_loop_vec_info with additional argument.

        * tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow
        in the inner-loop when doing outer-loop vectorization. Add
        documentation and printout.
        (vect_recog_dot_prod_pattern): Likewise. Also add check for
        GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop).



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127623 138bc75d-0d04-0410-961f-82ee72b054a4
parent 8787bd6a
/* { dg-do compile } */
#define N 40
/* Doubly nested reduction: the single accumulator DIFF is initialised once,
   before the outer loop, and is updated only inside the inner loop, so it is
   carried across outer-loop iterations.  Returns the accumulated total.  */
int
foo (){
int i,j;
int diff = 0;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
/* Inner-loop index is the value being summed.  */
diff += j;
}
}
return diff;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];
int b[N];

/* Outer loop that handles two array elements per iteration (stride-2
   accesses to both A and B) around an inner reduction.

   For every i in [0, N/2) the inner loop sums the indices 0..N-1 into SUM,
   then a[2*i] and a[2*i+1] receive SUM plus the corresponding element of B.

   Returns 0.  Callers only rely on the side effect on A; the explicit
   return keeps this int function from falling off its end.  */
int
foo ()
{
  int i, j;
  int sum, x, y;

  for (i = 0; i < N/2; i++)
    {
      sum = 0;
      /* Load the even/odd pair from B before entering the inner loop.  */
      x = b[2*i];
      y = b[2*i+1];

      for (j = 0; j < N; j++)
        {
          sum += j;
        }

      a[2*i] = sum + x;
      a[2*i+1] = sum + y;
    }

  return 0;
}
/* Test driver: seeds b[] with its own index, runs foo (), then recomputes
   the expected contents of a[] with scalar code and aborts on the first
   mismatch.  Returns 0 when every pair matches.  */
int main (void)
{
int i,j;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
b[i] = i;
foo ();
/* check results: */
for (i=0; i<N/2; i++)
{
/* Recompute the inner-loop sum for this outer iteration.  */
sum = 0;
for (j = 0; j < N; j++)
sum += j;
/* Both elements of the pair written by foo must match.  */
if (a[2*i] != sum + b[2*i] || a[2*i+1] != sum + b[2*i+1])
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Loop nest mixing two accumulator types.  SUM (int) is a reduction over
   the outer index i and is the return value.  SUM_J (unsigned short) is
   restarted from i on every outer iteration, accumulates the unsigned
   short inner index j, and is stored to a[i] with 5 added.  */
int
foo (){
int i;
unsigned short j;
int sum = 0;
unsigned short sum_j;
for (i = 0; i < N; i++) {
sum += i;
/* Inner accumulator starts from the outer index, narrowed to
   unsigned short by the assignment.  */
sum_j = i;
for (j = 0; j < N; j++) {
sum_j += j;
}
a[i] = sum_j + 5;
}
return sum;
}
/* Test driver: seeds a[] with its own index, calls foo (), and checks both
   the array contents and the returned outer-loop sum against a scalar
   recomputation.  Aborts on mismatch, returns 0 otherwise.  */
int main (void)
{
int i;
unsigned short j, sum_j;
int sum = 0;
int res;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
res = foo ();
/* check results: */
for (i=0; i<N; i++)
{
/* SUM mirrors foo's return value; SUM_J mirrors the value stored to a[i].  */
sum += i;
sum_j = i;
for (j = 0; j < N; j++){
sum_j += j;
}
if (a[i] != sum_j + 5)
abort();
}
if (res != sum)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Inner-loop index used after the inner loop.

   For every i in [0, N) the inner loop steps j by 2 while j < n, summing j
   into SUM; a[i] then receives SUM plus the final value of j.

   N_ARG semantics: when n <= 0 nothing is written and 0 is returned.
   Otherwise a[] is filled and 0 is returned as well, so both paths of this
   int function now return a defined value.  */
int
foo (int n)
{
  int i, j;
  int sum;

  if (n <= 0)
    return 0;

  /* inner-loop index j used after the inner-loop */
  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < n; j += 2)
        {
          sum += j;
        }
      a[i] = sum + j;
    }

  return 0;
}
/* Test driver: seeds a[] with its own index, calls foo (N), then recomputes
   each expected element (inner sum with step 2, plus the final inner index)
   and aborts on the first mismatch.  Returns 0 on success.  */
int main (void)
{
int i,j;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j+=2)
sum += j;
/* j still holds its value from the end of the loop above.  */
if (a[i] != sum + j)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Inner loop with a step greater than 1.

   For every i in [0, N) the inner loop sums the even indices below N into
   SUM and stores the result in a[i].

   Returns 0.  Callers only rely on the side effect on A; the explicit
   return keeps this int function from falling off its end.  */
int
foo ()
{
  int i, j;
  int sum;

  /* inner-loop step > 1 */
  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < N; j += 2)
        {
          sum += j;
        }
      a[i] = sum;
    }

  return 0;
}
/* Test driver: seeds a[] with its own index, calls foo (), then recomputes
   the step-2 inner sum for every element and aborts on the first mismatch.
   Returns 0 on success.  */
int main (void)
{
int i,j;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo ();
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j+=2)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Induction variable k advances through inner and outer loops.

   K is initialised once and incremented on every inner iteration, so its
   value carries over from one outer iteration to the next.  For every i in
   [0, N) the inner loop (j stepping by 2 while j < n) sums successive
   values of K; a[i] receives that sum plus the final value of j.

   When n <= 0 nothing is written and 0 is returned.  Otherwise a[] is
   filled and 0 is returned as well, so both paths of this int function now
   return a defined value.  */
int
foo (int n)
{
  int i, j, k = 0;
  int sum;

  if (n <= 0)
    return 0;

  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < n; j += 2)
        {
          sum += k++;
        }
      a[i] = sum + j;
    }

  return 0;
}
/* Test driver: seeds a[] with its own index, calls foo (N), then replays
   the same computation in scalar code.  K is initialised once and keeps
   incrementing across outer iterations, mirroring foo.  Aborts on the first
   mismatch, returns 0 otherwise.  */
int main (void)
{
int i,j,k=0;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j+=2)
sum += k++;
/* j still holds its value from the end of the loop above.  */
if (a[i] != sum + j)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Read-modify-write of a[] in the outer loop.

   For every i in [0, N) the inner loop sums the indices 0..N-1 into SUM,
   and a[i] is incremented by SUM plus the outer index i.

   Returns 0.  Callers only rely on the side effect on A; the explicit
   return keeps this int function from falling off its end.  */
int
foo ()
{
  int i, j;
  int sum;

  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < N; j++)
        {
          sum += j;
        }
      /* Uses both the inner-loop result and the outer index.  */
      a[i] += sum + i;
    }

  return 0;
}
/* Test driver: seeds both the global a[] and a local copy aa[] with their
   own index, calls foo (), then checks that each a[i] grew by the inner sum
   plus i relative to the saved copy.  Aborts on the first mismatch, returns
   0 otherwise.  */
int main (void)
{
int i,j;
int sum;
/* Snapshot of a[] taken before foo updates it in place.  */
int aa[N];
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++){
a[i] = i;
aa[i] = i;
}
foo ();
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != aa[i] + sum + i)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

/* Inner reduction whose initial value is loaded in the outer loop.

   For every i in [0, N) SUM starts from b[i], the inner loop adds the
   indices 0..N-1, and the result is stored into the local array a[i].

   B  restrict-qualified input array; elements 0..N-1 are read.
   K  index of the element of the local result array to return; the
      caller must pass a value in [0, N).

   Returns a[k].  */
int
foo (int * __restrict__ b, int k)
{
  int i, j;
  int sum;
  int a[N];

  for (i = 0; i < N; i++)
    {
      sum = b[i];
      for (j = 0; j < N; j++)
        {
          sum += j;
        }
      a[i] = sum;
    }

  return a[k];
}
/* Test driver: fills b[] with i + 2, calls foo (b, i) once per index to
   collect each result into the local a[], then recomputes every expected
   value in scalar code and aborts on the first mismatch.  Returns 0 on
   success.  */
int main (void)
{
int i,j;
int sum;
int b[N];
int a[N];
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
b[i] = i + 2;
/* foo returns a single element per call, so it is invoked N times.  */
for (i=0; i<N; i++)
a[i] = foo (b,i);
/* check results: */
for (i=0; i<N; i++)
{
sum = b[i];
for (j = 0; j < N; j++){
sum += j;
}
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
unsigned short in[N];
unsigned short coef[N];
unsigned short a[N];
/* Loop nest combining an unsigned short inner reduction with a widening
   multiply in the outer loop.  For every i the inner loop accumulates j
   into SUM_J, which is stored to a[i].  The outer loop also accumulates
   into SUM the product of in[i] and coef[i], each cast to unsigned int,
   shifted right by SCALE.  Returns SUM.  */
unsigned int
foo (short scale){
int i;
unsigned short j;
unsigned int sum = 0;
unsigned short sum_j;
for (i = 0; i < N; i++) {
sum_j = 0;
for (j = 0; j < N; j++) {
sum_j += j;
}
a[i] = sum_j;
/* Operands are unsigned short arrays widened to unsigned int before the
   multiplication.  */
sum += ((unsigned int) in[i] * (unsigned int) coef[i]) >> scale;
}
return sum;
}
/* Scalar reference for the inner-loop reduction: adds every index below N
   into an unsigned short accumulator and returns it.  */
unsigned short
bar (void)
{
  unsigned short acc = 0;
  unsigned short idx = 0;

  while (idx < N)
    {
      acc += idx;
      idx++;
    }

  return acc;
}
/* Test driver: fills in[] with 2*i and coef[] with i, calls foo (2), then
   checks every a[i] against bar () and the returned sum against a scalar
   recomputation using the same shift of 2.  Aborts on mismatch, returns 0
   otherwise.  */
int main (void)
{
int i;
unsigned short j, sum_j;
unsigned int sum = 0;
unsigned int res;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++){
in[i] = 2*i;
coef[i] = i;
}
res = foo (2);
/* check results: */
for (i=0; i<N; i++)
{
/* bar () recomputes the inner-loop sum that foo stored in a[i].  */
if (a[i] != bar ())
abort ();
sum += ((unsigned int) in[i] * (unsigned int) coef[i]) >> 2;
}
if (res != sum)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

/* Same nest as the global-array variants, but the destination is a pointer
   parameter.

   For every i in [0, N) the inner loop sums the indices 0..N-1 into SUM and
   stores it through a[i].

   A  destination buffer; elements 0..N-1 are written.

   Returns 0.  Callers only rely on the stores through A; the explicit
   return keeps this int function from falling off its end.  */
int
foo (int *a)
{
  int i, j;
  int sum;

  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < N; j++)
        {
          sum += j;
        }
      a[i] = sum;
    }

  return 0;
}
/* Test driver: seeds a local a[] with its own index, passes it to foo, then
   recomputes the inner sum for every element and aborts on the first
   mismatch.  Returns 0 on success.  */
int main (void)
{
int i,j;
int sum;
int a[N];
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (a);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Inner loop with a run-time bound and no guard on that bound.

   For every i in [0, N) the inner loop sums the indices 0..n-1 into SUM
   (zero iterations when n <= 0) and stores the result in a[i].

   Returns 0.  Callers only rely on the side effect on A; the explicit
   return keeps this int function from falling off its end.  */
int
foo (int n)
{
  int i, j;
  int sum;

  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < n; j++)
        {
          sum += j;
        }
      a[i] = sum;
    }

  return 0;
}
/* Test driver: seeds a[] with its own index, calls foo (N), then recomputes
   the inner sum for every element and aborts on the first mismatch.
   Returns 0 on success.  */
int main (void)
{
int i,j;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Inner loop written as do-while with a run-time bound.

   For every i in [0, N) the do-while body adds j to SUM and repeats while
   the pre-incremented j is below n; the result is stored in a[i].  The
   body therefore runs at least once per outer iteration.

   When n <= 0 nothing is written and 0 is returned.  Otherwise a[] is
   filled and 0 is returned as well, so both paths of this int function now
   return a defined value.  */
int
foo (int n)
{
  int i, j;
  int sum;

  if (n <= 0)
    return 0;

  for (i = 0; i < N; i++)
    {
      sum = 0;
      j = 0;
      do
        {
          sum += j;
        }
      while (++j < n);
      a[i] = sum;
    }

  return 0;
}
/* Test driver: seeds a[] with its own index, calls foo (N), then recomputes
   the inner sum for every element with a plain for loop and aborts on the
   first mismatch.  Returns 0 on success.  */
int main (void)
{
int i,j;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40

int a[N];

/* Inner loop with a run-time bound, guarded so the bound is known to be
   positive before the nest is entered.

   For every i in [0, N) the inner loop sums the indices 0..n-1 into SUM and
   stores the result in a[i].

   When n <= 0 nothing is written and 0 is returned.  Otherwise a[] is
   filled and 0 is returned as well, so both paths of this int function now
   return a defined value.  */
int
foo (int n)
{
  int i, j;
  int sum;

  if (n <= 0)
    return 0;

  for (i = 0; i < N; i++)
    {
      sum = 0;
      for (j = 0; j < n; j++)
        {
          sum += j;
        }
      a[i] = sum;
    }

  return 0;
}
/* Test driver: seeds a[] with its own index, calls foo (N), then recomputes
   the inner sum for every element and aborts on the first mismatch.
   Returns 0 on success.  */
int main (void)
{
int i,j;
int sum;
/* Declared in tree-vect.h (not visible here); presumably guards against
   targets without vector support -- confirm.  */
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 26
/* Single-loop reduction over the loop's own induction variable: starts the
   accumulator S at X, adds every index in [0, N), and returns the total.  */
int main1 (int X)
{
int s = X;
int i;
/* vectorization of reduction with induction.
Need -fno-tree-scev-cprop or else the loop is eliminated. */
for (i = 0; i < N; i++)
s += i;
return s;
}
/* Test driver: runs main1 with a starting value of 3 and aborts unless the
   result is 328.  Returns 0 on success.  */
int main (void)
{
  check_vect ();

  if (main1 (3) != 328)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
/* Do-while loop with an induction variable K that is stored, accumulated
   and also read after the loop.  Each iteration steps K by 2, stores it in
   arr1[i], and adds it to M (which starts at 3).  After the loop the
   function self-checks arr1[] and aborts on a mismatch.  Returns M + K.  */
int main1 ()
{
int arr1[N];
int k = 0;
int m = 3, i = 0;
/* Vectorization of induction that is used after the loop.
Currently vectorizable because scev_ccp disconnects the
use-after-the-loop from the iv def inside the loop. */
do {
k = k + 2;
arr1[i] = k;
m = m + k;
i++;
} while (i < N);
/* check results: */
for (i = 0; i < N; i++)
{
/* Element i was written after i + 1 increments of k by 2.  */
if (arr1[i] != 2+2*i)
abort ();
}
/* Both the reduction result and the final induction value are live here.  */
return m + k;
}
/* Test driver: runs main1 and aborts unless it returns 32 + 275.  Returns 0
   on success.  */
int main (void)
{
  check_vect ();

  if (main1 () != 32 + 275)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 26
/* Widening reduction over the loop's own induction variable: the index I is
   an unsigned short, and it is accumulated into the unsigned int INTSUM for
   every value in [0, N).  Returns INTSUM.  */
unsigned int main1 ()
{
unsigned short i;
unsigned int intsum = 0;
/* vectorization of reduction with induction, and widening sum:
sum shorts into int.
Need -fno-tree-scev-cprop or else the loop is eliminated. */
for (i = 0; i < N; i++)
{
intsum += i;
}
return intsum;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_sum_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" { target vect_widen_sum_hi_to_si } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -42,4 +42,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -182,8 +182,20 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-trapping-math-*.\[cS\]]]
# -fno-tree-scev-cprop
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-scev-cprop-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-vect-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# -fno-tree-scev-cprop
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-outer-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# -fno-tree-scev-cprop -fno-tree-reassoc
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop" "-fno-tree-reassoc"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-noreassoc-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# -fno-tree-dominator-opts
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
......
This diff is collapsed.
......@@ -148,7 +148,14 @@ widened_name_p (tree name, tree use_stmt, tree *half_type, tree *def_stmt)
* Return value: A new stmt that will be used to replace the sequence of
stmts that constitute the pattern. In this case it will be:
WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
*/
Note: The dot-prod idiom is a widening reduction pattern that is
vectorized without preserving all the intermediate results. It
produces only N/2 (widened) results (by summing up pairs of
intermediate results) rather than all N results. Therefore, we
cannot allow this pattern when we want to get all the results and in
the correct order (as is the case when this computation is in an
inner-loop nested in an outer-loop that is being vectorized). */
static tree
vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
......@@ -160,6 +167,8 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
tree type, half_type;
tree pattern_expr;
tree prod_type;
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
return NULL;
......@@ -242,6 +251,10 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
gcc_assert (stmt_vinfo);
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def)
return NULL;
/* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
inside the loop (in case we are analyzing an outer-loop). */
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
return NULL;
expr = GIMPLE_STMT_OPERAND (stmt, 1);
if (TREE_CODE (expr) != MULT_EXPR)
return NULL;
......@@ -295,6 +308,16 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
}
/* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */
if (nested_in_vect_loop_p (loop, last_stmt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_dot_prod_pattern: not allowed.");
return NULL;
}
return pattern_expr;
}
......@@ -521,7 +544,14 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out)
* Return value: A new stmt that will be used to replace the sequence of
stmts that constitute the pattern. In this case it will be:
WIDEN_SUM <x_t, sum_0>
*/
Note: The widening-sum idiom is a widening reduction pattern that is
vectorized without preserving all the intermediate results. It
produces only N/2 (widened) results (by summing up pairs of
intermediate results) rather than all N results. Therefore, we
cannot allow this pattern when we want to get all the results and in
the correct order (as is the case when this computation is in an
inner-loop nested in an outer-loop that is being vectorized). */
static tree
vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
......@@ -531,6 +561,8 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
tree type, half_type;
tree pattern_expr;
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
return NULL;
......@@ -580,6 +612,16 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
}
/* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */
if (nested_in_vect_loop_p (loop, last_stmt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_widen_sum_pattern: not allowed.");
return NULL;
}
return pattern_expr;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment