Commit 221e9a92 authored by dorit's avatar dorit
Browse files

* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info

        as argument instead of struct loop.
        (nested_in_vect_loop_p): New function.
        (vect_relevant): Add enum values vect_used_in_outer_by_reduction and
        vect_used_in_outer.
        (is_loop_header_bb_p): New. Used to differentiate loop-header phis
        from other phis in the loop.
        (destroy_loop_vec_info): Add additional argument to declaration.

        * tree-vectorizer.c (supportable_widening_operation): Also check if
        nested_in_vect_loop_p (don't allow changing the order in this case).
        (vect_is_simple_reduction): Takes a loop_vec_info as argument instead
        of struct loop. Call nested_in_vect_loop_p and don't require
        flag_unsafe_math_optimizations if it returns true.
        (new_stmt_vec_info): When setting def_type for phis differentiate
        loop-header phis from other phis.
        (bb_in_loop_p): New function.
        (new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just
        update their loop_vinfo.  Order of BB traversal now matters - call
        dfs_enumerate_from with bb_in_loop_p.
        (destroy_loop_vec_info): Takes additional argument to control whether
        stmt_vinfo of the loop stmts should be destroyed as well.
        (vect_is_simple_reduction): Allow the "non-reduction" use of a
        reduction stmt to be defined by a non loop-header phi.
        (vectorize_loops): Call destroy_loop_vec_info with additional argument.

        * tree-vect-transform.c (vectorizable_reduction): Call
        nested_in_vect_loop_p. Check for multitypes in the inner-loop.
        (vectorizable_call): Likewise.
        (vectorizable_conversion): Likewise.
        (vectorizable_operation): Likewise.
        (vectorizable_type_promotion): Likewise.
        (vectorizable_type_demotion): Likewise.
        (vectorizable_store): Likewise.
        (vectorizable_live_operation): Likewise.
        (vectorizable_reduction): Likewise. Also pass loop_info to
        vect_is_simple_reduction instead of loop.
        (vect_init_vector): Call nested_in_vect_loop_p.
        (get_initial_def_for_reduction): Likewise.
        (vect_create_epilog_for_reduction): Likewise.
        (vect_init_vector): Check which loop to work with, in case there's an
        inner-loop.
        (get_initial_def_for_inducion): Extend to handle outer-loop
        vectorization. Fix indentation.
        (vect_get_vec_def_for_operand): Support phis in the case vect_loop_def.
        In the case vect_induction_def get the vector def from the induction
        phi node, instead of calling get_initial_def_for_inducion.
        (get_initial_def_for_reduction): Extend to handle outer-loop
        vectorization.
        (vect_create_epilog_for_reduction): Extend to handle outer-loop
        vectorization.
        (vect_transform_loop): Change assert to just skip this case.  Add a
        dump printout.
        (vect_finish_stmt_generation): Add a couple asserts.

        (vect_estimate_min_profitable_iters): Multiply
        cost of inner-loop stmts (in outer-loop vectorization) by estimated
        inner-loop bound.
        (vect_model_reduction_cost): Don't add reduction epilogue cost in case
        this is an inner-loop reduction in outer-loop vectorization.

        * tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function.
        Same code as what used to be vect_analyze_scalar_cycles, only with
        additional argument loop, and loop_info passed to
        vect_is_simple_reduction instead of loop.
        (vect_analyze_scalar_cycles): Code factored out into
        vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest.
        Updated documentation.
        (analyze_operations): Check for inner-loop loop-closed exit-phis during
        outer-loop vectorization that are live or not used in the outerloop,
        cause this requires special handling.
        (vect_enhance_data_refs_alignment): Don't consider versioning for
        nested-loops.
        (vect_analyze_data_refs): Check that there are no datarefs in the
        inner-loop.
        (vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer
        and vect_used_in_outer_by_reduction cases.
        (process_use): Also consider the case of outer-loop stmt defining an
        inner-loop stmt and vice versa.
        (vect_analyze_loop_1): New function.
        (vect_analyze_loop_form): Extend, to allow a restricted form of nested
        loops.  Call vect_analyze_loop_1.
        (vect_analyze_loop): Skip (inner-)loops within outer-loops that have
        been vectorized.  Call destroy_loop_vec_info with additional argument.

        * tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow
        in the inner-loop when doing outer-loop vectorization. Add
        documentation and printout.
        (vect_recog_dot_prod_pattern): Likewise. Also add check for
        GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop).



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127623 138bc75d-0d04-0410-961f-82ee72b054a4
parent 8787bd6a
No related merge requests found
/* { dg-do compile } */
#define N 40
/* Compile-only loop nest: the reduction variable DIFF is carried across
   both the outer (i) and the inner (j) loop and is only read after the
   whole nest.  Returns the accumulated total.  */
int
foo (){
int i,j;
int diff = 0;
for (i = 0; i < N; i++) {
/* Inner loop adds the induction variable j to the running total.  */
for (j = 0; j < N; j++) {
diff += j;
}
}
return diff;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
int b[N];
/* For each of the N/2 outer iterations: load the pair b[2*i], b[2*i+1],
   sum j over 0..N-1 in the inner loop, then store sum+x and sum+y to the
   pair a[2*i], a[2*i+1].
   Declared int but has no return statement; the caller in this test
   discards the result.  */
int
foo (){
int i,j;
int sum,x,y;
for (i = 0; i < N/2; i++) {
/* The inner-loop reduction restarts from zero on every outer iteration.  */
sum = 0;
x = b[2*i];
y = b[2*i+1];
for (j = 0; j < N; j++) {
sum += j;
}
/* Both stores consume the result of the inner-loop reduction.  */
a[2*i] = sum + x;
a[2*i+1] = sum + y;
}
}
/* Driver: fill b[] with 0..N-1, run foo, then recompute the expected value
   of every pair of a[] independently and abort on the first mismatch.  */
int main (void)
{
int i,j;
int sum;
check_vect ();
for (i=0; i<N; i++)
b[i] = i;
foo ();
/* check results: */
for (i=0; i<N/2; i++)
{
/* Reference value of the inner-loop reduction.  */
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[2*i] != sum + b[2*i] || a[2*i+1] != sum + b[2*i+1])
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Two reductions at different nest levels:
   - SUM (int) accumulates i in the outer loop and is returned.
   - SUM_J (unsigned short) is re-seeded with i on every outer iteration,
     accumulates j in the inner loop, and sum_j + 5 is stored to a[i].
   The inner-loop counter j is unsigned short as well.  */
int
foo (){
int i;
unsigned short j;
int sum = 0;
unsigned short sum_j;
for (i = 0; i < N; i++) {
sum += i;
/* Inner reduction starts from the outer induction variable, not zero.  */
sum_j = i;
for (j = 0; j < N; j++) {
sum_j += j;
}
a[i] = sum_j + 5;
}
return sum;
}
/* Driver: initialise a[], run foo, then recompute both reductions (the
   per-element sum_j + 5 and the overall sum of i) and abort if either the
   stored elements or foo's return value disagree.  */
int main (void)
{
int i;
unsigned short j, sum_j;
int sum = 0;
int res;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
res = foo ();
/* check results: */
for (i=0; i<N; i++)
{
sum += i;
sum_j = i;
for (j = 0; j < N; j++){
sum_j += j;
}
if (a[i] != sum_j + 5)
abort();
}
/* The outer-loop reduction is checked once, after the whole loop.  */
if (res != sum)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Inner loop steps j by 2 up to the run-time bound n, and the final value
   of j is used after the inner loop (a[i] = sum + j).
   Returns 0 early when n <= 0; otherwise control reaches the end without
   a return statement, and the caller in this test ignores the result.  */
int
foo (int n){
int i,j;
int sum;
/* Guard: do nothing for a non-positive inner-loop bound.  */
if (n<=0)
return 0;
/* inner-loop index j used after the inner-loop */
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < n; j+=2) {
sum += j;
}
a[i] = sum + j;
}
}
/* Driver: initialise a[], call foo with n == N, then recompute sum + j
   (including the post-loop value of j) for every element and abort on a
   mismatch.  */
int main (void)
{
int i,j;
int sum;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j+=2)
sum += j;
/* j here holds its value after the reference inner loop has finished.  */
if (a[i] != sum + j)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Inner loop with a step of 2 and the compile-time bound N; only the
   reduction result (not j) is used after the inner loop.
   Declared int but has no return statement; the caller in this test
   discards the result.  */
int
foo (){
int i,j;
int sum;
/* inner-loop step > 1 */
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < N; j+=2) {
sum += j;
}
a[i] = sum;
}
}
/* Driver: initialise a[], run foo, then recompute the step-2 sum for every
   element and abort on a mismatch.  */
int main (void)
{
int i,j;
int sum;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo ();
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j+=2)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* induction variable k advances through inner and outer loops. */
/* k is initialised once and incremented inside the inner loop without
   being reset, so it keeps advancing across outer iterations.  Each a[i]
   receives the inner-loop sum of k values plus the post-loop value of j.
   Returns 0 early when n <= 0; otherwise there is no return statement and
   the caller in this test ignores the result.  */
int
foo (int n){
int i,j,k=0;
int sum;
if (n<=0)
return 0;
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < n; j+=2) {
/* Adds the current k, then advances it.  */
sum += k++;
}
a[i] = sum + j;
}
}
/* Driver: initialise a[], call foo with n == N, then replay the same
   computation with a local k that likewise persists across outer
   iterations, aborting on the first mismatch.  */
int main (void)
{
int i,j,k=0;
int sum;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j+=2)
sum += k++;
if (a[i] != sum + j)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* The outer loop performs a read-modify-write on a[i]: the inner-loop sum
   plus the outer index i is added to the existing element.
   Declared int but has no return statement; the caller in this test
   discards the result.  */
int
foo (){
int i,j;
int sum;
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < N; j++) {
sum += j;
}
/* Uses the previous contents of a[i] as well as the reduction result.  */
a[i] += sum + i;
}
}
/* Driver: initialise a[] and keep a copy of the initial values in aa[],
   run foo, then verify every element equals its initial value plus the
   inner-loop sum plus its index.  */
int main (void)
{
int i,j;
int sum;
int aa[N];
check_vect ();
for (i=0; i<N; i++){
a[i] = i;
/* aa[] preserves the pre-foo contents of a[] for the check below.  */
aa[i] = i;
}
foo ();
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != aa[i] + sum + i)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
/* Fills a local array: each a[i] is b[i] plus the sum of j over 0..N-1,
   with the inner-loop reduction seeded from the loaded value b[i] rather
   than zero.  Returns element k of that local array.
   B is read at indices 0..N-1; K indexes a[] without a bounds check.
   The local x is declared but not used.  */
int
foo (int * __restrict__ b, int k){
int i,j;
int sum,x;
int a[N];
for (i = 0; i < N; i++) {
/* Initial value of the inner reduction comes from memory.  */
sum = b[i];
for (j = 0; j < N; j++) {
sum += j;
}
a[i] = sum;
}
return a[k];
}
/* Driver: fill b[] with i + 2, call foo once per index to collect a[i],
   then recompute each expected value and abort on a mismatch.  */
int main (void)
{
int i,j;
int sum;
int b[N];
int a[N];
check_vect ();
for (i=0; i<N; i++)
b[i] = i + 2;
/* Each call recomputes the whole array inside foo and returns one
   element of it.  */
for (i=0; i<N; i++)
a[i] = foo (b,i);
/* check results: */
for (i=0; i<N; i++)
{
sum = b[i];
for (j = 0; j < N; j++){
sum += j;
}
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
unsigned short in[N];
unsigned short coef[N];
unsigned short a[N];
/* Combines an inner-loop unsigned short reduction (stored to a[i]) with an
   outer-loop reduction over a widening multiplication: in[i] and coef[i]
   are unsigned short values cast to unsigned int, multiplied, shifted
   right by SCALE and accumulated into SUM, which is returned.  */
unsigned int
foo (short scale){
int i;
unsigned short j;
unsigned int sum = 0;
unsigned short sum_j;
for (i = 0; i < N; i++) {
sum_j = 0;
for (j = 0; j < N; j++) {
sum_j += j;
}
a[i] = sum_j;
/* Outer-loop reduction over the widened product.  */
sum += ((unsigned int) in[i] * (unsigned int) coef[i]) >> scale;
}
return sum;
}
/* Reference helper: returns the sum of j over 0..N-1 computed in unsigned
   short, i.e. the value foo is expected to store in every a[i].  */
unsigned short
bar (void)
{
unsigned short j;
unsigned short sum_j;
sum_j = 0;
for (j = 0; j < N; j++) {
sum_j += j;
}
return sum_j;
}
/* Driver: initialise in[] and coef[], call foo with a shift of 2, then
   check every a[i] against bar () and the returned total against a
   recomputed sum of the shifted products.
   The locals j and sum_j are declared but not used here.  */
int main (void)
{
int i;
unsigned short j, sum_j;
unsigned int sum = 0;
unsigned int res;
check_vect ();
for (i=0; i<N; i++){
in[i] = 2*i;
coef[i] = i;
}
res = foo (2);
/* check results: */
for (i=0; i<N; i++)
{
if (a[i] != bar ())
abort ();
/* The shift amount must match the argument passed to foo above.  */
sum += ((unsigned int) in[i] * (unsigned int) coef[i]) >> 2;
}
if (res != sum)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
/* Same nest as the global-array variants, but the destination is reached
   through the pointer parameter A: each a[i], for i in 0..N-1, receives
   the sum of j over 0..N-1.
   Declared int but has no return statement; the caller in this test
   discards the result.  */
int
foo (int *a){
int i,j;
int sum;
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < N; j++) {
sum += j;
}
a[i] = sum;
}
}
/* Driver: passes a local array to foo, then recomputes the expected sum
   for every element and aborts on a mismatch.  */
int main (void)
{
int i,j;
int sum;
int a[N];
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (a);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Inner loop bounded by the run-time parameter n, with no preceding guard
   on n: for n <= 0 the inner loop body never runs and a[i] is set to 0.
   Declared int but has no return statement; the caller in this test
   discards the result.  */
int
foo (int n){
int i,j;
int sum;
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < n; j++) {
sum += j;
}
a[i] = sum;
}
}
/* Driver: initialise a[], call foo with n == N, then recompute the
   expected sum for every element and abort on a mismatch.  */
int main (void)
{
int i,j;
int sum;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Inner loop written as a do-while with a run-time bound n, so its body
   executes at least once per outer iteration; the n <= 0 guard up front
   returns 0 before the nest is entered.
   On the normal path there is no return statement; the caller in this test
   ignores the result.  */
int
foo (int n){
int i,j;
int sum;
if (n<=0)
return 0;
for (i = 0; i < N; i++) {
sum = 0;
j = 0;
/* Bottom-tested inner loop: j is incremented in the exit condition.  */
do {
sum += j;
}while (++j < n);
a[i] = sum;
}
}
/* Driver: initialise a[], call foo with n == N, then recompute the
   expected sum with a plain for loop and abort on a mismatch.  */
int main (void)
{
int i,j;
int sum;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 40
int a[N];
/* Inner for loop bounded by the run-time parameter n, preceded by an
   explicit n <= 0 guard that returns 0, so the nest is only reached with
   a positive bound.
   On the normal path there is no return statement; the caller in this test
   ignores the result.  */
int
foo (int n){
int i,j;
int sum;
if (n<=0)
return 0;
for (i = 0; i < N; i++) {
sum = 0;
for (j = 0; j < n; j++) {
sum += j;
}
a[i] = sum;
}
}
/* Driver: initialise a[], call foo with n == N, then recompute the
   expected sum for every element and abort on a mismatch.  */
int main (void)
{
int i,j;
int sum;
check_vect ();
for (i=0; i<N; i++)
a[i] = i;
foo (N);
/* check results: */
for (i=0; i<N; i++)
{
sum = 0;
for (j = 0; j < N; j++)
sum += j;
if (a[i] != sum)
abort();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 26
/* Single-loop reduction whose addend is the induction variable itself:
   returns X plus the sum of i over 0..N-1.  */
int main1 (int X)
{
int s = X;
int i;
/* vectorization of reduction with induction.
Need -fno-tree-scev-cprop or else the loop is eliminated. */
for (i = 0; i < N; i++)
s += i;
return s;
}
/* Driver: with N == 26 and X == 3 the expected result is
   3 + (0 + 1 + ... + 25) = 3 + 325 = 328.  */
int main (void)
{
int s;
check_vect ();
s = main1 (3);
if (s != 328)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
/* Bottom-tested loop with an induction k (step 2) that is stored to
   arr1[], accumulated into m, and also used after the loop.  Verifies the
   stored values in place and returns m + k.  */
int main1 ()
{
int arr1[N];
int k = 0;
int m = 3, i = 0;
/* Vectorization of induction that is used after the loop.
Currently vectorizable because scev_ccp disconnects the
use-after-the-loop from the iv def inside the loop. */
do {
k = k + 2;
arr1[i] = k;
m = m + k;
i++;
} while (i < N);
/* check results: */
for (i = 0; i < N; i++)
{
/* Element i was written after i + 1 increments of k by 2.  */
if (arr1[i] != 2+2*i)
abort ();
}
return m + k;
}
/* Driver: with N == 16, k ends at 2 * 16 = 32 and m ends at
   3 + (2 + 4 + ... + 32) = 3 + 272 = 275, hence the expected 32 + 275.  */
int main (void)
{
int res;
check_vect ();
res = main1 ();
if (res != 32 + 275)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 26
/* Compile-only widening reduction: the unsigned short induction variable i
   is accumulated into the unsigned int total INTSUM, which is returned.  */
unsigned int main1 ()
{
unsigned short i;
unsigned int intsum = 0;
/* vectorization of reduction with induction, and widening sum:
sum shorts into int.
Need -fno-tree-scev-cprop or else the loop is eliminated. */
for (i = 0; i < N; i++)
{
intsum += i;
}
return intsum;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_sum_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" { target vect_widen_sum_hi_to_si } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -42,4 +42,5 @@ int main (void) ...@@ -42,4 +42,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -182,8 +182,20 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-trapping-math-*.\[cS\]]] ...@@ -182,8 +182,20 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-trapping-math-*.\[cS\]]]
# -fno-tree-scev-cprop # -fno-tree-scev-cprop
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop" lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-scev-cprop-*.\[cS\]]] \ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-vect-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS "" $DEFAULT_VECTCFLAGS
# -fno-tree-scev-cprop
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-outer-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# -fno-tree-scev-cprop -fno-tree-reassoc
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop" "-fno-tree-reassoc"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-noreassoc-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# -fno-tree-dominator-opts # -fno-tree-dominator-opts
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
......
...@@ -325,6 +325,24 @@ vect_analyze_operations (loop_vec_info loop_vinfo) ...@@ -325,6 +325,24 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
print_generic_expr (vect_dump, phi, TDF_SLIM); print_generic_expr (vect_dump, phi, TDF_SLIM);
} }
if (! is_loop_header_bb_p (bb))
{
/* inner-loop loop-closed exit phi in outer-loop vectorization
(i.e. a phi in the tail of the outer-loop).
FORNOW: we currently don't support the case that these phis
are not used in the outerloop, cause this case requires
to actually do something here. */
if (!STMT_VINFO_RELEVANT_P (stmt_info)
|| STMT_VINFO_LIVE_P (stmt_info))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
"Unsupported loop-closed phi in outer-loop.");
return false;
}
continue;
}
gcc_assert (stmt_info); gcc_assert (stmt_info);
if (STMT_VINFO_LIVE_P (stmt_info)) if (STMT_VINFO_LIVE_P (stmt_info))
...@@ -398,7 +416,9 @@ vect_analyze_operations (loop_vec_info loop_vinfo) ...@@ -398,7 +416,9 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
break; break;
case vect_reduction_def: case vect_reduction_def:
gcc_assert (relevance == vect_unused_in_loop); gcc_assert (relevance == vect_used_in_outer
|| relevance == vect_used_in_outer_by_reduction
|| relevance == vect_unused_in_loop);
break; break;
case vect_induction_def: case vect_induction_def:
...@@ -589,50 +609,17 @@ exist_non_indexing_operands_for_use_p (tree use, tree stmt) ...@@ -589,50 +609,17 @@ exist_non_indexing_operands_for_use_p (tree use, tree stmt)
} }
/* Function vect_analyze_scalar_cycles. /* Function vect_analyze_scalar_cycles_1.
Examine the cross iteration def-use cycles of scalar variables, by
analyzing the loop (scalar) PHIs; Classify each cycle as one of the
following: invariant, induction, reduction, unknown.
Some forms of scalar cycles are not yet supported.
Example1: reduction: (unsupported yet)
loop1:
for (i=0; i<N; i++)
sum += a[i];
Example2: induction: (unsupported yet)
loop2:
for (i=0; i<N; i++)
a[i] = i;
Note: the following loop *is* vectorizable:
loop3:
for (i=0; i<N; i++)
a[i] = b[i];
even though it has a def-use cycle caused by the induction variable i:
loop: i_2 = PHI (i_0, i_1)
a[i_2] = ...;
i_1 = i_2 + 1;
GOTO loop;
because the def-use cycle in loop3 is considered "not relevant" - i.e., Examine the cross iteration def-use cycles of scalar variables
it does not need to be vectorized because it is only used for array in LOOP. LOOP_VINFO represents the loop that is now being
indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in considered for vectorization (can be LOOP, or an outer-loop
loop2 on the other hand is relevant (it is being written to memory). enclosing LOOP). */
*/
static void static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{ {
tree phi; tree phi;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block bb = loop->header; basic_block bb = loop->header;
tree dumy; tree dumy;
VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64); VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
...@@ -698,7 +685,7 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) ...@@ -698,7 +685,7 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
gcc_assert (is_gimple_reg (SSA_NAME_VAR (def))); gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
reduc_stmt = vect_is_simple_reduction (loop, phi); reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
if (reduc_stmt) if (reduc_stmt)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
...@@ -717,6 +704,48 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) ...@@ -717,6 +704,48 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
} }
/* Function vect_analyze_scalar_cycles.
Examine the cross iteration def-use cycles of scalar variables, by
analyzing the loop-header PHIs of scalar variables; Classify each
cycle as one of the following: invariant, induction, reduction, unknown.
We do that for the loop represented by LOOP_VINFO, and also to its
inner-loop, if exists.
Examples for scalar cycles:
Example1: reduction:
loop1:
for (i=0; i<N; i++)
sum += a[i];
Example2: induction:
loop2:
for (i=0; i<N; i++)
a[i] = i; */
static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *nested;

  /* First classify the scalar cycles of the loop that LOOP_VINFO
     describes.  */
  vect_analyze_scalar_cycles_1 (loop_vinfo, vect_loop);

  /* In outer-loop vectorization the inner-loop still runs sequentially, so
     its reductions differ from those of the nest being vectorized:
       1. Once vectorized they are computed in the original scalar order,
	  so the order of computation must not be changed for them.
       2. FIXME: Inner-loop reductions may legitimately be used inside the
	  inner-loop, so the current checks are stricter than necessary.  */
  nested = vect_loop->inner;
  if (!nested)
    return;

  vect_analyze_scalar_cycles_1 (loop_vinfo, nested);
}
/* Function vect_insert_into_interleaving_chain. /* Function vect_insert_into_interleaving_chain.
Insert DRA into the interleaving chain of DRB according to DRA's INIT. */ Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
...@@ -1166,6 +1195,8 @@ vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new) ...@@ -1166,6 +1195,8 @@ vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)
static bool static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{ {
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
if (vect_print_dump_info (REPORT_DR_DETAILS)) if (vect_print_dump_info (REPORT_DR_DETAILS))
{ {
fprintf (vect_dump, "mark for run-time aliasing test between "); fprintf (vect_dump, "mark for run-time aliasing test between ");
...@@ -1174,6 +1205,14 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) ...@@ -1174,6 +1205,14 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM); print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
} }
/* FORNOW: We don't support versioning with outer-loop vectorization. */
if (loop->inner)
{
if (vect_print_dump_info (REPORT_DR_DETAILS))
fprintf (vect_dump, "versioning not yet supported for outer-loops.");
return false;
}
/* Do not add to the list duplicate ddrs. */ /* Do not add to the list duplicate ddrs. */
if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr)) if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))
return true; return true;
...@@ -1805,7 +1844,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1805,7 +1844,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4) all misaligned data refs with a known misalignment are supported, and 4) all misaligned data refs with a known misalignment are supported, and
5) the number of runtime alignment checks is within reason. */ 5) the number of runtime alignment checks is within reason. */
do_versioning = flag_tree_vect_loop_version && (!optimize_size); do_versioning =
flag_tree_vect_loop_version
&& (!optimize_size)
&& (!loop->inner); /* FORNOW */
if (do_versioning) if (do_versioning)
{ {
...@@ -2188,6 +2230,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo) ...@@ -2188,6 +2230,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
{ {
tree stmt; tree stmt;
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
basic_block bb;
if (!dr || !DR_REF (dr)) if (!dr || !DR_REF (dr))
{ {
...@@ -2200,6 +2243,16 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo) ...@@ -2200,6 +2243,16 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
stmt = DR_STMT (dr); stmt = DR_STMT (dr);
stmt_info = vinfo_for_stmt (stmt); stmt_info = vinfo_for_stmt (stmt);
/* If outer-loop vectorization: we don't yet support datarefs
in the innermost loop. */
bb = bb_for_stmt (stmt);
if (bb->loop_father != LOOP_VINFO_LOOP (loop_vinfo))
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
fprintf (vect_dump, "not vectorized: data-ref in nested loop");
return false;
}
if (STMT_VINFO_DATA_REF (stmt_info)) if (STMT_VINFO_DATA_REF (stmt_info))
{ {
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
...@@ -2287,11 +2340,13 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt, ...@@ -2287,11 +2340,13 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
/* This is the last stmt in a sequence that was detected as a /* This is the last stmt in a sequence that was detected as a
pattern that can potentially be vectorized. Don't mark the stmt pattern that can potentially be vectorized. Don't mark the stmt
as relevant/live because it's not going to vectorized. as relevant/live because it's not going to be vectorized.
Instead mark the pattern-stmt that replaces it. */ Instead mark the pattern-stmt that replaces it. */
pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
stmt_info = vinfo_for_stmt (pattern_stmt); stmt_info = vinfo_for_stmt (pattern_stmt);
gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
save_relevant = STMT_VINFO_RELEVANT (stmt_info); save_relevant = STMT_VINFO_RELEVANT (stmt_info);
...@@ -2341,7 +2396,8 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo, ...@@ -2341,7 +2396,8 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
*live_p = false; *live_p = false;
/* cond stmt other than loop exit cond. */ /* cond stmt other than loop exit cond. */
if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) if (is_ctrl_stmt (stmt)
&& STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
*relevant = vect_used_in_loop; *relevant = vect_used_in_loop;
/* changing memory. */ /* changing memory. */
...@@ -2398,6 +2454,8 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo, ...@@ -2398,6 +2454,8 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
of the respective DEF_STMT is left unchanged. of the respective DEF_STMT is left unchanged.
- case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
skip DEF_STMT cause it had already been processed. skip DEF_STMT cause it had already been processed.
- case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
be modified accordingly.
Return true if everything is as expected. Return false otherwise. */ Return true if everything is as expected. Return false otherwise. */
...@@ -2408,7 +2466,7 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ...@@ -2408,7 +2466,7 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
stmt_vec_info dstmt_vinfo; stmt_vec_info dstmt_vinfo;
basic_block def_bb; basic_block bb, def_bb;
tree def, def_stmt; tree def, def_stmt;
enum vect_def_type dt; enum vect_def_type dt;
...@@ -2429,17 +2487,27 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ...@@ -2429,17 +2487,27 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
def_bb = bb_for_stmt (def_stmt); def_bb = bb_for_stmt (def_stmt);
if (!flow_bb_inside_loop_p (loop, def_bb)) if (!flow_bb_inside_loop_p (loop, def_bb))
return true; {
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "def_stmt is out of loop.");
return true;
}
/* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
must have already been processed, so we just check that everything is as DEF_STMT must have already been processed, because this should be the
expected, and we are done. */ only way that STMT, which is a reduction-phi, was put in the worklist,
as there should be no other uses for DEF_STMT in the loop. So we just
check that everything is as expected, and we are done. */
dstmt_vinfo = vinfo_for_stmt (def_stmt); dstmt_vinfo = vinfo_for_stmt (def_stmt);
bb = bb_for_stmt (stmt);
if (TREE_CODE (stmt) == PHI_NODE if (TREE_CODE (stmt) == PHI_NODE
&& STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
&& TREE_CODE (def_stmt) != PHI_NODE && TREE_CODE (def_stmt) != PHI_NODE
&& STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def) && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
&& bb->loop_father == def_bb->loop_father)
{ {
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo)) if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo)); dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction); gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
...@@ -2448,6 +2516,73 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ...@@ -2448,6 +2516,73 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
return true; return true;
} }
/* case 3a: outer-loop stmt defining an inner-loop stmt:
outer-loop-header-bb:
d = def_stmt
inner-loop:
stmt # use (d)
outer-loop-tail-bb:
... */
if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
switch (relevant)
{
case vect_unused_in_loop:
relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
vect_used_by_reduction : vect_unused_in_loop;
break;
case vect_used_in_outer_by_reduction:
relevant = vect_used_by_reduction;
break;
case vect_used_in_outer:
relevant = vect_used_in_loop;
break;
case vect_used_by_reduction:
case vect_used_in_loop:
break;
default:
gcc_unreachable ();
}
}
/* case 3b: inner-loop stmt defining an outer-loop stmt:
outer-loop-header-bb:
...
inner-loop:
d = def_stmt
outer-loop-tail-bb:
stmt # use (d) */
else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
switch (relevant)
{
case vect_unused_in_loop:
relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
vect_used_in_outer_by_reduction : vect_unused_in_loop;
break;
case vect_used_in_outer_by_reduction:
case vect_used_in_outer:
break;
case vect_used_by_reduction:
relevant = vect_used_in_outer_by_reduction;
break;
case vect_used_in_loop:
relevant = vect_used_in_outer;
break;
default:
gcc_unreachable ();
}
}
vect_mark_relevant (worklist, def_stmt, relevant, live_p); vect_mark_relevant (worklist, def_stmt, relevant, live_p);
return true; return true;
} }
...@@ -2556,25 +2691,38 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) ...@@ -2556,25 +2691,38 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
identify stmts that are used solely by a reduction, and therefore the identify stmts that are used solely by a reduction, and therefore the
order of the results that they produce does not have to be kept. order of the results that they produce does not have to be kept.
Reduction phis are expected to be used by a reduction stmt; Other Reduction phis are expected to be used by a reduction stmt, or by a stmt
reduction stmts are expected to be unused in the loop. These are the in an outer loop; Other reduction stmts are expected to be unused
expected values of "relevant" for reduction phis/stmts in the loop: in the loop, and possibly used by a stmt in an outer loop.
Here are the expected values of "relevant" for reduction phis/stmts:
relevance: phi stmt relevance: phi stmt
vect_unused_in_loop ok vect_unused_in_loop ok
vect_used_in_outer_by_reduction ok ok
vect_used_in_outer ok ok
vect_used_by_reduction ok vect_used_by_reduction ok
vect_used_in_loop */ vect_used_in_loop */
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
{ {
switch (relevant) enum vect_relevant tmp_relevant = relevant;
switch (tmp_relevant)
{ {
case vect_unused_in_loop: case vect_unused_in_loop:
gcc_assert (TREE_CODE (stmt) != PHI_NODE); gcc_assert (TREE_CODE (stmt) != PHI_NODE);
relevant = vect_used_by_reduction;
break; break;
case vect_used_in_outer_by_reduction:
case vect_used_in_outer:
gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
&& TREE_CODE (stmt) != DOT_PROD_EXPR);
break;
case vect_used_by_reduction: case vect_used_by_reduction:
if (TREE_CODE (stmt) == PHI_NODE) if (TREE_CODE (stmt) == PHI_NODE)
break; break;
/* fall through */
case vect_used_in_loop: case vect_used_in_loop:
default: default:
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
...@@ -2582,7 +2730,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) ...@@ -2582,7 +2730,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
VEC_free (tree, heap, worklist); VEC_free (tree, heap, worklist);
return false; return false;
} }
relevant = vect_used_by_reduction;
live_p = false; live_p = false;
} }
...@@ -2724,11 +2871,39 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) ...@@ -2724,11 +2871,39 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
} }
/* Function vect_analyze_loop_1.

   Run the loop-form (CFG) analysis on LOOP and hand back the
   loop_vec_info it produces, or NULL when that analysis rejects the
   loop.  This is only a subset of the analyses applied in
   vect_analyze_loop; it is intended for an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info inner_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit,
     etc.).  On success the resulting loop_vec_info is returned as is.  */
  inner_vinfo = vect_analyze_loop_form (loop);
  if (inner_vinfo)
    return inner_vinfo;

  /* The form analysis failed: report it and give up on this loop.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "bad inner-loop form.");

  return NULL;
}
/* Function vect_analyze_loop_form. /* Function vect_analyze_loop_form.
Verify the following restrictions (some may be relaxed in the future): Verify that certain CFG restrictions hold, including:
- it's an inner-most loop
- number of BBs = 2 (which are the loop header and the latch)
- the loop has a pre-header - the loop has a pre-header
- the loop has a single entry and exit - the loop has a single entry and exit
- the loop exit condition is simple enough, and the number of iterations - the loop exit condition is simple enough, and the number of iterations
...@@ -2740,31 +2915,134 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -2740,31 +2915,134 @@ vect_analyze_loop_form (struct loop *loop)
loop_vec_info loop_vinfo; loop_vec_info loop_vinfo;
tree loop_cond; tree loop_cond;
tree number_of_iterations = NULL; tree number_of_iterations = NULL;
loop_vec_info inner_loop_vinfo = NULL;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_loop_form ==="); fprintf (vect_dump, "=== vect_analyze_loop_form ===");
if (loop->inner) /* Different restrictions apply when we are considering an inner-most loop,
vs. an outer (nested) loop.
(FORNOW. May want to relax some of these restrictions in the future). */
if (!loop->inner)
{ {
if (vect_print_dump_info (REPORT_OUTER_LOOPS)) /* Inner-most loop. We currently require that the number of BBs is
fprintf (vect_dump, "not vectorized: nested loop."); exactly 2 (the header and latch). Vectorizable inner-most loops
look like this:
(pre-header)
|
header <--------+
| | |
| +--> latch --+
|
(exit-bb) */
if (loop->num_nodes != 2)
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: too many BBs in loop.");
return NULL;
}
if (empty_block_p (loop->header))
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: empty loop.");
return NULL; return NULL;
} }
}
else
{
struct loop *innerloop = loop->inner;
edge backedge, entryedge;
/* Nested loop. We currently require that the loop is doubly-nested,
contains a single inner loop, and the number of BBs is exactly 5.
Vectorizable outer-loops look like this:
(pre-header)
|
header <---+
| |
inner-loop |
| |
tail ------+
|
(exit-bb)
The inner-loop has the properties expected of inner-most loops
as described above. */
if ((loop->inner)->inner || (loop->inner)->next)
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: multiple nested loops.");
return NULL;
}
/* Analyze the inner-loop. */
inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
if (!inner_loop_vinfo)
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: Bad inner loop.");
return NULL;
}
if (!expr_invariant_in_loop_p (loop,
LOOP_VINFO_NITERS (inner_loop_vinfo)))
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump,
"not vectorized: inner-loop count not invariant.");
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
if (loop->num_nodes != 5)
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: too many BBs in loop.");
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
backedge = EDGE_PRED (innerloop->header, 1);
entryedge = EDGE_PRED (innerloop->header, 0);
if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
{
backedge = EDGE_PRED (innerloop->header, 0);
entryedge = EDGE_PRED (innerloop->header, 1);
}
if (entryedge->src != loop->header
|| !single_exit (innerloop)
|| single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Considering outer-loop vectorization.");
}
if (!single_exit (loop) if (!single_exit (loop)
|| loop->num_nodes != 2
|| EDGE_COUNT (loop->header->preds) != 2) || EDGE_COUNT (loop->header->preds) != 2)
{ {
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
{ {
if (!single_exit (loop)) if (!single_exit (loop))
fprintf (vect_dump, "not vectorized: multiple exits."); fprintf (vect_dump, "not vectorized: multiple exits.");
else if (loop->num_nodes != 2)
fprintf (vect_dump, "not vectorized: too many BBs in loop.");
else if (EDGE_COUNT (loop->header->preds) != 2) else if (EDGE_COUNT (loop->header->preds) != 2)
fprintf (vect_dump, "not vectorized: too many incoming edges."); fprintf (vect_dump, "not vectorized: too many incoming edges.");
} }
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2777,6 +3055,8 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -2777,6 +3055,8 @@ vect_analyze_loop_form (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: unexpected loop form."); fprintf (vect_dump, "not vectorized: unexpected loop form.");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2794,22 +3074,19 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -2794,22 +3074,19 @@ vect_analyze_loop_form (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: abnormal loop exit edge."); fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL; return NULL;
} }
} }
if (empty_block_p (loop->header))
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: empty loop.");
return NULL;
}
loop_cond = vect_get_loop_niters (loop, &number_of_iterations); loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
if (!loop_cond) if (!loop_cond)
{ {
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: complicated exit condition."); fprintf (vect_dump, "not vectorized: complicated exit condition.");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2818,6 +3095,8 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -2818,6 +3095,8 @@ vect_analyze_loop_form (struct loop *loop)
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, fprintf (vect_dump,
"not vectorized: number of iterations cannot be computed."); "not vectorized: number of iterations cannot be computed.");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2825,7 +3104,9 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -2825,7 +3104,9 @@ vect_analyze_loop_form (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "Infinite number of iterations."); fprintf (vect_dump, "Infinite number of iterations.");
return false; if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
} }
if (!NITERS_KNOWN_P (number_of_iterations)) if (!NITERS_KNOWN_P (number_of_iterations))
...@@ -2840,12 +3121,19 @@ vect_analyze_loop_form (struct loop *loop) ...@@ -2840,12 +3121,19 @@ vect_analyze_loop_form (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
fprintf (vect_dump, "not vectorized: number of iterations = 0."); fprintf (vect_dump, "not vectorized: number of iterations = 0.");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, false);
return NULL; return NULL;
} }
loop_vinfo = new_loop_vec_info (loop); loop_vinfo = new_loop_vec_info (loop);
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
/* CHECKME: May want to keep it around in the future. */
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, false);
gcc_assert (!loop->aux); gcc_assert (!loop->aux);
loop->aux = loop_vinfo; loop->aux = loop_vinfo;
...@@ -2867,6 +3155,15 @@ vect_analyze_loop (struct loop *loop) ...@@ -2867,6 +3155,15 @@ vect_analyze_loop (struct loop *loop)
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "===== analyze_loop_nest ====="); fprintf (vect_dump, "===== analyze_loop_nest =====");
if (loop_outer (loop)
&& loop_vec_info_for_loop (loop_outer (loop))
&& LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "outer-loop already vectorized.");
return NULL;
}
/* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */ /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
loop_vinfo = vect_analyze_loop_form (loop); loop_vinfo = vect_analyze_loop_form (loop);
...@@ -2888,7 +3185,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2888,7 +3185,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data references."); fprintf (vect_dump, "bad data references.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2906,7 +3203,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2906,7 +3203,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unexpected pattern."); fprintf (vect_dump, "unexpected pattern.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2918,7 +3215,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2918,7 +3215,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data alignment."); fprintf (vect_dump, "bad data alignment.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2927,7 +3224,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2927,7 +3224,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "can't determine vectorization factor."); fprintf (vect_dump, "can't determine vectorization factor.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2939,7 +3236,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2939,7 +3236,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data dependence."); fprintf (vect_dump, "bad data dependence.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2951,7 +3248,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2951,7 +3248,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data access."); fprintf (vect_dump, "bad data access.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2963,7 +3260,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2963,7 +3260,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data alignment."); fprintf (vect_dump, "bad data alignment.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
...@@ -2975,7 +3272,7 @@ vect_analyze_loop (struct loop *loop) ...@@ -2975,7 +3272,7 @@ vect_analyze_loop (struct loop *loop)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad operation or unsupported loop bound."); fprintf (vect_dump, "bad operation or unsupported loop bound.");
destroy_loop_vec_info (loop_vinfo); destroy_loop_vec_info (loop_vinfo, true);
return NULL; return NULL;
} }
......
...@@ -148,7 +148,14 @@ widened_name_p (tree name, tree use_stmt, tree *half_type, tree *def_stmt) ...@@ -148,7 +148,14 @@ widened_name_p (tree name, tree use_stmt, tree *half_type, tree *def_stmt)
* Return value: A new stmt that will be used to replace the sequence of * Return value: A new stmt that will be used to replace the sequence of
stmts that constitute the pattern. In this case it will be: stmts that constitute the pattern. In this case it will be:
WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
*/
Note: The dot-prod idiom is a widening reduction pattern that is
vectorized without preserving all the intermediate results. It
produces only N/2 (widened) results (by summing up pairs of
intermediate results) rather than all N results. Therefore, we
cannot allow this pattern when we want to get all the results and in
the correct order (as is the case when this computation is in an
inner-loop nested in an outer-loop that is being vectorized). */
static tree static tree
vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out) vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
...@@ -160,6 +167,8 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out) ...@@ -160,6 +167,8 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
tree type, half_type; tree type, half_type;
tree pattern_expr; tree pattern_expr;
tree prod_type; tree prod_type;
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT) if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
return NULL; return NULL;
...@@ -242,6 +251,10 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out) ...@@ -242,6 +251,10 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
gcc_assert (stmt_vinfo); gcc_assert (stmt_vinfo);
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def) if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def)
return NULL; return NULL;
/* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
inside the loop (in case we are analyzing an outer-loop). */
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
return NULL;
expr = GIMPLE_STMT_OPERAND (stmt, 1); expr = GIMPLE_STMT_OPERAND (stmt, 1);
if (TREE_CODE (expr) != MULT_EXPR) if (TREE_CODE (expr) != MULT_EXPR)
return NULL; return NULL;
...@@ -295,6 +308,16 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out) ...@@ -295,6 +308,16 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: "); fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
print_generic_expr (vect_dump, pattern_expr, TDF_SLIM); print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
} }
/* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */
if (nested_in_vect_loop_p (loop, last_stmt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_dot_prod_pattern: not allowed.");
return NULL;
}
return pattern_expr; return pattern_expr;
} }
...@@ -521,7 +544,14 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out) ...@@ -521,7 +544,14 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out)
* Return value: A new stmt that will be used to replace the sequence of * Return value: A new stmt that will be used to replace the sequence of
stmts that constitute the pattern. In this case it will be: stmts that constitute the pattern. In this case it will be:
WIDEN_SUM <x_t, sum_0> WIDEN_SUM <x_t, sum_0>
*/
Note: The widening-sum idiom is a widening reduction pattern that is
vectorized without preserving all the intermediate results. It
produces only N/2 (widened) results (by summing up pairs of
intermediate results) rather than all N results. Therefore, we
cannot allow this pattern when we want to get all the results and in
the correct order (as is the case when this computation is in an
inner-loop nested in an outer-loop that is being vectorized). */
static tree static tree
vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out) vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
...@@ -531,6 +561,8 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out) ...@@ -531,6 +561,8 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
tree type, half_type; tree type, half_type;
tree pattern_expr; tree pattern_expr;
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT) if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
return NULL; return NULL;
...@@ -580,6 +612,16 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out) ...@@ -580,6 +612,16 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: "); fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
print_generic_expr (vect_dump, pattern_expr, TDF_SLIM); print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
} }
/* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */
if (nested_in_vect_loop_p (loop, last_stmt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_widen_sum_pattern: not allowed.");
return NULL;
}
return pattern_expr; return pattern_expr;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment