/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
					      tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
						      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
					       tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_sdivmod_pow2_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.

   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
		bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}
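
/* For example, given the gimple sequence

     short s_5 = ...;
     int i_6 = (int) s_5;       <-- DEF_STMT
     int prod_7 = i_6 * i_6;    <-- USE_STMT

   widened_name_p (i_6, USE_STMT, &half_type, &def_stmt, true) returns true
   with HALF_TYPE == short, since int is at least twice as wide as short.
   (An illustrative sketch only; the SSA names are made up.)  */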

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output  of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results. It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
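
/* For example (an illustrative sketch, not taken from this file), the C loop

     short a[N], b[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   matches this pattern: S3/S4 are the implicit promotions of a[i] and b[i]
   to int, S5 is the int multiplication, and S7 is the accumulation, so the
   whole loop body becomes DOT_PROD_EXPR <a[i], b[i], sum>.  */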

static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
						oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
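
/* For instance (a sketch with made-up values): with HALF_TYPE == char,
   TYPE == int and the statements

     a_T = (int) a_t;
     res = a_T * 300;

   the constant 300 does not fit char, so the multiplication cannot be done
   on char directly.  But int is 4 times bigger than char and 300 fits
   short, so the promotion is replaced with a_it = (short) a_t; and the
   multiplication becomes a short -> int widening operation.  */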

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
   		               VEC (gimple, heap) **stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T,  prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
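
/* For example (an illustrative sketch, not taken from this file), the C loop

     short a[N], b[N];
     int c[N], i;
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   matches this pattern: both operands are promoted to int before the
   multiplication, and the loop body is replaced by a single
   WIDEN_MULT_EXPR <a[i], b[i]>.  */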

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
	  if (is_gimple_debug (USE_STMT (use_p)))
	    continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy, &dummy, &dummy_code,
					  &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
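
/* For example (an illustrative sketch), a call x[i] = pow (y[i], 2.0) in a
   loop body is rewritten as x[i] = y[i] * y[i], and x[i] = pow (y[i], 0.5)
   as x[i] = sqrt (y[i]), so the loop can be vectorized without a vector
   implementation of pow itself.  */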

static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'. This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins. In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results. It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
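
/* For example (an illustrative sketch, not taken from this file), the C loop

     short a[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];

   matches this pattern: each a[i] is promoted to int before the addition,
   and the loop body is replaced by WIDEN_SUM_EXPR <a[i], sum>.  */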

static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

irar's avatar
 
irar committed
818
  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
819 820 821 822 823
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
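
/* For instance (a sketch with made-up values): for

     unsigned char a_t;
     b_T = ((int) a_t) & 0xff;

   the BIT_AND_EXPR fits the smaller type unsigned char, because 0xff fits
   that type; for b_T = ((int) a_t) << 3 an intermediate 16-bit type is
   tried instead, since a left shift always needs headroom beyond
   HALF_TYPE.  */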

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
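
/* For example (an illustrative sketch, not taken from this file), the C loop

     unsigned char in[N], out[N];
     int i;
     for (i = 0; i < N; i++)
       out[i] = (in[i] << 1) | 3;

   matches this situation: C integer promotion performs the shift and the
   bitwise OR in int, although 8 or 16 bits would suffice, and the final
   store demotes the result back to unsigned char.  */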

static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
1222 1223
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
irar's avatar
 
irar committed
1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect unsigned cases:

  unsigned type a_t;
  unsigned TYPE u_res_T;
  TYPE a_T, res_T;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 u_res_T = (unsigned TYPE) res_T;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
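
/* For example (an illustrative sketch, not taken from this file), the C loop

     short a[N];
     int res[N], i;
     for (i = 0; i < N; i++)
       res[i] = a[i] << 8;

   matches this pattern: a[i] is promoted to int before the shift, and the
   promotion and shift combine into WIDEN_LSHIFT_EXPR <a[i], 8>.  */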

static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as an over-widening operation (it
         can't be any other pattern, because only over-widening detects
         shifts).  LAST_STMT is the final type demotion statement, but its
         related statement is a shift.  We analyze the related statement to
         catch cases:

         orig code:
          type a_t;
          itype res;
          TYPE a_T, res_T;

          S1 a_T = (TYPE) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;

          (size of type * 2 <= size of itype
           and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
       				      &oprnd0, stmts,
	                              type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;
	      use_stmt = USE_STMT (use_p);
	      nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
	      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	    return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy, &dummy, &dummy_code,
					  &dummy_code, &dummy_int,
					  &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with a different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
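
/* An illustrative example (a sketch, not part of the original sources):
   a loop whose shift amounts are stored in a narrower type than the
   shifted values, e.g.

     int val[N];
     unsigned char amt[N];

     for (i = 0; i < N; i++)
       val[i] = val[i] << amt[i];

   Vector shift instructions need both operands in the same vector mode,
   so the pattern rewrites S3 to shift by a same-sized copy of the
   amount: either the original c_T in the second case above, or a new
   cast of a_t added to STMT_VINFO_PATTERN_DEF_SEQ in the first.  */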

static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is a signed integral type and N is a constant positive
  power of two.

  Similarly handle signed modulo by power of two constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
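
/* An illustrative example (a sketch, not part of the original sources):
   for N == 8 the S1 replacement above is equivalent to the scalar code

     int
     sdiv8 (int b)
     {
       int y = b < 0 ? 7 : 0;
       return (b + y) >> 3;
     }

   where 7 is N - 1, 3 is log2 (N), and the right shift of the signed
   value is arithmetic.  Adding N - 1 to negative dividends turns the
   shift's round-towards-minus-infinity into the round-towards-zero
   behavior TRUNC_DIV_EXPR requires.  */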

static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
				 tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  optab optab;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != NULL)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
	  || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
	return NULL;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
    {
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
					 fold_build2 (MINUS_EXPR, itype,
						      oprnd1,
						      build_int_cst (itype,
								     1)),
					 build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
	= gimple_build_assign_with_ops (RSHIFT_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					var,
					build_int_cst (itype,
						       tree_log2 (oprnd1)));
    }
  else
    {
      tree signmask;
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
	{
	  signmask = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
					     build_int_cst (itype, 1),
					     build_int_cst (itype, 0));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	{
	  tree utype
	    = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
	  tree vecutype = get_vectype_for_scalar_type (utype);
	  tree shift
	    = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
				    - tree_log2 (oprnd1));
	  tree var = vect_recog_temp_ssa_var (utype, NULL);
	  stmt_vec_info def_stmt_vinfo;

	  def_stmt
	    = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
					     build_int_cst (utype, -1),
					     build_int_cst (utype, 0));
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (utype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
					    gimple_assign_lhs (def_stmt),
					    shift);
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  signmask = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
					    NULL_TREE);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      def_stmt
	= gimple_build_assign_with_ops (PLUS_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					oprnd0, signmask);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
	= gimple_build_assign_with_ops (BIT_AND_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					gimple_assign_lhs (def_stmt),
					fold_build2 (MINUS_EXPR, itype,
						     oprnd1,
						     build_int_cst (itype,
								    1)));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					gimple_assign_lhs (def_stmt),
					signmask);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has a different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
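
/* An illustrative example (a sketch, not part of the original sources):
   a loop that compares narrow values but selects wider constants, e.g.

     signed char x[N], y[N];
     int out[N];

     for (i = 0; i < N; i++)
       out[i] = x[i] < y[i] ? 27 : 35;

   Both constants fit in a char-sized integer type, so the COND_EXPR is
   done in that type and only the selected value is widened, letting the
   comparison and the selection use the same narrow vector mode.  */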

static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					  TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
	  || !int_fits_type_p (else_clause, itype))
	return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
				     vect_recog_temp_ssa_var (itype, NULL),
				     unshare_expr (cond_expr),
				     fold_convert (itype, then_clause),
				     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}


/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}


/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}


/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     VEC (gimple, heap) **stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  f_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR,
					 vect_recog_temp_ssa_var (itype, NULL),
					 cond_expr, trueval,
					 build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}


/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */
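
/* An illustrative example (a sketch, not part of the original sources):
   after if-conversion, a loop such as

     int x1[N], y1[N], x2[N], y2[N], out[N];

     for (i = 0; i < N; i++)
       out[i] = x1[i] < y1[i] && x2[i] < y2[i];

   yields bool comparisons combined with BIT_AND_EXPR and cast to int,
   i.e. the S1..S6 shape above, which this pattern rewrites into integer
   COND_EXPRs that can be vectorized as VEC_COND_EXPRs.  */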

static gimple
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else
    return NULL;
}


/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type. If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
 
  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); 

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "additional pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:
2564 2565 2566 2567 2568 2569 2570

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
irar's avatar
 
irar committed
2571
          '---> S6: a_new = ....        -       S4              VS6
2572 2573 2574
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

irar's avatar
 
irar committed
2575
   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2576 2577
   elsewhere), and we'll end up with:

hjl's avatar
hjl committed
2578
        VS6: va_new = ....
irar's avatar
 
irar committed
2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
            }
        }
    }

  VEC_free (gimple, heap, stmts_to_replace);
}