Commit dcca4f04, authored by Doug Gilbert, committed by Patrick Williams

64 bit load/store peepholes

parent 9869efd8
......@@ -223,4 +223,5 @@ extern unsigned char rs6000_class_max_nregs[][LIM_REG_CLASSES];
extern unsigned char rs6000_hard_regno_nregs[][FIRST_PSEUDO_REGISTER];
extern bool rs6000_linux_float_exceptions_rounding_supported_p (void);
extern bool mem_contiguous(rtx, rtx);
#endif /* rs6000-protos.h */
......@@ -23402,13 +23402,33 @@ rs6000_emit_prologue (void)
if ((strategy & SAVE_INLINE_GPRS))
{
for (i = 0; i < 32 - info->first_gp_reg_save; i++)
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
emit_frame_save (spe_save_area_ptr, reg_mode,
info->first_gp_reg_save + i,
(info->spe_gp_save_offset + save_off
+ reg_size * i),
sp_off - save_off);
// ppe42 - use 64 bit stores - No evidence that this gained anything
i = 0;
if((info->first_gp_reg_save & 0x01) == 1) // odd reg num
{
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
emit_frame_save (spe_save_area_ptr,
reg_mode,
info->first_gp_reg_save,
(info->spe_gp_save_offset + save_off),
sp_off - save_off);
i = 1;
}
for(;i < 32 - info->first_gp_reg_save; i += 2)
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
emit_frame_save (spe_save_area_ptr, DImode,
info->first_gp_reg_save + i,
(info->spe_gp_save_offset + save_off
+ reg_size * i),
sp_off - save_off);
//for (i = 0; i < 32 - info->first_gp_reg_save; i++)
// if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
// emit_frame_save (spe_save_area_ptr, reg_mode,
// info->first_gp_reg_save + i,
// (info->spe_gp_save_offset + save_off
// + reg_size * i),
// sp_off - save_off);
}
else
{
......@@ -23489,12 +23509,32 @@ rs6000_emit_prologue (void)
else if (!WORLD_SAVE_P (info))
{
int i;
for (i = 0; i < 32 - info->first_gp_reg_save; i++)
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
emit_frame_save (frame_reg_rtx, reg_mode,
info->first_gp_reg_save + i,
info->gp_save_offset + frame_off + reg_size * i,
sp_off - frame_off);
// ppe42 save using 64-bit stores
i = 0;
if((info->first_gp_reg_save & 0x1) == 1) // odd regnum
{
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
emit_frame_save (frame_reg_rtx, reg_mode,
info->first_gp_reg_save,
info->gp_save_offset + frame_off,
sp_off - frame_off);
i = 1;
}
for (; i < 32 - info->first_gp_reg_save; i += 2)
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
emit_frame_save (frame_reg_rtx, DImode,
info->first_gp_reg_save + i,
info->gp_save_offset + frame_off + reg_size * i,
sp_off - frame_off);
// for (i = 0; i < 32 - info->first_gp_reg_save; i++)
// if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
// emit_frame_save (frame_reg_rtx, reg_mode,
// info->first_gp_reg_save + i,
// info->gp_save_offset + frame_off + reg_size * i,
// sp_off - frame_off);
}
if (crtl->calls_eh_return)
......@@ -24921,12 +24961,46 @@ rs6000_emit_epilogue (int sibcall)
}
else
{
// ppe42 - use 64 bit loads
i = 0;
if((info->first_gp_reg_save & 0x1) == 1) // odd reg
{
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
emit_insn (gen_frame_load
(gen_rtx_REG (reg_mode, info->first_gp_reg_save),
frame_reg_rtx,
info->gp_save_offset + frame_off));
i = 1;
}
reg_mode = DImode;
for(; i < 32 - info->first_gp_reg_save; i += 2)
{
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
{
emit_insn
(gen_rtx_SET
(VOIDmode,
gen_rtx_REG(reg_mode, info->first_gp_reg_save + i),
gen_frame_mem(reg_mode,
gen_rtx_PLUS
(Pmode,
frame_reg_rtx,
GEN_INT(info->gp_save_offset +
frame_off +
reg_size * i)
))));
}
}
reg_mode = Pmode;
/*
for (i = 0; i < 32 - info->first_gp_reg_save; i++)
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
emit_insn (gen_frame_load
(gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
frame_reg_rtx,
info->gp_save_offset + frame_off + reg_size * i));
*/
}
if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
......@@ -33233,6 +33307,58 @@ emit_fusion_gpr_load (rtx target, rtx mem)
return "";
}
/* Split the address of MEM into a base register number and a constant
   byte displacement.

   Two address shapes are recognized: a bare (reg), which yields a
   displacement of 0, and (plus (reg) (const_int)).  On success the
   results are stored through REGNO_OUT and OFFSET_OUT and true is
   returned.  For any other address shape nothing is stored and false
   is returned.  */
static bool
mem_base_and_offset (rtx mem, int *regno_out, int *offset_out)
{
  rtx addr = XEXP (mem, 0);

  if (GET_CODE (addr) == REG)
    {
      *regno_out = REGNO (addr);
      *offset_out = 0;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && GET_CODE (XEXP (addr, 1)) == CONST_INT)
    {
      *regno_out = REGNO (XEXP (addr, 0));
      *offset_out = INTVAL (XEXP (addr, 1));
      return true;
    }

  return false;
}

/* Return true if MEM1 and MEM2 address two adjacent 4-byte slots that
   start on an 8-byte displacement boundary.

   Both addresses must decompose into the same base register plus a
   constant displacement; MEM1's displacement must be a multiple of 8
   and MEM2's displacement must be exactly MEM1's plus 4.  Only the
   displacement is tested; the alignment of the base register itself is
   not checked here.

   This predicate is evaluated from insn conditions, so it must stay
   silent: it emits no diagnostics and has no side effects.  */
bool mem_contiguous(rtx mem1, rtx mem2)
{
  int regno1, regno2;
  int offset1, offset2;

  /* An address we cannot decompose can never be proven contiguous.  */
  if (!mem_base_and_offset (mem1, &regno1, &offset1)
      || !mem_base_and_offset (mem2, &regno2, &offset2))
    return false;

  return (regno1 == regno2		/* same base reg */
	  && (offset1 & 0x7) == 0	/* 8 byte aligned */
	  && offset1 + 4 == offset2);	/* contiguous memory */
}
struct gcc_target targetm = TARGET_INITIALIZER;
......
......@@ -13504,6 +13504,54 @@
(const_int 0)))]
"")
;; PPE: convert two SImode loads into one doubleword load (lvd) when
;;   - the destination GPR numbers are consecutive (op0, then op0 + 1),
;;   - both memory operands use the same base register,
;;   - the first displacement is a multiple of 8,
;;   - the second displacement is the first displacement + 4,
;;   - the first destination is not mentioned in the second address.
;; The last test matters because the first load would otherwise overwrite
;; the base register before the second load reads it; a single lvd would
;; then fetch the second word from a different address than the original
;; two-instruction sequence did.
;; FIXME: NOTE(review): the condition does not test which CPU is being
;; targeted, nor whether either memory reference is volatile -- confirm
;; whether that is guaranteed elsewhere.
(define_peephole
  [(set (match_operand:SI 0 "gpc_reg_operand" "r")
	(match_operand:SI 1 "offsettable_mem_operand" "m"))
   (set (match_operand:SI 2 "gpc_reg_operand" "r")
	(match_operand:SI 3 "offsettable_mem_operand" "m"))]
  "((REGNO (operands[0]) + 1) == REGNO (operands[2]))
   && !reg_overlap_mentioned_p (operands[0], operands[3])
   && mem_contiguous (operands[1], operands[3])"
  "lvd %0, %1 #peephole %0 %1 %2 %3"
  [(set_attr "type" "load")])
;; PPE: as the ascending lvd pattern, but the two SImode loads appear in
;; the opposite order: the first insn loads the higher-numbered GPR from
;; the higher word, the second loads the lower-numbered GPR from the
;; 8-byte-aligned lower word.  The merged lvd therefore names operand 2
;; and operand 3.
;; The first destination must not be mentioned in the second address:
;; otherwise the original second load used the freshly loaded value as
;; its base, and merging would change which address is read.
;; NOTE(review): no CPU or volatile-memory test is made here -- confirm
;; whether that is guaranteed elsewhere.
(define_peephole
  [(set (match_operand:SI 0 "gpc_reg_operand" "r")
	(match_operand:SI 1 "offsettable_mem_operand" "m"))
   (set (match_operand:SI 2 "gpc_reg_operand" "r")
	(match_operand:SI 3 "offsettable_mem_operand" "m"))]
  "((REGNO (operands[0]) - 1) == REGNO (operands[2]))
   && !reg_overlap_mentioned_p (operands[0], operands[3])
   && mem_contiguous (operands[3], operands[1])"
  "lvd %2, %3 #peephole %0 %1 %2 %3"
  [(set_attr "type" "load")])
;; PPE: fuse two SImode stores into one doubleword store (stvd).
;; Matches when the second source GPR is numbered one above the first
;; and mem_contiguous accepts (first destination, second destination),
;; i.e. same base register, first displacement a multiple of 8, second
;; displacement four bytes higher.
(define_peephole
  [(set (match_operand:SI 0 "offsettable_mem_operand" "m")
	(match_operand:SI 1 "gpc_reg_operand" "r"))
   (set (match_operand:SI 2 "offsettable_mem_operand" "m")
	(match_operand:SI 3 "gpc_reg_operand" "r"))]
  "REGNO (operands[3]) == REGNO (operands[1]) + 1
   && mem_contiguous (operands[0], operands[2])"
  "stvd %1, %0 # peephole %0 %1 %2 %3"
  [(set_attr "type" "store")])
;; PPE: fuse two SImode stores into one doubleword store (stvd) when the
;; stores appear high word first: the first source GPR is numbered one
;; above the second, and mem_contiguous accepts (second destination,
;; first destination).  The merged stvd names operand 3 and operand 2.
(define_peephole
  [(set (match_operand:SI 0 "offsettable_mem_operand" "m")
	(match_operand:SI 1 "gpc_reg_operand" "r"))
   (set (match_operand:SI 2 "offsettable_mem_operand" "m")
	(match_operand:SI 3 "gpc_reg_operand" "r"))]
  "REGNO (operands[1]) == REGNO (operands[3]) + 1
   && mem_contiguous (operands[2], operands[0])"
  "stvd %3, %2 # peephole %0 %1 %2 %3"
  [(set_attr "type" "store")])
;; There is a 3 cycle delay between consecutive mfcr instructions
;; so it is useful to combine 2 scc instructions to use only one mfcr.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment