Unverified Commit b90a0a21 authored by Michael Neuling's avatar Michael Neuling Committed by GitHub

Merge pull request #208 from paulusmack/faster

Make the core go faster

Several major improvements in here:
- Simple branch predictor
- Reduced latency for mispredicted branches and interrupts by removing fetch2 stage
- Cache improvements
  o Request critical dword first on refill
  o Handle hits while refilling, including on line being refilled
  o Sizes doubled for both D and I
- Loadstore improvements: can now do one load or store every two cycles in most cases
- Optimized 2-cycle multiplier for Xilinx 7-series parts using DSP slices
- Timing improvements, including:
  o Stash buffer in decode1
  o Reduced width of execute1 result mux
  o Improved SPR decode in decode1
  o Some non-critical operations take a cycle longer so we can break some long combinatorial chains
- Core logging: logs 256 bits of info every cycle into a ring buffer, to help with debugging and performance analysis

This increases the LUT usage for the "synth" + A35 target from 9182 to 10297, an increase of about 12%.
parents 1fedc7a8 64efd494
......@@ -42,7 +42,7 @@ all = core_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
all: $(all)
core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
fetch2.vhdl utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
......
......@@ -93,10 +93,11 @@ package common is
virt_mode : std_ulogic;
priv_mode : std_ulogic;
stop_mark: std_ulogic;
sequential: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
end record;
type IcacheToFetch2Type is record
type IcacheToDecode1Type is record
valid: std_ulogic;
stop_mark: std_ulogic;
fetch_failed: std_ulogic;
......@@ -104,16 +105,6 @@ package common is
insn: std_ulogic_vector(31 downto 0);
end record;
type Fetch2ToDecode1Type is record
valid: std_ulogic;
stop_mark : std_ulogic;
fetch_failed: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
end record;
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', fetch_failed => '0',
nia => (others => '0'), insn => (others => '0'));
type Decode1ToDecode2Type is record
valid: std_ulogic;
stop_mark : std_ulogic;
......@@ -122,8 +113,16 @@ package common is
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init, br_pred => '0');
-- Signals passed from decode1 back to fetch1.
-- NOTE(review): presumably carries the fetch redirect for a branch that
-- decode1 predicts taken -- confirm against decode1/fetch1.
type Decode1ToFetch1Type is record
redirect : std_ulogic; -- presumably asserted when fetch1 should switch to redirect_nia -- confirm
redirect_nia : std_ulogic_vector(63 downto 0); -- 64-bit address accompanying redirect
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init);
type Decode2ToExecute1Type is record
valid: std_ulogic;
......@@ -158,23 +157,24 @@ package common is
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
reserve : std_ulogic; -- set for larx/stcx
br_pred : std_ulogic;
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));
type Execute1ToMultiplyType is record
valid: std_ulogic;
insn_type: insn_type_t;
data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0);
data1: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(63 downto 0);
is_32bit: std_ulogic;
neg_result: std_ulogic;
end record;
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
is_32bit => '0',
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0',
is_32bit => '0', neg_result => '0',
others => (others => '0'));
type Execute1ToDividerType is record
......@@ -253,6 +253,7 @@ package common is
others => (others => '0'));
type Loadstore1ToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
invalid : std_ulogic;
perm_error : std_ulogic;
......@@ -366,7 +367,7 @@ package common is
type MultiplyToExecute1Type is record
valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0);
result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic;
end record;
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
......
......@@ -15,7 +15,8 @@ entity control is
complete_in : in std_ulogic;
valid_in : in std_ulogic;
flush_in : in std_ulogic;
stall_in : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic;
......@@ -23,6 +24,9 @@ entity control is
gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;
update_gpr_write_valid : in std_ulogic;
update_gpr_write_reg : in gspr_index_t;
gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;
......@@ -72,7 +76,11 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
......@@ -80,6 +88,9 @@ begin
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,
ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,
stall_out => stall_a_out,
use_bypass => gpr_bypass_a
);
......@@ -90,7 +101,11 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
......@@ -98,6 +113,9 @@ begin
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,
ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,
stall_out => stall_b_out,
use_bypass => gpr_bypass_b
);
......@@ -110,7 +128,11 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
......@@ -118,6 +140,9 @@ begin
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,
ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,
stall_out => stall_c_out,
use_bypass => gpr_bypass_c
);
......@@ -128,7 +153,11 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
cr_read_in => cr_read_in,
cr_write_in => cr_write_valid,
......@@ -139,7 +168,8 @@ begin
control0: process(clk)
begin
if rising_edge(clk) then
assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
end if;
end process;
......@@ -152,17 +182,18 @@ begin
v_int := r_int;
-- asynchronous
valid_tmp := valid_in and not flush_in and not stall_in;
stall_tmp := stall_in;
valid_tmp := valid_in and not flush_in;
stall_tmp := '0';
if complete_in = '1' then
if flush_in = '1' then
-- expect to see complete_in next cycle
v_int.outstanding := 1;
elsif complete_in = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
if rst = '1' then
v_int.state := IDLE;
v_int.outstanding := 0;
stall_tmp := '0';
v_int := reg_internal_init;
valid_tmp := '0';
end if;
......@@ -227,7 +258,9 @@ begin
end if;
if valid_tmp = '1' then
v_int.outstanding := v_int.outstanding + 1;
if deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;
gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in;
else
......@@ -237,7 +270,7 @@ begin
-- update outputs
valid_out <= valid_tmp;
stall_out <= stall_tmp;
stall_out <= stall_tmp or deferred;
-- update registers
rin_int <= v_int;
......
......@@ -11,7 +11,8 @@ entity core is
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0')
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512
);
port (
clk : in std_ulogic;
......@@ -41,16 +42,14 @@ entity core is
end core;
architecture behave of core is
-- fetch signals
signal fetch2_to_decode1: Fetch2ToDecode1Type;
-- icache signals
signal fetch1_to_icache : Fetch1ToIcacheType;
signal icache_to_fetch2 : IcacheToFetch2Type;
signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType;
-- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type;
signal decode1_to_fetch1: Decode1ToFetch1Type;
signal decode2_to_execute1: Decode2ToExecute1Type;
-- register file signals
......@@ -83,16 +82,18 @@ architecture behave of core is
-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
signal fetch2_stall_in : std_ulogic;
signal icache_stall_in : std_ulogic;
signal decode1_stall_in : std_ulogic;
signal decode2_stall_in : std_ulogic;
signal decode1_busy : std_ulogic;
signal decode2_busy_in : std_ulogic;
signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic;
signal ex1_stall_out: std_ulogic;
signal ls1_stall_out: std_ulogic;
signal ex1_busy_out: std_ulogic;
signal dcache_stall_out: std_ulogic;
signal flush: std_ulogic;
signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic;
signal complete: std_ulogic;
signal terminate: std_ulogic;
......@@ -128,6 +129,12 @@ architecture behave of core is
-- Debug status
signal dbg_core_is_stopped: std_ulogic;
-- Logging signals
signal log_data : std_ulogic_vector(255 downto 0);
signal log_rd_addr : std_ulogic_vector(31 downto 0);
signal log_wr_addr : std_ulogic_vector(31 downto 0);
signal log_rd_data : std_ulogic_vector(63 downto 0);
function keep_h(disable : boolean) return string is
begin
if disable then
......@@ -139,7 +146,6 @@ architecture behave of core is
attribute keep_hierarchy : string;
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
......@@ -180,45 +186,40 @@ begin
rst => rst_fetch1,
alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in,
flush_in => flush,
flush_in => fetch1_flush,
stop_in => dbg_core_stop,
d_in => decode1_to_fetch1,
e_in => execute1_to_fetch1,
i_out => fetch1_to_icache
i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
);
fetch1_stall_in <= icache_stall_out or decode2_stall_out;
fetch1_stall_in <= icache_stall_out or decode1_busy;
fetch1_flush <= flush or decode1_flush;
icache_0: entity work.icache
generic map(
SIM => SIM,
LINE_SIZE => 64,
NUM_LINES => 32,
NUM_LINES => 64,
NUM_WAYS => 2
)
port map(
clk => clk,
rst => rst_icache,
i_in => fetch1_to_icache,
i_out => icache_to_fetch2,
i_out => icache_to_decode1,
m_in => mmu_to_icache,
flush_in => flush,
flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in,
stall_out => icache_stall_out,
wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in
);
fetch2_0: entity work.fetch2
port map (
clk => clk,
rst => rst_fetch2,
stall_in => fetch2_stall_in,
flush_in => flush,
i_in => icache_to_fetch2,
f_out => fetch2_to_decode1
wishbone_in => wishbone_insn_in,
log_out => log_data(96 downto 43)
);
fetch2_stall_in <= decode2_stall_out;
icache_stall_in <= decode1_busy;
decode1_0: entity work.decode1
port map (
......@@ -226,8 +227,12 @@ begin
rst => rst_dec1,
stall_in => decode1_stall_in,
flush_in => flush,
f_in => fetch2_to_decode1,
d_out => decode1_to_decode2
flush_out => decode1_flush,
busy_out => decode1_busy,
f_in => icache_to_decode1,
d_out => decode1_to_decode2,
f_out => decode1_to_fetch1,
log_out => log_data(109 downto 97)
);
decode1_stall_in <= decode2_stall_out;
......@@ -239,7 +244,7 @@ begin
port map (
clk => clk,
rst => rst_dec2,
stall_in => decode2_stall_in,
busy_in => decode2_busy_in,
stall_out => decode2_stall_out,
flush_in => flush,
complete_in => complete,
......@@ -249,9 +254,10 @@ begin
r_in => register_file_to_decode2,
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file
c_out => decode2_to_cr_file,
log_out => log_data(119 downto 110)
);
decode2_stall_in <= ex1_stall_out or ls1_stall_out;
decode2_busy_in <= ex1_busy_out;
register_file_0: entity work.register_file
generic map (
......@@ -267,7 +273,8 @@ begin
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump
sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
);
cr_file_0: entity work.cr_file
......@@ -279,7 +286,8 @@ begin
d_in => decode2_to_cr_file,
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump
sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
);
execute1_0: entity work.execute1
......@@ -290,7 +298,7 @@ begin
clk => clk,
rst => rst_ex1,
flush_out => flush,
stall_out => ex1_stall_out,
busy_out => ex1_busy_out,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
ext_irq_in => ext_irq,
......@@ -299,7 +307,11 @@ begin
e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
terminate_out => terminate
terminate_out => terminate,
log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
log_wr_addr => log_wr_addr
);
loadstore1_0: entity work.loadstore1
......@@ -314,7 +326,7 @@ begin
m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out,
stall_out => ls1_stall_out
log_out => log_data(149 downto 140)
);
mmu_0: entity work.mmu
......@@ -331,7 +343,7 @@ begin
dcache_0: entity work.dcache
generic map(
LINE_SIZE => 64,
NUM_LINES => 32,
NUM_LINES => 64,
NUM_WAYS => 2
)
port map (
......@@ -343,7 +355,8 @@ begin
m_out => dcache_to_mmu,
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out
wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
);
writeback_0: entity work.writeback
......@@ -356,7 +369,13 @@ begin
complete_out => complete
);
log_data(151 downto 150) <= "00";
log_data(139 downto 135) <= "00000";
debug_0: entity work.core_debug
generic map (
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
rst => rst_dbg,
......@@ -377,6 +396,10 @@ begin
dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
log_data => log_data,
log_read_addr => log_rd_addr,
log_read_data => log_rd_data,
log_write_addr => log_wr_addr,
terminated_out => terminated_out
);
......
......@@ -3,9 +3,14 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.utils.all;
use work.common.all;
entity core_debug is
generic (
-- Length of log buffer
LOG_LENGTH : natural := 512
);
port (
clk : in std_logic;
rst : in std_logic;
......@@ -34,6 +39,12 @@ entity core_debug is
dbg_gpr_addr : out gspr_index_t;
dbg_gpr_data : in std_ulogic_vector(63 downto 0);
-- Core logging data
log_data : in std_ulogic_vector(255 downto 0);
log_read_addr : in std_ulogic_vector(31 downto 0);
log_read_data : out std_ulogic_vector(63 downto 0);
log_write_addr : out std_ulogic_vector(31 downto 0);
-- Misc
terminated_out : out std_ulogic
);
......@@ -77,6 +88,12 @@ architecture behave of core_debug is
-- GSPR register data
constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101";
-- Log buffer address and data registers
constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110";
constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111";
constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH);
-- Some internal wires
signal stat_reg : std_ulogic_vector(63 downto 0);
......@@ -89,6 +106,12 @@ architecture behave of core_debug is
signal do_gspr_rd : std_ulogic;
signal gspr_index : gspr_index_t;
signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others => '0');
signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0');
signal do_dmi_log_rd : std_ulogic;
signal dmi_read_log_data : std_ulogic;
signal dmi_read_log_data_1 : std_ulogic;
begin
-- Single cycle register accesses on DMI except for GSPR data
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
......@@ -108,6 +131,8 @@ begin
nia when DBG_CORE_NIA,
msr when DBG_CORE_MSR,
dbg_gpr_data when DBG_CORE_GSPR_DATA,
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
log_dmi_data when DBG_CORE_LOG_DATA,
(others => '0') when others;
-- DMI writes
......@@ -118,6 +143,7 @@ begin
do_step <= '0';
do_reset <= '0';
do_icreset <= '0';
do_dmi_log_rd <= '0';
if (rst) then
stopping <= '0';
......@@ -151,11 +177,26 @@ begin
end if;
elsif dmi_addr = DBG_CORE_GSPR_INDEX then
gspr_index <= dmi_din(gspr_index_t'left downto 0);
elsif dmi_addr = DBG_CORE_LOG_ADDR then
log_dmi_addr <= dmi_din(31 downto 0);
do_dmi_log_rd <= '1';
end if;
else
report("DMI read from " & to_string(dmi_addr));
end if;
elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then
-- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA
log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <=
std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1);
do_dmi_log_rd <= '1';
end if;
dmi_read_log_data_1 <= dmi_read_log_data;
if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then
dmi_read_log_data <= '1';
else
dmi_read_log_data <= '0';
end if;
-- Set core stop on terminate. We'll be stopping some time *after*
-- the offending instruction, at least until we can do back flushes
......@@ -175,5 +216,87 @@ begin
core_rst <= do_reset;
icache_rst <= do_icreset;
terminated_out <= terminated;
-- Logging RAM
maybe_log: if LOG_LENGTH > 0 generate
subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0);
type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0);
signal log_array : log_array_t;
signal log_rd_ptr : log_ptr_t;
signal log_wr_ptr : log_ptr_t;
signal log_toggle : std_ulogic;
signal log_wr_enable : std_ulogic;
signal log_rd_ptr_latched : log_ptr_t;
signal log_rd : std_ulogic_vector(255 downto 0);
signal log_dmi_reading : std_ulogic;
signal log_dmi_read_done : std_ulogic;
-- Pick one 64-bit doubleword out of a 256-bit log entry.
-- Only the two least-significant bits of addr are used: 0 selects
-- data(63 downto 0), 1 selects data(127 downto 64), and so on up to
-- 3 for data(255 downto 192).  All other address bits are ignored.
function select_dword(data : std_ulogic_vector(255 downto 0);
                      addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
    variable dword_idx : natural range 0 to 3;
begin
    dword_idx := to_integer(unsigned(addr(1 downto 0)));
    return data(dword_idx * 64 + 63 downto dword_idx * 64);
end;
attribute ram_style : string;
attribute ram_style of log_array : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of log_array : signal is "power";
begin
-- Use MSB of read addresses to stop the logging
log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31));
-- Log storage: one write port and one registered read port on log_array.
-- NOTE(review): log_array is tagged ram_style "block", so this process is
-- presumably kept in this minimal form to match a RAM inference template;
-- avoid restructuring it without checking the synthesis result.
log_ram: process(clk)
begin
if rising_edge(clk) then
-- Store this cycle's 256-bit log_data at the write pointer, unless
-- logging has been stopped (log_wr_enable = '0').
if log_wr_enable = '1' then
log_array(to_integer(log_wr_ptr)) <= log_data;
end if;
-- Synchronous read: log_rd is refreshed on every clock edge from the
-- entry addressed by the latched read pointer.
log_rd <= log_array(to_integer(log_rd_ptr_latched));
end if;
end process;
-- Log buffer control: advances the write pointer, selects which read
-- address (DMI or log_read_addr) is presented to the RAM, and routes the
-- RAM read data to the matching output register.
-- The unused process variables of the original have been dropped; the
-- sequencing is unchanged.
log_buffer: process(clk)
begin
    if rising_edge(clk) then
        -- Write pointer: cleared on reset, otherwise advanced on every
        -- cycle in which logging is enabled.
        -- NOTE(review): the pointer wraps at 2**LOG_INDEX_BITS rather than
        -- at LOG_LENGTH; this appears to assume LOG_LENGTH is a power of
        -- two -- confirm.
        if rst = '1' then
            log_wr_ptr <= (others => '0');
            log_toggle <= '0';
        elsif log_wr_enable = '1' then
            -- Flip log_toggle each time the last entry is written.
            if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then
                log_toggle <= not log_toggle;
            end if;
            log_wr_ptr <= log_wr_ptr + 1;
        end if;

        -- Read address selection: a DMI-initiated read takes the RAM read
        -- port for this cycle; otherwise log_read_addr drives it.
        -- Address bits 1:0 select the doubleword within an entry, so the
        -- entry index starts at bit 2.
        if do_dmi_log_rd = '1' then
            log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2));
        else
            log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2));
        end if;

        -- Read data routing: log_dmi_read_done is do_dmi_log_rd delayed by
        -- two clock edges (via log_dmi_reading), i.e. the cycle in which
        -- log_rd holds the entry fetched for the DMI request.  In every
        -- other cycle log_rd feeds log_read_data.
        if log_dmi_read_done = '1' then
            log_dmi_data <= select_dword(log_rd, log_dmi_addr);
        else
            log_read_data <= select_dword(log_rd, log_read_addr);
        end if;
        log_dmi_read_done <= log_dmi_reading;
        log_dmi_reading <= do_dmi_log_rd;
    end if;
end process;
log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
log_write_addr(LOG_INDEX_BITS) <= '1';
log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0');
end generate;
-- Logging disabled (LOG_LENGTH = 0): no log buffer is generated, so the
-- log outputs are tied to constants.
no_log: if LOG_LENGTH = 0 generate
begin
    -- Constant write address with only bit 0 set (same value as x"00000001").
    log_write_addr <= (0 => '1', others => '0');
    -- Reads always return zero.
    log_read_data <= (others => '0');
end generate no_log;
end behave;
......@@ -18,7 +18,9 @@ entity cr_file is
w_in : in WritebackToCrFileType;
-- debug
sim_dump : in std_ulogic
sim_dump : in std_ulogic;
log_out : out std_ulogic_vector(12 downto 0)
);
end entity cr_file;
......@@ -27,6 +29,7 @@ architecture behaviour of cr_file is
signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t;
signal log_data : std_ulogic_vector(12 downto 0);
begin
cr_create_0: process(all)
variable hi, lo : integer := 0;
......@@ -88,4 +91,14 @@ begin
end process;
end generate;
-- Registers a 13-bit snapshot of the CR write port each clock for the
-- core log.  From MSB to LSB: write_cr_enable, the top four bits of
-- write_cr_data (31 downto 28), then write_cr_mask.
cr_log: process(clk)
    variable snapshot : std_ulogic_vector(12 downto 0);
begin
    if rising_edge(clk) then
        snapshot := w_in.write_cr_enable &
                    w_in.write_cr_data(31 downto 28) &
                    w_in.write_cr_mask;
        log_data <= snapshot;
    end if;
end process;