Skip to content

Commit

Permalink
Spandex integration fixes and performance updates (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
vsuresh95 authored Aug 10, 2022
1 parent cab2163 commit 42903d0
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 17 deletions.
1 change: 1 addition & 0 deletions rtl/caches/cachepackage.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ package cachepackage is
aq : in std_ulogic;
rl : in std_ulogic;
spandex_conf : in std_logic_vector(31 downto 0);
acc_flush_done : out std_ulogic;
-- backend (cache - NoC)
-- tile->NoC1
coherence_req_wrreq : out std_ulogic;
Expand Down
4 changes: 3 additions & 1 deletion rtl/caches/gencaches.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,16 @@ package gencaches is
l2_bresp_ready : in std_ulogic;
l2_stats_ready : in std_ulogic;
flush_done : out std_ulogic;
acc_flush_done : out std_ulogic;
l2_cpu_req_ready : out std_ulogic;
l2_fwd_in_ready : out std_ulogic;
l2_rsp_in_ready : out std_ulogic;
l2_flush_ready : out std_ulogic;
l2_rd_rsp_valid : out std_ulogic;
l2_rd_rsp_data_line : out line_t;
l2_inval_valid : out std_ulogic;
l2_inval_data : out line_addr_t;
l2_inval_data_addr : out line_addr_t;
l2_inval_data_hprot : out hprot_t;
l2_bresp_valid : out std_ulogic;
l2_bresp_data : out bresp_t;
l2_req_out_valid : out std_ulogic;
Expand Down
22 changes: 17 additions & 5 deletions rtl/caches/l2_acc_wrapper.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ entity l2_acc_wrapper is
aq : in std_ulogic;
rl : in std_ulogic;
spandex_conf : in std_logic_vector(31 downto 0);
acc_flush_done : out std_ulogic;

-- backend (cache - NoC)
-- tile->NoC1
Expand Down Expand Up @@ -174,8 +175,9 @@ architecture rtl of l2_acc_wrapper is
signal acc_done_l2_valid : std_logic;
signal acc_done_l2_data : std_logic_vector(1 downto 0);

type acc_done_state_t is (idle, valid_acc_done);
type acc_done_state_t is (idle, wait_for_cpu_ready, valid_acc_done);
signal acc_done_state, acc_done_next : acc_done_state_t;
signal flush_sync : std_ulogic;

-------------------------------------------------------------------------------
-- Flush FSM signals
Expand Down Expand Up @@ -571,15 +573,16 @@ begin -- architecture rtl of l2_acc_wrapper
l2_cpu_req_data_dcs => cpu_req_data_dcs,
l2_cpu_req_data_pred_cid => cpu_req_data_pred_cid,
l2_flush_ready => flush_ready,
l2_flush_valid => flush_valid,
l2_flush_valid => '0',
l2_flush_data => flush_data,
-- cache to AHB
l2_rd_rsp_ready => rd_rsp_ready,
l2_rd_rsp_valid => rd_rsp_valid,
l2_rd_rsp_data_line => rd_rsp_data_line,
l2_inval_ready => inval_ready,
l2_inval_valid => inval_valid,
l2_inval_data => open,
l2_inval_data_addr => open,
l2_inval_data_hprot => open,
l2_bresp_ready => '1',
l2_bresp_valid => open,
l2_bresp_data => open,
Expand Down Expand Up @@ -623,6 +626,7 @@ begin -- architecture rtl of l2_acc_wrapper
l2_rsp_in_data_word_mask => rsp_in_data_word_mask,
l2_rsp_in_data_invack_cnt => rsp_in_data_invack_cnt,
flush_done => flush_done,
acc_flush_done => acc_flush_done,
-- debug
--asserts => asserts,
--bookmark => bookmark,
Expand All @@ -638,6 +642,7 @@ begin -- architecture rtl of l2_acc_wrapper

-- Tie-off for builds where USE_SPANDEX = 0: both the accelerator-done
-- handshake ready signal and the acc_flush_done output are driven to a
-- constant '0' by this generate block.
-- NOTE(review): presumably these signals are driven by the cache instance
-- when USE_SPANDEX /= 0, so this block exists to avoid undriven outputs --
-- confirm against the matching generate branch.
acc_ready_gen: if USE_SPANDEX = 0 generate
-- The acc_done FSM only leaves idle when USE_SPANDEX /= 0, so with this
-- configuration its acc_done request is never acknowledged nor needed.
acc_done_l2_ready <= '0';
-- Never report an accelerator flush completion in this configuration.
acc_flush_done <= '0';
end generate acc_ready_gen;

----------------------------------------------------------------------------
Expand All @@ -647,20 +652,27 @@ begin -- architecture rtl of l2_acc_wrapper
begin
if rst = '0' then
acc_done_state <= idle;
flush_sync <= '0';
elsif clk'event and clk = '1' then
acc_done_state <= acc_done_next;
flush_sync <= flush;
end if;
end process acc_done_update;

acc_done_state_fsm : process (rl, acc_done_l2_ready, acc_done_state) is
acc_done_state_fsm : process (flush_sync, cpu_req_ready, acc_done_l2_ready, acc_done_state) is
begin
acc_done_next <= acc_done_state;
acc_done_l2_data <= "11";
acc_done_l2_valid <= '0';

case acc_done_state is
when idle =>
if rl = '1' and USE_SPANDEX /= 0 then
if flush_sync = '1' and USE_SPANDEX /= 0 then
acc_done_next <= wait_for_cpu_ready;
end if;

when wait_for_cpu_ready =>
if cpu_req_ready = '1' then
acc_done_l2_valid <= '1';
if acc_done_l2_ready = '0' then
acc_done_next <= valid_acc_done;
Expand Down
5 changes: 4 additions & 1 deletion rtl/caches/l2_wrapper.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ architecture rtl of l2_wrapper is
--signal bookmark : bookmark_t;
--signal custom_dbg : custom_dbg_t;
signal flush_done : std_ulogic;
signal acc_flush_done : std_ulogic;
-- statistics
signal stats_ready : std_ulogic;
signal stats_valid : std_ulogic;
Expand Down Expand Up @@ -713,7 +714,8 @@ begin -- architecture rtl of l2_wrapper
l2_rd_rsp_data_line => rd_rsp_data_line,
l2_inval_ready => inval_ready,
l2_inval_valid => inval_valid,
l2_inval_data => inval_data_addr,
l2_inval_data_addr => inval_data_addr,
l2_inval_data_hprot => inval_data_hprot,
l2_bresp_ready => bresp_ready,
l2_bresp_valid => bresp_valid,
l2_bresp_data => bresp_data,
Expand Down Expand Up @@ -757,6 +759,7 @@ begin -- architecture rtl of l2_wrapper
l2_rsp_in_data_word_mask => rsp_in_data_word_mask,
l2_rsp_in_data_invack_cnt => rsp_in_data_invack_cnt,
flush_done => flush_done,
acc_flush_done => acc_flush_done,
l2_stats_ready => stats_ready,
l2_stats_valid => stats_valid,
l2_stats_data => stats_data,
Expand Down
28 changes: 21 additions & 7 deletions rtl/sockets/proxy/esp_acc_dma.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ entity esp_acc_dma is
bufdout_valid : in std_ulogic;
acc_done : in std_ulogic;
flush : out std_ulogic;
acc_flush_done: in std_ulogic;
mon_dvfs_in : in monitor_dvfs_type;
--Monitor signals
mon_dvfs : out monitor_dvfs_type;
Expand Down Expand Up @@ -230,7 +231,7 @@ architecture rtl of esp_acc_dma is
type dma_fsm is (idle, request_header, request_address, request_length,
request_data, reply_header, reply_data, config,
send_header, rd_handshake, wr_handshake, wait_req_p2p,
running, reset, wait_for_completion, fully_coherent_request);
running, reset, wait_for_completion, wait_flush_done, fully_coherent_request);
signal acc_rst_next : std_ulogic;
signal dma_state, dma_next : dma_fsm;
signal status : std_logic_vector(31 downto 0);
Expand Down Expand Up @@ -659,7 +660,7 @@ begin -- rtl
dma_tran_start, tlb_empty, pending_dma_write,
pending_dma_read, coherent_dma_ready, dvfs_transient,
size_r, coherence,
p2p_req_rcv_empty, p2p_req_rcv_data_out, p2p_rsp_snd_full)
p2p_req_rcv_empty, p2p_req_rcv_data_out, p2p_rsp_snd_full, acc_flush_done)
variable payload_data : noc_flit_type;
variable preamble : noc_preamble_type;
variable msg : noc_msg_type;
Expand Down Expand Up @@ -794,13 +795,18 @@ begin -- rtl
elsif bankreg(CMD_REG)(CMD_BIT_LAST downto 0) = zero(CMD_BIT_LAST downto 0) then
dma_next <= reset;
elsif pending_acc_done = '1' then
status <= (others => '0');
status(STATUS_BIT_DONE) <= '1';
sample_status <= '1';
if coherence = ACC_COH_FULL then
if USE_SPANDEX /= 0 and coherence = ACC_COH_FULL then
flush <= '1';
dma_next <= wait_flush_done;
else
status <= (others => '0');
status(STATUS_BIT_DONE) <= '1';
sample_status <= '1';
if coherence = ACC_COH_FULL then
flush <= '1';
end if;
dma_next <= wait_for_completion;
end if;
dma_next <= wait_for_completion;
elsif rd_request = '1' then
if scatter_gather = 0 then
sample_flits <= '1';
Expand All @@ -815,6 +821,14 @@ begin -- rtl
dma_next <= wr_handshake;
end if;

-- wait_flush_done: entered from the pending_acc_done branch when
-- USE_SPANDEX /= 0 and coherence = ACC_COH_FULL, in the same cycle that
-- flush is raised. Completion status is reported only once acc_flush_done
-- has been observed.
when wait_flush_done =>
-- The USE_SPANDEX /= 0 term repeats the condition under which this state
-- is entered.
-- NOTE(review): no other exit is visible in this branch; if
-- acc_flush_done is never asserted the FSM would remain here -- confirm
-- against the process defaults and the cache-side behaviour.
if acc_flush_done = '1' and USE_SPANDEX /= 0 then
-- Same completion reporting as the non-Spandex path: clear status,
-- set the DONE bit and assert sample_status.
status <= (others => '0');
status(STATUS_BIT_DONE) <= '1';
sample_status <= '1';
-- Then wait for software to acknowledge completion.
dma_next <= wait_for_completion;
end if;

when wait_for_completion =>
-- The software must reset the accelerator on completion by writing a 0
-- to the command register
Expand Down
1 change: 1 addition & 0 deletions rtl/sockets/proxy/tile.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,7 @@ package tile is
bufdout_valid : in std_ulogic;
acc_done : in std_ulogic;
flush : out std_ulogic;
acc_flush_done : in std_ulogic;
mon_dvfs_in : in monitor_dvfs_type;
mon_dvfs : out monitor_dvfs_type;
llc_coherent_dma_rcv_rdreq : out std_ulogic;
Expand Down
8 changes: 6 additions & 2 deletions tools/socketgen/socketgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,14 +878,16 @@ def write_cache_interface(f, cac, is_llc):
f.write(" l2_fwd_out_ready : in std_ulogic;\n")
f.write(" l2_stats_ready : in std_ulogic;\n")
f.write(" flush_done : out std_ulogic;\n")
f.write(" acc_flush_done : out std_ulogic;\n")
f.write(" l2_cpu_req_ready : out std_ulogic;\n")
f.write(" l2_fwd_in_ready : out std_ulogic;\n")
f.write(" l2_rsp_in_ready : out std_ulogic;\n")
f.write(" l2_flush_ready : out std_ulogic;\n")
f.write(" l2_rd_rsp_valid : out std_ulogic;\n")
f.write(" l2_rd_rsp_data_line : out std_logic_vector(" + str(bits_per_line - 1) + " downto 0);\n")
f.write(" l2_inval_valid : out std_ulogic;\n")
f.write(" l2_inval_data : out std_logic_vector(" + str(phys_addr_bits - offset_bits - 1) + " downto 0);\n")
f.write(" l2_inval_data_addr : out std_logic_vector(" + str(phys_addr_bits - offset_bits - 1) + " downto 0);\n")
f.write(" l2_inval_data_hprot : out std_logic_vector(1 downto 0);\n")
f.write(" l2_bresp_valid : out std_ulogic;\n")
f.write(" l2_bresp_data : out std_logic_vector(1 downto 0);\n")
f.write(" l2_req_out_valid : out std_ulogic;\n")
Expand Down Expand Up @@ -1154,14 +1156,16 @@ def write_cache_port_map(f, cac, is_llc):
f.write(" l2_fwd_out_ready => l2_rsp_out_ready,\n")
f.write(" l2_stats_ready => l2_stats_ready,\n")
f.write(" flush_done => flush_done,\n")
f.write(" acc_flush_done => acc_flush_done,\n")
f.write(" l2_cpu_req_ready => l2_cpu_req_ready,\n")
f.write(" l2_fwd_in_ready => l2_fwd_in_ready,\n")
f.write(" l2_rsp_in_ready => l2_rsp_in_ready,\n")
f.write(" l2_flush_ready => l2_flush_ready,\n")
f.write(" l2_rd_rsp_valid => l2_rd_rsp_valid,\n")
f.write(" l2_rd_rsp_data_line => l2_rd_rsp_data_line,\n")
f.write(" l2_inval_valid => l2_inval_valid,\n")
f.write(" l2_inval_data => l2_inval_data,\n")
f.write(" l2_inval_data_addr => l2_inval_data_addr,\n")
f.write(" l2_inval_data_hprot => l2_inval_data_hprot,\n")
f.write(" l2_bresp_valid => l2_bresp_valid,\n")
f.write(" l2_bresp_data => l2_bresp_data,\n")
f.write(" l2_req_out_valid => l2_req_out_valid,\n")
Expand Down
3 changes: 3 additions & 0 deletions tools/socketgen/templates/noc_interface.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ end;
signal dma_write_chnl_data : std_logic_vector(ARCH_BITS - 1 downto 0);
signal acc_done : std_ulogic;
signal flush : std_ulogic;
signal acc_flush_done : std_ulogic;
-- Register control, interrupt and monitor signals
signal pllclk_int : std_ulogic;
signal mon_dvfs_feedthru : monitor_dvfs_type;
Expand Down Expand Up @@ -307,6 +308,7 @@ begin
aq => conf_done,
rl => acc_done,
spandex_conf => bank(SPANDEX_REG),
acc_flush_done => acc_flush_done,
coherence_req_wrreq => coherence_req_wrreq,
coherence_req_data_in => coherence_req_data_in,
coherence_req_full => coherence_req_full,
Expand Down Expand Up @@ -396,6 +398,7 @@ begin
bufdout_valid => dma_write_chnl_valid,
acc_done => acc_done,
flush => flush,
acc_flush_done => acc_flush_done,
mon_dvfs_in => mon_dvfs_in,
mon_dvfs => mon_dvfs_feedthru,
llc_coherent_dma_rcv_rdreq => coherent_dma_rcv_rdreq,
Expand Down

0 comments on commit 42903d0

Please sign in to comment.