diff --git a/rtl/caches/cachepackage.vhd b/rtl/caches/cachepackage.vhd index be4364795..d3139df77 100644 --- a/rtl/caches/cachepackage.vhd +++ b/rtl/caches/cachepackage.vhd @@ -275,6 +275,7 @@ package cachepackage is aq : in std_ulogic; rl : in std_ulogic; spandex_conf : in std_logic_vector(31 downto 0); + acc_flush_done : out std_ulogic; -- backend (cache - NoC) -- tile->NoC1 coherence_req_wrreq : out std_ulogic; diff --git a/rtl/caches/gencaches.vhd b/rtl/caches/gencaches.vhd index db1b0fa48..e58014a59 100644 --- a/rtl/caches/gencaches.vhd +++ b/rtl/caches/gencaches.vhd @@ -120,6 +120,7 @@ package gencaches is l2_bresp_ready : in std_ulogic; l2_stats_ready : in std_ulogic; flush_done : out std_ulogic; + acc_flush_done : out std_ulogic; l2_cpu_req_ready : out std_ulogic; l2_fwd_in_ready : out std_ulogic; l2_rsp_in_ready : out std_ulogic; @@ -127,7 +128,8 @@ package gencaches is l2_rd_rsp_valid : out std_ulogic; l2_rd_rsp_data_line : out line_t; l2_inval_valid : out std_ulogic; - l2_inval_data : out line_addr_t; + l2_inval_data_addr : out line_addr_t; + l2_inval_data_hprot : out hprot_t; l2_bresp_valid : out std_ulogic; l2_bresp_data : out bresp_t; l2_req_out_valid : out std_ulogic; diff --git a/rtl/caches/l2_acc_wrapper.vhd b/rtl/caches/l2_acc_wrapper.vhd index deb9c7bfb..4d65bc30a 100644 --- a/rtl/caches/l2_acc_wrapper.vhd +++ b/rtl/caches/l2_acc_wrapper.vhd @@ -69,6 +69,7 @@ entity l2_acc_wrapper is aq : in std_ulogic; rl : in std_ulogic; spandex_conf : in std_logic_vector(31 downto 0); + acc_flush_done : out std_ulogic; -- backend (cache - NoC) -- tile->NoC1 @@ -174,8 +175,9 @@ architecture rtl of l2_acc_wrapper is signal acc_done_l2_valid : std_logic; signal acc_done_l2_data : std_logic_vector(1 downto 0); - type acc_done_state_t is (idle, valid_acc_done); + type acc_done_state_t is (idle, wait_for_cpu_ready, valid_acc_done); signal acc_done_state, acc_done_next : acc_done_state_t; + signal flush_sync : std_ulogic; ------------------------------------------------------------------------------- -- Flush FSM signals @@ -571,7 +573,7 @@ begin -- architecture rtl of l2_acc_wrapper l2_cpu_req_data_dcs => cpu_req_data_dcs, l2_cpu_req_data_pred_cid => cpu_req_data_pred_cid, l2_flush_ready => flush_ready, - l2_flush_valid => flush_valid, + l2_flush_valid => '0', l2_flush_data => flush_data, -- cache to AHB l2_rd_rsp_ready => rd_rsp_ready, @@ -579,7 +581,8 @@ begin -- architecture rtl of l2_acc_wrapper l2_rd_rsp_data_line => rd_rsp_data_line, l2_inval_ready => inval_ready, l2_inval_valid => inval_valid, - l2_inval_data => open, + l2_inval_data_addr => open, + l2_inval_data_hprot => open, l2_bresp_ready => '1', l2_bresp_valid => open, l2_bresp_data => open, @@ -623,6 +626,7 @@ begin -- architecture rtl of l2_acc_wrapper l2_rsp_in_data_word_mask => rsp_in_data_word_mask, l2_rsp_in_data_invack_cnt => rsp_in_data_invack_cnt, flush_done => flush_done, + acc_flush_done => acc_flush_done, -- debug --asserts => asserts, --bookmark => bookmark, @@ -638,6 +642,7 @@ begin -- architecture rtl of l2_acc_wrapper acc_ready_gen: if USE_SPANDEX = 0 generate acc_done_l2_ready <= '0'; + acc_flush_done <= '0'; end generate acc_ready_gen; ---------------------------------------------------------------------------- @@ -647,12 +652,14 @@ begin -- architecture rtl of l2_acc_wrapper begin if rst = '0' then acc_done_state <= idle; + flush_sync <= '0'; elsif clk'event and clk = '1' then acc_done_state <= acc_done_next; + flush_sync <= flush; end if; end process acc_done_update; - acc_done_state_fsm : process (rl, acc_done_l2_ready, acc_done_state) is + acc_done_state_fsm : process (flush_sync, cpu_req_ready, acc_done_l2_ready, acc_done_state) is begin acc_done_next <= acc_done_state; acc_done_l2_data <= "11"; @@ -660,7 +667,12 @@ begin -- architecture rtl of l2_acc_wrapper case acc_done_state is when idle => - if rl = '1' and USE_SPANDEX /= 0 then + if flush_sync = '1' and USE_SPANDEX /= 0 then + acc_done_next <= wait_for_cpu_ready; + end if; + + when wait_for_cpu_ready => + if cpu_req_ready = '1' then acc_done_l2_valid <= '1'; if acc_done_l2_ready = '0' then acc_done_next <= valid_acc_done; diff --git a/rtl/caches/l2_wrapper.vhd b/rtl/caches/l2_wrapper.vhd index ab8a15c4a..4fcd02947 100644 --- a/rtl/caches/l2_wrapper.vhd +++ b/rtl/caches/l2_wrapper.vhd @@ -174,6 +174,7 @@ architecture rtl of l2_wrapper is --signal bookmark : bookmark_t; --signal custom_dbg : custom_dbg_t; signal flush_done : std_ulogic; + signal acc_flush_done : std_ulogic; -- statistics signal stats_ready : std_ulogic; signal stats_valid : std_ulogic; @@ -713,7 +714,8 @@ begin -- architecture rtl of l2_wrapper l2_rd_rsp_data_line => rd_rsp_data_line, l2_inval_ready => inval_ready, l2_inval_valid => inval_valid, - l2_inval_data => inval_data_addr, + l2_inval_data_addr => inval_data_addr, + l2_inval_data_hprot => inval_data_hprot, l2_bresp_ready => bresp_ready, l2_bresp_valid => bresp_valid, l2_bresp_data => bresp_data, @@ -757,6 +759,7 @@ begin -- architecture rtl of l2_wrapper l2_rsp_in_data_word_mask => rsp_in_data_word_mask, l2_rsp_in_data_invack_cnt => rsp_in_data_invack_cnt, flush_done => flush_done, + acc_flush_done => acc_flush_done, l2_stats_ready => stats_ready, l2_stats_valid => stats_valid, l2_stats_data => stats_data, diff --git a/rtl/caches/spandex-caches b/rtl/caches/spandex-caches index a31611f78..1f91e0b08 160000 --- a/rtl/caches/spandex-caches +++ b/rtl/caches/spandex-caches @@ -1 +1 @@ -Subproject commit a31611f783d48c5e39b1740f4147da484fbb46fa +Subproject commit 1f91e0b08d57c5b31fcd30ce6a43e7d15729e2a1 diff --git a/rtl/sockets/proxy/esp_acc_dma.vhd b/rtl/sockets/proxy/esp_acc_dma.vhd index 6663fb46a..a39eff173 100644 --- a/rtl/sockets/proxy/esp_acc_dma.vhd +++ b/rtl/sockets/proxy/esp_acc_dma.vhd @@ -100,6 +100,7 @@ entity esp_acc_dma is bufdout_valid : in std_ulogic; acc_done : in std_ulogic; flush : out std_ulogic; + acc_flush_done: in std_ulogic; mon_dvfs_in : in monitor_dvfs_type; --Monitor signals mon_dvfs : out monitor_dvfs_type; @@ -230,7 +231,7 @@ architecture rtl of esp_acc_dma is type dma_fsm is (idle, request_header, request_address, request_length, request_data, reply_header, reply_data, config, send_header, rd_handshake, wr_handshake, wait_req_p2p, - running, reset, wait_for_completion, fully_coherent_request); + running, reset, wait_for_completion, wait_flush_done, fully_coherent_request); signal acc_rst_next : std_ulogic; signal dma_state, dma_next : dma_fsm; signal status : std_logic_vector(31 downto 0); @@ -659,7 +660,7 @@ begin -- rtl dma_tran_start, tlb_empty, pending_dma_write, pending_dma_read, coherent_dma_ready, dvfs_transient, size_r, coherence, - p2p_req_rcv_empty, p2p_req_rcv_data_out, p2p_rsp_snd_full) + p2p_req_rcv_empty, p2p_req_rcv_data_out, p2p_rsp_snd_full, acc_flush_done) variable payload_data : noc_flit_type; variable preamble : noc_preamble_type; variable msg : noc_msg_type; @@ -794,13 +795,18 @@ begin -- rtl elsif bankreg(CMD_REG)(CMD_BIT_LAST downto 0) = zero(CMD_BIT_LAST downto 0) then dma_next <= reset; elsif pending_acc_done = '1' then - status <= (others => '0'); - status(STATUS_BIT_DONE) <= '1'; - sample_status <= '1'; - if coherence = ACC_COH_FULL then + if USE_SPANDEX /= 0 and coherence = ACC_COH_FULL then flush <= '1'; + dma_next <= wait_flush_done; + else + status <= (others => '0'); + status(STATUS_BIT_DONE) <= '1'; + sample_status <= '1'; + if coherence = ACC_COH_FULL then + flush <= '1'; + end if; + dma_next <= wait_for_completion; end if; - dma_next <= wait_for_completion; elsif rd_request = '1' then if scatter_gather = 0 then sample_flits <= '1'; @@ -815,6 +821,14 @@ begin -- rtl dma_next <= wr_handshake; end if; + when wait_flush_done => + if acc_flush_done = '1' and USE_SPANDEX /= 0 then + status <= (others => '0'); + status(STATUS_BIT_DONE) <= '1'; + sample_status <= '1'; + dma_next <= wait_for_completion; + end if; + when wait_for_completion => -- The software must reset the accelerator on completion by writing a 0 -- to the command register diff --git a/rtl/sockets/proxy/tile.vhd b/rtl/sockets/proxy/tile.vhd index 584457833..4f8132464 100644 --- a/rtl/sockets/proxy/tile.vhd +++ b/rtl/sockets/proxy/tile.vhd @@ -770,6 +770,7 @@ package tile is bufdout_valid : in std_ulogic; acc_done : in std_ulogic; flush : out std_ulogic; + acc_flush_done : in std_ulogic; mon_dvfs_in : in monitor_dvfs_type; mon_dvfs : out monitor_dvfs_type; llc_coherent_dma_rcv_rdreq : out std_ulogic; diff --git a/tools/socketgen/socketgen.py b/tools/socketgen/socketgen.py index 43288ffd1..834d4a7c8 100755 --- a/tools/socketgen/socketgen.py +++ b/tools/socketgen/socketgen.py @@ -878,6 +878,7 @@ def write_cache_interface(f, cac, is_llc): f.write(" l2_fwd_out_ready : in std_ulogic;\n") f.write(" l2_stats_ready : in std_ulogic;\n") f.write(" flush_done : out std_ulogic;\n") + f.write(" acc_flush_done : out std_ulogic;\n") f.write(" l2_cpu_req_ready : out std_ulogic;\n") f.write(" l2_fwd_in_ready : out std_ulogic;\n") f.write(" l2_rsp_in_ready : out std_ulogic;\n") @@ -885,7 +886,8 @@ def write_cache_interface(f, cac, is_llc): f.write(" l2_rd_rsp_valid : out std_ulogic;\n") f.write(" l2_rd_rsp_data_line : out std_logic_vector(" + str(bits_per_line - 1) + " downto 0);\n") f.write(" l2_inval_valid : out std_ulogic;\n") - f.write(" l2_inval_data : out std_logic_vector(" + str(phys_addr_bits - offset_bits - 1) + " downto 0);\n") + f.write(" l2_inval_data_addr : out std_logic_vector(" + str(phys_addr_bits - offset_bits - 1) + " downto 0);\n") + f.write(" l2_inval_data_hprot : out std_logic_vector(1 downto 0);\n") f.write(" l2_bresp_valid : out std_ulogic;\n") f.write(" l2_bresp_data : out std_logic_vector(1 downto 0);\n") f.write(" l2_req_out_valid : out std_ulogic;\n") @@ -1154,6 +1156,7 @@ def write_cache_port_map(f, cac, is_llc): f.write(" l2_fwd_out_ready => l2_rsp_out_ready,\n") f.write(" l2_stats_ready => l2_stats_ready,\n") f.write(" flush_done => flush_done,\n") + f.write(" acc_flush_done => acc_flush_done,\n") f.write(" l2_cpu_req_ready => l2_cpu_req_ready,\n") f.write(" l2_fwd_in_ready => l2_fwd_in_ready,\n") f.write(" l2_rsp_in_ready => l2_rsp_in_ready,\n") @@ -1161,7 +1164,8 @@ def write_cache_port_map(f, cac, is_llc): f.write(" l2_rd_rsp_valid => l2_rd_rsp_valid,\n") f.write(" l2_rd_rsp_data_line => l2_rd_rsp_data_line,\n") f.write(" l2_inval_valid => l2_inval_valid,\n") - f.write(" l2_inval_data => l2_inval_data,\n") + f.write(" l2_inval_data_addr => l2_inval_data_addr,\n") + f.write(" l2_inval_data_hprot => l2_inval_data_hprot,\n") f.write(" l2_bresp_valid => l2_bresp_valid,\n") f.write(" l2_bresp_data => l2_bresp_data,\n") f.write(" l2_req_out_valid => l2_req_out_valid,\n") diff --git a/tools/socketgen/templates/noc_interface.vhd b/tools/socketgen/templates/noc_interface.vhd index 74fb204cb..a00330e7f 100644 --- a/tools/socketgen/templates/noc_interface.vhd +++ b/tools/socketgen/templates/noc_interface.vhd @@ -216,6 +216,7 @@ end; signal dma_write_chnl_data : std_logic_vector(ARCH_BITS - 1 downto 0); signal acc_done : std_ulogic; signal flush : std_ulogic; + signal acc_flush_done : std_ulogic; -- Register control, interrupt and monitor signals signal pllclk_int : std_ulogic; signal mon_dvfs_feedthru : monitor_dvfs_type; @@ -307,6 +308,7 @@ begin aq => conf_done, rl => acc_done, spandex_conf => bank(SPANDEX_REG), + acc_flush_done => acc_flush_done, coherence_req_wrreq => coherence_req_wrreq, coherence_req_data_in => coherence_req_data_in, coherence_req_full => coherence_req_full, @@ -396,6 +398,7 @@ begin bufdout_valid => dma_write_chnl_valid, acc_done => acc_done, flush => flush, + acc_flush_done => acc_flush_done, mon_dvfs_in => mon_dvfs_in, mon_dvfs => mon_dvfs_feedthru, llc_coherent_dma_rcv_rdreq => coherent_dma_rcv_rdreq,