From 9a9370acd07b92f9175ea16b6fd4434240ffa8fe Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 24 Dec 2024 13:29:13 +0800 Subject: [PATCH 001/119] feat: added new ExternalMessage::RequestFromHeight and ExternalMessage::RequestFromHash message types. --- zilliqa/src/message.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 25d33d897..65f703a49 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -209,7 +209,7 @@ pub struct BlockRequest { pub to_view: u64, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct BlockResponse { pub proposals: Vec, pub from_view: u64, @@ -227,6 +227,13 @@ impl fmt::Debug for BlockResponse { } } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RequestBlock { + pub from_height: u64, + pub from_hash: Hash, + pub batch_size: u64, +} + /// Used to convey proposal processing internally, to avoid blocking threads for too long. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProcessProposal { @@ -259,6 +266,8 @@ pub enum ExternalMessage { /// An acknowledgement of the receipt of a message. Note this is only used as a response when the caller doesn't /// require any data in the response. Acknowledgement, + RequestFromHeight(RequestBlock), + RequestFromHash(RequestBlock), } impl ExternalMessage { @@ -274,6 +283,16 @@ impl ExternalMessage { impl Display for ExternalMessage { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { + ExternalMessage::RequestFromHeight(r) => { + write!( + f, + "RequestFromHeight({}, num={})", + r.from_height, r.batch_size + ) + } + ExternalMessage::RequestFromHash(r) => { + write!(f, "RequestFromHash({}, num={})", r.from_hash, r.batch_size) + } ExternalMessage::Proposal(p) => write!(f, "Proposal({})", p.view()), ExternalMessage::Vote(v) => write!(f, "Vote({})", v.view), ExternalMessage::NewView(n) => write!(f, "NewView({})", n.view), From ef104fe431a515f8e904e7b79b38468f8a7fc562 Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 25 Dec 2024 16:35:41 +0800 Subject: [PATCH 002/119] feat: initial blockstore.rs skeleton. --- zilliqa/src/message.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 65f703a49..865574a00 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -234,6 +234,11 @@ pub struct RequestBlock { pub batch_size: u64, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ResponseBlock { + pub proposals: Vec, +} + /// Used to convey proposal processing internally, to avoid blocking threads for too long. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProcessProposal { @@ -268,6 +273,8 @@ pub enum ExternalMessage { Acknowledgement, RequestFromHeight(RequestBlock), RequestFromHash(RequestBlock), + ResponseFromHeight(ResponseBlock), + ResponseFromHash(ResponseBlock), } impl ExternalMessage { @@ -283,6 +290,12 @@ impl ExternalMessage { impl Display for ExternalMessage { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { + ExternalMessage::ResponseFromHeight(r) => { + write!(f, "ResponseFromHeight({})", r.proposals.len()) + } + ExternalMessage::ResponseFromHash(r) => { + write!(f, "ResponseFromHash({})", r.proposals.len()) + } ExternalMessage::RequestFromHeight(r) => { write!( f, From 24225618a01727f846e8217d950a4f0d42b31beb Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 26 Dec 2024 15:10:25 +0800 Subject: [PATCH 003/119] feat: added request/response skeleton. --- zilliqa/src/message.rs | 6 +- zilliqa/src/node.rs | 127 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 865574a00..9a306f3a2 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -229,9 +229,9 @@ impl fmt::Debug for BlockResponse { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RequestBlock { - pub from_height: u64, + pub from_number: u64, pub from_hash: Hash, - pub batch_size: u64, + pub batch_size: usize, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -300,7 +300,7 @@ impl Display for ExternalMessage { write!( f, "RequestFromHeight({}, num={})", - r.from_height, r.batch_size + r.from_number, r.batch_size ) } ExternalMessage::RequestFromHash(r) => { diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 5eb0db947..956346c2f 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -34,7 +34,7 @@ use crate::{ inspector::{self, ScillaInspector}, message::{ Block, BlockHeader, BlockResponse, ExternalMessage, InternalMessage, IntershardCall, - ProcessProposal, Proposal, + ProcessProposal, Proposal, ResponseBlock, }, node_launcher::ResponseChannel, p2p_node::{LocalMessageTuple, OutboundMessageTuple}, @@ -269,6 +269,131 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } + ExternalMessage::RequestFromHeight(request) => { + if from == self.peer_id { + warn!("block_store::RequestFromHeight : ignoring blocks request to self"); + return Ok(()); + } + + // TODO: Check if we should service this request. + // Validators shall not respond to this request. + + trace!( + "block_store::RequestFromHeight : received a block request - {}", + self.peer_id + ); + + // TODO: Replace this entire block with a single SQL query + let Some(alpha) = self.db.get_block_by_hash(&request.from_hash)? else { + // We do not have the starting block + self.request_responses.send(( + response_channel, + ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }), + ))?; + return Ok(()); + }; + let mut proposals = Vec::new(); + for num in alpha.number().saturating_add(1) + ..=alpha.number().saturating_add(request.batch_size as u64) + { + let Some(block) = self.db.get_canonical_block_by_number(num)? else { + // that's all we have! 
+ break; + }; + proposals.push(self.block_to_proposal(block)); + } + + self.request_responses.send(( + response_channel, + ExternalMessage::ResponseFromHash(ResponseBlock { proposals }), + ))?; + } + ExternalMessage::ResponseFromHeight(response) => { + // Check that we have enough to complete the process, otherwise ignore + if response.proposals.is_empty() { + // Empty response, downgrade peer + warn!("block_store::ResponseFromHeight : empty blocks in flight {from}",); + } + if response.proposals.len() < self.config.max_blocks_in_flight as usize { + // Partial response, downgrade peer + warn!("block_store::ResponseFromHeight : insufficient blocks in flight {from}",); + } + + // TODO: Inject proposals + debug!( + "block_store::ResponseFromHeight : injecting proposals {:?}", + response + ); + + // Acknowledge this block response. This does nothing because the `BlockResponse` request was sent by + // us, but we keep it here for symmetry with the other handlers. + self.request_responses + .send((response_channel, ExternalMessage::Acknowledgement))?; + } + ExternalMessage::RequestFromHash(request) => { + if from == self.peer_id { + warn!("block_store::RequestFromHash : ignoring blocks request to self"); + return Ok(()); + } + + trace!( + "block_store::RequestFromHash : received a block request - {}", + self.peer_id + ); + + // TODO: Check if we should service this request + // Validators could respond to this request if there is nothing else to do. + + let Some(omega_block) = self.db.get_block_by_hash(&request.from_hash)? else { + // We do not have the starting block + self.request_responses.send(( + response_channel, + ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }), + ))?; + return Ok(()); + }; + + let mut proposals = Vec::new(); + let mut hash = omega_block.parent_hash(); + // grab up to batch_size blocks + while proposals.len() < request.batch_size { + // grab the parent + let Some(block) = self.db.get_block_by_hash(&hash)? else { + // that's all we have! + break; + }; + hash = block.parent_hash(); + proposals.push(self.block_to_proposal(block)); + } + + self.request_responses.send(( + response_channel, + ExternalMessage::ResponseFromHash(ResponseBlock { proposals }), + ))?; + } + ExternalMessage::ResponseFromHash(response) => { + // Check that we have enough to complete the process, otherwise ignore + if response.proposals.is_empty() { + // Empty response, downgrade peer + warn!("block_store::ResponseFromHeight : empty blocks in flight {from}",); + } + // Check that we have enough to complete the process, otherwise ignore + if response.proposals.len() * 2 < self.config.max_blocks_in_flight as usize { + warn!("block_store::ResponseFromHash : insufficient blocks in flight {from}",); + return Ok(()); + } + + // TODO: Inject proposals + debug!( + "block_store::ResponseFromHash : injecting proposals {:?}", + response + ); + + // Acknowledge this block response. This does nothing because the `BlockResponse` request was sent by + // us, but we keep it here for symmetry with the other handlers. + self.request_responses + .send((response_channel, ExternalMessage::Acknowledgement))?; + } ExternalMessage::BlockRequest(request) => { if from == self.peer_id { debug!("block_store::BlockRequest : ignoring blocks request to self"); From f811b776d9c9c8e7689f72895b570907e3983da8 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 26 Dec 2024 15:43:11 +0800 Subject: [PATCH 004/119] feat: hook up initial wiring of blockstore with consensus. 
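The new module is imported under an alias (`use crate::blockstore::BlockStore as BlockStore2`) so it can sit alongside the existing `block_store::BlockStore` during the transition. `Consensus` gains a `blockstore` field, built from the same config, database handle and message sender already passed to the legacy store.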
--- zilliqa/src/consensus.rs | 5 +++++ zilliqa/src/lib.rs | 1 + 2 files changed, 6 insertions(+) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index c5a93c761..efa18ed8a 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -22,6 +22,7 @@ use tracing::*; use crate::{ block_store::BlockStore, blockhooks, + blockstore::BlockStore as BlockStore2, cfg::{ConsensusConfig, NodeConfig}, constants::TIME_TO_ALLOW_PROPOSAL_BROADCAST, contracts, @@ -151,6 +152,7 @@ pub struct Consensus { config: NodeConfig, message_sender: MessageSender, reset_timeout: UnboundedSender, + blockstore: BlockStore2, pub block_store: BlockStore, latest_leader_cache: RefCell>, votes: BTreeMap, @@ -206,6 +208,8 @@ impl Consensus { )?; } + let blockstore = BlockStore2::new(&config, db.clone(), message_sender.clone())?; + // It is important to create the `BlockStore` after the checkpoint has been loaded into the DB. The // `BlockStore` pre-loads and caches information about the currently stored blocks. let block_store = BlockStore::new(&config, db.clone(), message_sender.clone())?; @@ -324,6 +328,7 @@ impl Consensus { let mut consensus = Consensus { secret_key, config, + blockstore, block_store, latest_leader_cache: RefCell::new(None), message_sender, diff --git a/zilliqa/src/lib.rs b/zilliqa/src/lib.rs index b949e6493..642e82df2 100644 --- a/zilliqa/src/lib.rs +++ b/zilliqa/src/lib.rs @@ -27,3 +27,4 @@ pub mod test_util; pub mod time; pub mod transaction; pub mod zq1_proto; +pub mod blockstore; From 87c12f9afc81dba7378c671c0733fc51a3b2c73f Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 26 Dec 2024 15:49:27 +0800 Subject: [PATCH 005/119] feat: added blockstore.rs. --- zilliqa/src/blockstore.rs | 203 ++++++++++++++++++++++++++++++++++++++ zilliqa/src/consensus.rs | 2 +- 2 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 zilliqa/src/blockstore.rs diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs new file mode 100644 index 000000000..7b648d1de --- /dev/null +++ b/zilliqa/src/blockstore.rs @@ -0,0 +1,203 @@ +use std::{ + cmp::Ordering, + collections::BinaryHeap, + sync::Arc, + time::{Duration, Instant}, +}; + +use anyhow::Result; + +use libp2p::PeerId; + +use crate::{ + cfg::NodeConfig, + db::Db, + message::{Block, ExternalMessage, Proposal, RequestBlock}, + node::{MessageSender, RequestId}, +}; + +/// Stores and manages the node's list of blocks. Also responsible for making requests for new blocks. +/// +/// # Syncing Algorithm +/// +/// We rely on [crate::consensus::Consensus] informing us of newly received block proposals via: +/// * [BlockStore::process_block] for blocks that can be part of our chain, because we already have their parent. +/// * [BlockStore::buffer_proposal] for blocks that can't (yet) be part of our chain. +/// +/// Both these code paths also call [BlockStore::request_missing_blocks]. This finds the greatest view of any proposal +/// we've seen (whether its part of our chain or not). 
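+///
+/// Requests go out in batches of at most `max_blocks_in_flight` blocks. Roughly:
+/// * if the gap to the new block is more than half a batch, we send `RequestFromHeight`
+///   for our highest canonical block and the responder returns the blocks that follow it;
+/// * otherwise we send `RequestFromHash` for the new block and the responder walks
+///   backwards from its parent.
+///
+/// Worked example (illustrative numbers): with `max_blocks_in_flight = 1000`, a node whose
+/// highest canonical block is 5000 and which sees a proposal at height 9000 has a gap of
+/// 4000 > 500, so it requests forwards; if the proposal were at height 5100 instead, the
+/// gap of 100 <= 500 would make it request backwards from the proposal's parent.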
+ +#[derive(Debug)] +pub struct BlockStore { + // database + db: Arc, + // message bus + message_sender: MessageSender, + // internal peers + peers: BinaryHeap, + // in-flight + in_flight: Option, + // in-flight timeout + request_timeout: Duration, + // how many blocks to request at once + max_blocks_in_flight: usize, +} + +impl BlockStore { + pub fn new( + config: &NodeConfig, + db: Arc, + message_sender: MessageSender, + peers: Vec, + ) -> Result { + let peers = peers + .into_iter() + .map(|peer_id| PeerInfo { + score: 0, + peer_id, + last_used: Instant::now(), + }) + .collect(); + + Ok(Self { + db, + message_sender, + peers, + in_flight: None, + request_timeout: config.consensus.consensus_timeout, + max_blocks_in_flight: config.max_blocks_in_flight.max(31) as usize, // between 30 seconds and 3 days of blocks. + }) + } + + /// Route each proposal as if it were received. + pub fn handle_response_from_height(&mut self, proposals: Vec) -> Result<()> { + // Just pump the Proposals back to ourselves, and it will be picked up and processed as if it were received. + // Only issue is the timestamp skew. We should probably fix that. + for p in proposals { + tracing::trace!("Received proposal from height: {:?}", p); + self.message_sender.send_external_message( + self.message_sender.our_peer_id, + ExternalMessage::Proposal(p), + )?; + } + Ok(()) + } + + pub fn handle_from_hash(&mut self, _: Vec) -> Result<()> { + // ... + Ok(()) + } + + pub fn process_proposal(&self, block: Block) -> Result<()> { + // ... + // check if block parent exists + let parent_block = self.db.get_block_by_hash(&block.parent_hash())?; + + // no parent block, trigger sync + if parent_block.is_none() {} + Ok(()) + } + + pub fn buffer_proposal(&self, block: Block) { + // ... + } + + /// Request blocks between the current height and the given block. + /// + /// The approach is to request blocks in batches of `max_blocks_in_flight` blocks. + /// If the block gap is large, we request blocks from the last known canonical block forwards. + /// If the block gap is small, we request blocks from the latest block backwards. + /// + pub fn request_missing_blocks(&mut self, omega_block: Block) -> Result { + // highest canonical block we have + // TODO: Replace this with a single SQL query. + let height = self + .db + .get_highest_canonical_block_number()? + .unwrap_or_default(); + let alpha_block = self.db.get_canonical_block_by_number(height)?.unwrap(); + + // Compute the block gap. + let block_gap = omega_block + .header + .number + .saturating_sub(alpha_block.header.number); + + // TODO: Double-check computation + let message = if block_gap > self.max_blocks_in_flight as u64 / 2 { + // we're far from latest block + ExternalMessage::RequestFromHeight(RequestBlock { + from_number: alpha_block.header.number, + from_hash: alpha_block.header.hash, + batch_size: self.max_blocks_in_flight, + }) + } else { + // we're close to latest block + ExternalMessage::RequestFromHash(RequestBlock { + from_number: omega_block.header.number, + from_hash: omega_block.header.hash, + batch_size: self.max_blocks_in_flight, + }) + }; + + let peer = self.in_flight.as_ref().unwrap(); + + self.message_sender + .send_external_message(peer.peer_id, message) + } + + /// Add a peer to the list of peers. + pub fn add_peer(&mut self, peer: PeerId) { + // new peers should be tried last, which gives them time to sync first. + // peers do not need to be unique. 
+ let new_peer = PeerInfo { + score: self.peers.iter().map(|p| p.score).max().unwrap_or(0), + peer_id: peer, + last_used: Instant::now(), + }; + self.peers.push(new_peer); + } + + /// Remove a peer from the list of peers. + pub fn remove_peer(&mut self, peer: PeerId) { + self.peers.retain(|p| p.peer_id != peer); + } + + pub fn get_next_peer(&mut self, prev_peer: Option) -> Option { + // Push the current peer into the heap, risks spamming the same peer. + // TODO: implement a better strategy for this. + if let Some(peer) = prev_peer { + self.peers.push(peer); + } + + let Some(mut peer) = self.peers.pop() else { + return None; + }; + + // used to determine stale in-flight requests. + peer.last_used = std::time::Instant::now(); + + Some(peer) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct PeerInfo { + score: u32, + peer_id: PeerId, + last_used: Instant, +} + +impl Ord for PeerInfo { + fn cmp(&self, other: &Self) -> Ordering { + self.score + .cmp(&other.score) + .then_with(|| self.last_used.cmp(&other.last_used)) + } +} + +impl PartialOrd for PeerInfo { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index efa18ed8a..879486c3e 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -208,7 +208,7 @@ impl Consensus { )?; } - let blockstore = BlockStore2::new(&config, db.clone(), message_sender.clone())?; + let blockstore = BlockStore2::new(&config, db.clone(), message_sender.clone(), Vec::new())?; // It is important to create the `BlockStore` after the checkpoint has been loaded into the DB. The // `BlockStore` pre-loads and caches information about the currently stored blocks. From 4b7097a8cef2b55a3953f3f4fe296e2dc89853f7 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 16:02:42 +0800 Subject: [PATCH 006/119] feat: added in-flight check. --- zilliqa/src/blockstore.rs | 43 +++++++++++++++++++++++++++++++++------ zilliqa/src/consensus.rs | 3 +++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 7b648d1de..3dabce05a 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -88,13 +88,20 @@ impl BlockStore { Ok(()) } - pub fn process_proposal(&self, block: Block) -> Result<()> { + pub fn process_proposal(&mut self, block: Block) -> Result<()> { // ... // check if block parent exists let parent_block = self.db.get_block_by_hash(&block.parent_hash())?; // no parent block, trigger sync - if parent_block.is_none() {} + let peer = self.in_flight.take(); + self.in_flight = self.get_next_peer(peer); + + if parent_block.is_none() && self.in_flight.is_some() { + self.request_missing_blocks(block)?; + tracing::debug!("Parent block not found, requesting missing blocks",); + return Ok(()); + } Ok(()) } @@ -108,7 +115,28 @@ impl BlockStore { /// If the block gap is large, we request blocks from the last known canonical block forwards. /// If the block gap is small, we request blocks from the latest block backwards. /// - pub fn request_missing_blocks(&mut self, omega_block: Block) -> Result { + pub fn request_missing_blocks(&mut self, omega_block: Block) -> Result<()> { + // Early exit if there's a request in-flight; and if it has not expired. 
+ if let Some(peer) = self.in_flight.as_ref() { + if peer.last_used.elapsed() > self.request_timeout { + tracing::warn!( + "In-flight request {} timed out, requesting from new peer", + peer.peer_id + ); + let mut peer = self.in_flight.take().unwrap(); + peer.score += 1; // TODO: Downgrade score if we keep timing out. + self.in_flight = self.get_next_peer(Some(peer)); + } else { + return Ok(()); + } + } else { + self.in_flight = self.get_next_peer(None); + if self.in_flight.is_none() { + tracing::error!("No peers available to request missing blocks"); + return Ok(()); + } + } + // highest canonical block we have // TODO: Replace this with a single SQL query. let height = self @@ -123,7 +151,7 @@ impl BlockStore { .number .saturating_sub(alpha_block.header.number); - // TODO: Double-check computation + // TODO: Double-check hysteresis logic. let message = if block_gap > self.max_blocks_in_flight as u64 / 2 { // we're far from latest block ExternalMessage::RequestFromHeight(RequestBlock { @@ -142,8 +170,11 @@ impl BlockStore { let peer = self.in_flight.as_ref().unwrap(); + tracing::debug!(?message, "Requesting missing blocks from {}", peer.peer_id); + self.message_sender - .send_external_message(peer.peer_id, message) + .send_external_message(peer.peer_id, message)?; + Ok(()) } /// Add a peer to the list of peers. @@ -163,7 +194,7 @@ impl BlockStore { self.peers.retain(|p| p.peer_id != peer); } - pub fn get_next_peer(&mut self, prev_peer: Option) -> Option { + fn get_next_peer(&mut self, prev_peer: Option) -> Option { // Push the current peer into the heap, risks spamming the same peer. // TODO: implement a better strategy for this. if let Some(peer) = prev_peer { diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 879486c3e..7e9ebb58b 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -650,6 +650,9 @@ impl Consensus { block.hash() ); + // FIXME: Cleanup + self.blockstore.process_proposal(block.clone())?; + if self.block_store.contains_block(&block.hash())? { trace!("ignoring block proposal, block store contains this block already"); return Ok(None); From 58f229008e8e82d6e0e3edd59191f269ed787989 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 16:23:47 +0800 Subject: [PATCH 007/119] feat: added debug/warn/trace messages. --- zilliqa/src/blockstore.rs | 18 +++-- zilliqa/src/node.rs | 140 +++++++++++++++++++++++--------------- 2 files changed, 94 insertions(+), 64 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 3dabce05a..e3e3d5f82 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -13,7 +13,7 @@ use crate::{ cfg::NodeConfig, db::Db, message::{Block, ExternalMessage, Proposal, RequestBlock}, - node::{MessageSender, RequestId}, + node::MessageSender, }; /// Stores and manages the node's list of blocks. Also responsible for making requests for new blocks. 
@@ -94,18 +94,18 @@ impl BlockStore { let parent_block = self.db.get_block_by_hash(&block.parent_hash())?; // no parent block, trigger sync - let peer = self.in_flight.take(); - self.in_flight = self.get_next_peer(peer); - - if parent_block.is_none() && self.in_flight.is_some() { + if parent_block.is_none() { + tracing::warn!( + "blockstore::ProcessProposal : Parent block {} not found, requesting missing blocks", + block.parent_hash() + ); self.request_missing_blocks(block)?; - tracing::debug!("Parent block not found, requesting missing blocks",); return Ok(()); } Ok(()) } - pub fn buffer_proposal(&self, block: Block) { + pub fn buffer_proposal(&self, _block: Block) { // ... } @@ -201,9 +201,7 @@ impl BlockStore { self.peers.push(peer); } - let Some(mut peer) = self.peers.pop() else { - return None; - }; + let mut peer = self.peers.pop()?; // used to determine stale in-flight requests. peer.last_used = std::time::Instant::now(); diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 956346c2f..9cabf3e8a 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -270,31 +270,40 @@ impl Node { .send((response_channel, ExternalMessage::Acknowledgement))?; } ExternalMessage::RequestFromHeight(request) => { + tracing::debug!( + "blockstore::RequestFromHeight : received a block request from {}", + from + ); + if from == self.peer_id { - warn!("block_store::RequestFromHeight : ignoring blocks request to self"); + warn!("blockstore::RequestFromHeight : ignoring blocks from self"); return Ok(()); } // TODO: Check if we should service this request. // Validators shall not respond to this request. - trace!( - "block_store::RequestFromHeight : received a block request - {}", - self.peer_id - ); - - // TODO: Replace this entire block with a single SQL query let Some(alpha) = self.db.get_block_by_hash(&request.from_hash)? else { // We do not have the starting block + tracing::warn!( + "blockstore::RequestFromHeight : missing starting block {}", + request.from_hash + ); self.request_responses.send(( response_channel, ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }), ))?; return Ok(()); }; + + // TODO: Replace this with a single SQL query let mut proposals = Vec::new(); - for num in alpha.number().saturating_add(1) - ..=alpha.number().saturating_add(request.batch_size as u64) + let batch_size = self + .config + .max_blocks_in_flight + .min(request.batch_size as u64); + for num in + alpha.number().saturating_add(1)..=alpha.number().saturating_add(batch_size) { let Some(block) = self.db.get_canonical_block_by_number(num)? else { // that's all we have! 
@@ -303,10 +312,12 @@ impl Node { proposals.push(self.block_to_proposal(block)); } - self.request_responses.send(( - response_channel, - ExternalMessage::ResponseFromHash(ResponseBlock { proposals }), - ))?; + let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals }); + tracing::trace!( + ?message, + "blockstore::RequestFromHeight : responding to block request from height" + ); + self.request_responses.send((response_channel, message))?; } ExternalMessage::ResponseFromHeight(response) => { // Check that we have enough to complete the process, otherwise ignore @@ -331,21 +342,25 @@ impl Node { .send((response_channel, ExternalMessage::Acknowledgement))?; } ExternalMessage::RequestFromHash(request) => { + debug!( + "blockstore::RequestFromHash : received a block request from {}", + from + ); + if from == self.peer_id { - warn!("block_store::RequestFromHash : ignoring blocks request to self"); + warn!("blockstore::RequestFromHash : ignoring request from self"); return Ok(()); } - trace!( - "block_store::RequestFromHash : received a block request - {}", - self.peer_id - ); - // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. let Some(omega_block) = self.db.get_block_by_hash(&request.from_hash)? else { // We do not have the starting block + tracing::warn!( + "blockstore::RequestFromHash : missing starting block {}", + request.from_hash + ); self.request_responses.send(( response_channel, ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }), @@ -356,7 +371,10 @@ impl Node { let mut proposals = Vec::new(); let mut hash = omega_block.parent_hash(); // grab up to batch_size blocks - while proposals.len() < request.batch_size { + let batch_size = request + .batch_size + .min(self.config.max_blocks_in_flight as usize); + while proposals.len() < batch_size { // grab the parent let Some(block) = self.db.get_block_by_hash(&hash)? else { // that's all we have! @@ -366,10 +384,12 @@ impl Node { proposals.push(self.block_to_proposal(block)); } - self.request_responses.send(( - response_channel, - ExternalMessage::ResponseFromHash(ResponseBlock { proposals }), - ))?; + let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals }); + tracing::trace!( + ?message, + "blockstore::RequestFromHash : responding to block request from height" + ); + self.request_responses.send((response_channel, message))?; } ExternalMessage::ResponseFromHash(response) => { // Check that we have enough to complete the process, otherwise ignore @@ -394,43 +414,55 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } - ExternalMessage::BlockRequest(request) => { - if from == self.peer_id { - debug!("block_store::BlockRequest : ignoring blocks request to self"); - return Ok(()); - } - trace!( - "block_store::BlockRequest : received a block request - {}", - self.peer_id - ); - // Note that it is very important that we limit this by number of blocks - // returned, _not_ by max view range returned. If we don't, then any - // view gap larger than block_request_limit will never be filliable - // because no node will ever be prepared to return the block after it. 
- let proposals: Vec = (request.from_view..=request.to_view) - .take(self.config.block_request_limit) - .filter_map(|view| { - self.consensus - .get_block_by_view(view) - .transpose() - .map(|block| Ok(self.block_to_proposal(block?))) - }) - .collect::>()?; - - let availability = self.consensus.block_store.availability()?; - trace!("block_store::BlockRequest - responding to new blocks request {id:?} from {from:?} of {request:?} with props {0:?} availability {availability:?}", - proposals.iter().fold("".to_string(), |state, x| format!("{},{}", state, x.header.view))); - - // Send the response to this block request. + // Respond negatively to old BlockRequests. + ExternalMessage::BlockRequest(request) => { self.request_responses.send(( response_channel, ExternalMessage::BlockResponse(BlockResponse { - proposals, + proposals: vec![], from_view: request.from_view, - availability, + availability: None, }), ))?; + return Ok(()); + + // if from == self.peer_id { + // debug!("block_store::BlockRequest : ignoring blocks request to self"); + // return Ok(()); + // } + + // trace!( + // "block_store::BlockRequest : received a block request - {}", + // self.peer_id + // ); + // // Note that it is very important that we limit this by number of blocks + // // returned, _not_ by max view range returned. If we don't, then any + // // view gap larger than block_request_limit will never be filliable + // // because no node will ever be prepared to return the block after it. + // let proposals: Vec = (request.from_view..=request.to_view) + // .take(self.config.block_request_limit) + // .filter_map(|view| { + // self.consensus + // .get_block_by_view(view) + // .transpose() + // .map(|block| Ok(self.block_to_proposal(block?))) + // }) + // .collect::>()?; + + // let availability = self.consensus.block_store.availability()?; + // trace!("block_store::BlockRequest - responding to new blocks request {id:?} from {from:?} of {request:?} with props {0:?} availability {availability:?}", + // proposals.iter().fold("".to_string(), |state, x| format!("{},{}", state, x.header.view))); + + // // Send the response to this block request. + // self.request_responses.send(( + // response_channel, + // ExternalMessage::BlockResponse(BlockResponse { + // proposals, + // from_view: request.from_view, + // availability, + // }), + // ))?; } // We don't usually expect a [BlockResponse] to be received as a request, however this can occur when our // [BlockStore] has re-sent a previously unusable block because we didn't (yet) have the block's parent. From b6959ad1ebe3eacb8004a140da411bc5663c3a27 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 16:41:27 +0800 Subject: [PATCH 008/119] feat: initial requests firing. 
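Peers are now added to and removed from the sync peer-heap when they subscribe to or unsubscribe from the shard's gossipsub topic (new `AddPeer` / `RemovePeer` messages), and `PeerInfo`'s `Ord` is reversed so that the heap hands out the best peer first.

A minimal, self-contained sketch of that last point (using a simplified stand-in for `PeerInfo`, illustrative only): `std::collections::BinaryHeap` is a max-heap, so comparing `other` against `self` turns it into a min-heap on `score`, and the real impl breaks ties towards the least recently used peer.

    use std::{cmp::Ordering, collections::BinaryHeap};

    #[derive(PartialEq, Eq)]
    struct Peer {
        score: u32,
    }

    impl Ord for Peer {
        // reversed: the lowest score compares as "greatest", so the max-heap pops it first
        fn cmp(&self, other: &Self) -> Ordering {
            other.score.cmp(&self.score)
        }
    }

    impl PartialOrd for Peer {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    fn main() {
        let mut heap = BinaryHeap::new();
        heap.push(Peer { score: 3 });
        heap.push(Peer { score: 1 });
        heap.push(Peer { score: 2 });
        assert_eq!(heap.pop().unwrap().score, 1); // lowest (best) score is served first
    }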
--- zilliqa/src/blockstore.rs | 9 +++++---- zilliqa/src/consensus.rs | 2 +- zilliqa/src/message.rs | 4 ++++ zilliqa/src/node.rs | 6 ++++++ zilliqa/src/p2p_node.rs | 8 ++++++++ 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index e3e3d5f82..804a752ed 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -96,7 +96,7 @@ impl BlockStore { // no parent block, trigger sync if parent_block.is_none() { tracing::warn!( - "blockstore::ProcessProposal : Parent block {} not found, requesting missing blocks", + "blockstore::ProcessProposal : Parent block {} not found", block.parent_hash() ); self.request_missing_blocks(block)?; @@ -219,9 +219,10 @@ struct PeerInfo { impl Ord for PeerInfo { fn cmp(&self, other: &Self) -> Ordering { - self.score - .cmp(&other.score) - .then_with(|| self.last_used.cmp(&other.last_used)) + other + .score + .cmp(&self.score) + .then_with(|| other.last_used.cmp(&self.last_used)) } } diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 7e9ebb58b..e1f5db700 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -152,7 +152,7 @@ pub struct Consensus { config: NodeConfig, message_sender: MessageSender, reset_timeout: UnboundedSender, - blockstore: BlockStore2, + pub blockstore: BlockStore2, pub block_store: BlockStore, latest_leader_cache: RefCell>, votes: BTreeMap, diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 9a306f3a2..8689c6f80 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -271,6 +271,8 @@ pub enum ExternalMessage { /// An acknowledgement of the receipt of a message. Note this is only used as a response when the caller doesn't /// require any data in the response. Acknowledgement, + AddPeer, + RemovePeer, RequestFromHeight(RequestBlock), RequestFromHash(RequestBlock), ResponseFromHeight(ResponseBlock), @@ -290,6 +292,8 @@ impl ExternalMessage { impl Display for ExternalMessage { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { + ExternalMessage::AddPeer => write!(f, "AddPeer"), + ExternalMessage::RemovePeer => write!(f, "RemovePeer"), ExternalMessage::ResponseFromHeight(r) => { write!(f, "ResponseFromHeight({})", r.proposals.len()) } diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 9cabf3e8a..43af7bff3 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -227,6 +227,12 @@ impl Node { )))?; } } + ExternalMessage::AddPeer => { + self.consensus.blockstore.add_peer(from); + } + ExternalMessage::RemovePeer => { + self.consensus.blockstore.remove_peer(from); + } // `Proposals` are re-routed to `handle_request()` _ => { warn!("unexpected message type"); diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index 3e3d5e127..e353d6cf5 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -264,6 +264,14 @@ impl P2pNode { .kademlia .add_address(&peer_id, address.clone()); } + SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Subscribed { peer_id, topic })) => { + let message = ExternalMessage::AddPeer; + self.send_to(&topic, |c| c.broadcasts.send((peer_id, message)))?; + } + SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Unsubscribed { peer_id, topic })) => { + let message = ExternalMessage::RemovePeer; + self.send_to(&topic, |c| c.broadcasts.send((peer_id, message)))?; + } SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Message{ message_id: msg_id, message: gossipsub::Message { From 
8bd7214d8de894028bd6dd4590a2d9775be4efb3 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 16:51:53 +0800 Subject: [PATCH 009/119] feat: convert config value to usize for simplicity. --- zilliqa/src/block_store.rs | 4 ++-- zilliqa/src/blockstore.rs | 6 +++++- zilliqa/src/cfg.rs | 4 ++-- zilliqa/src/node.rs | 21 ++++++++------------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/zilliqa/src/block_store.rs b/zilliqa/src/block_store.rs index 24756eefe..32bf71a71 100644 --- a/zilliqa/src/block_store.rs +++ b/zilliqa/src/block_store.rs @@ -567,11 +567,11 @@ impl BlockStore { highest_known_view: 0, highest_confirmed_view: 0, peers: BTreeMap::new(), - max_blocks_in_flight: config.max_blocks_in_flight, + max_blocks_in_flight: config.max_blocks_in_flight as u64, failed_request_sleep_duration: config.failed_request_sleep_duration, strategies: vec![BlockStrategy::Latest(constants::RETAINS_LAST_N_BLOCKS)], available_blocks, - buffered: BlockCache::new(config.max_blocks_in_flight), + buffered: BlockCache::new(config.max_blocks_in_flight as u64), unserviceable_requests: None, message_sender, clock: 0, diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 804a752ed..dc3415aab 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -41,6 +41,8 @@ pub struct BlockStore { request_timeout: Duration, // how many blocks to request at once max_blocks_in_flight: usize, + // our peer id + peer_id: PeerId, } impl BlockStore { @@ -58,7 +60,8 @@ impl BlockStore { last_used: Instant::now(), }) .collect(); - + let peer_id = message_sender.our_peer_id; + Ok(Self { db, message_sender, @@ -66,6 +69,7 @@ impl BlockStore { in_flight: None, request_timeout: config.consensus.consensus_timeout, max_blocks_in_flight: config.max_blocks_in_flight.max(31) as usize, // between 30 seconds and 3 days of blocks. + peer_id, }) } diff --git a/zilliqa/src/cfg.rs b/zilliqa/src/cfg.rs index 4e6f9f5d8..c08c0bc88 100644 --- a/zilliqa/src/cfg.rs +++ b/zilliqa/src/cfg.rs @@ -101,7 +101,7 @@ pub struct NodeConfig { pub block_request_limit: usize, /// The maximum number of blocks to have outstanding requests for at a time when syncing. #[serde(default = "max_blocks_in_flight_default")] - pub max_blocks_in_flight: u64, + pub max_blocks_in_flight: usize, /// The maximum number of blocks to request in a single message when syncing. 
#[serde(default = "block_request_batch_size_default")] pub block_request_batch_size: u64, @@ -204,7 +204,7 @@ pub fn block_request_limit_default() -> usize { 100 } -pub fn max_blocks_in_flight_default() -> u64 { +pub fn max_blocks_in_flight_default() -> usize { 1000 } diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 43af7bff3..9b0e481f7 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -297,17 +297,14 @@ impl Node { ); self.request_responses.send(( response_channel, - ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }), + ExternalMessage::ResponseFromHeight(ResponseBlock { proposals: vec![] }), ))?; return Ok(()); }; // TODO: Replace this with a single SQL query let mut proposals = Vec::new(); - let batch_size = self - .config - .max_blocks_in_flight - .min(request.batch_size as u64); + let batch_size = self.config.max_blocks_in_flight.min(request.batch_size) as u64; for num in alpha.number().saturating_add(1)..=alpha.number().saturating_add(batch_size) { @@ -318,7 +315,7 @@ impl Node { proposals.push(self.block_to_proposal(block)); } - let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals }); + let message = ExternalMessage::ResponseFromHeight(ResponseBlock { proposals }); tracing::trace!( ?message, "blockstore::RequestFromHeight : responding to block request from height" @@ -329,16 +326,16 @@ impl Node { // Check that we have enough to complete the process, otherwise ignore if response.proposals.is_empty() { // Empty response, downgrade peer - warn!("block_store::ResponseFromHeight : empty blocks in flight {from}",); + warn!("blockstore::ResponseFromHeight : empty blocks in flight {from}",); } - if response.proposals.len() < self.config.max_blocks_in_flight as usize { + if response.proposals.len() < self.config.max_blocks_in_flight { // Partial response, downgrade peer - warn!("block_store::ResponseFromHeight : insufficient blocks in flight {from}",); + warn!("blockstore::ResponseFromHeight : insufficient blocks in flight {from}",); } // TODO: Inject proposals debug!( - "block_store::ResponseFromHeight : injecting proposals {:?}", + "blockstore::ResponseFromHeight : injecting proposals {:?}", response ); @@ -377,9 +374,7 @@ impl Node { let mut proposals = Vec::new(); let mut hash = omega_block.parent_hash(); // grab up to batch_size blocks - let batch_size = request - .batch_size - .min(self.config.max_blocks_in_flight as usize); + let batch_size = request.batch_size.min(self.config.max_blocks_in_flight); while proposals.len() < batch_size { // grab the parent let Some(block) = self.db.get_block_by_hash(&hash)? else { From 7edca1cdb630f10b9a12dac5016d9618719c9a21 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 17:01:24 +0800 Subject: [PATCH 010/119] feat: added blockstore::handle_response_from_height(). 
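Response handling moves out of node.rs: the `ResponseFromHeight` arm now delegates to `blockstore::handle_response_from_height()`, which warns on empty or partial responses and re-sends each returned proposal to ourselves as an `ExternalMessage::Proposal`, so it still flows through the normal proposal-handling path.

At this point a sync round-trip looks roughly like:

    1. process_proposal() sees a block whose parent we don't have;
    2. request_missing_blocks() picks a peer from the heap and sends RequestFromHeight
       (far behind) or RequestFromHash (close to the tip);
    3. the peer answers with ResponseFromHeight / ResponseFromHash;
    4. handle_response_from_height() re-injects the returned proposals.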
--- zilliqa/src/blockstore.rs | 50 ++++++++++++++++++++++++++------------- zilliqa/src/node.rs | 17 +------------ 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index dc3415aab..7b14b3db7 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -12,7 +12,7 @@ use libp2p::PeerId; use crate::{ cfg::NodeConfig, db::Db, - message::{Block, ExternalMessage, Proposal, RequestBlock}, + message::{Block, ExternalMessage, Proposal, RequestBlock, ResponseBlock}, node::MessageSender, }; @@ -61,7 +61,7 @@ impl BlockStore { }) .collect(); let peer_id = message_sender.our_peer_id; - + Ok(Self { db, message_sender, @@ -73,20 +73,6 @@ impl BlockStore { }) } - /// Route each proposal as if it were received. - pub fn handle_response_from_height(&mut self, proposals: Vec) -> Result<()> { - // Just pump the Proposals back to ourselves, and it will be picked up and processed as if it were received. - // Only issue is the timestamp skew. We should probably fix that. - for p in proposals { - tracing::trace!("Received proposal from height: {:?}", p); - self.message_sender.send_external_message( - self.message_sender.our_peer_id, - ExternalMessage::Proposal(p), - )?; - } - Ok(()) - } - pub fn handle_from_hash(&mut self, _: Vec) -> Result<()> { // ... Ok(()) @@ -113,6 +99,38 @@ impl BlockStore { // ... } + pub fn handle_response_from_height( + &mut self, + from: PeerId, + response: ResponseBlock, + ) -> Result<()> { + // Check that we have enough to complete the process, otherwise ignore + if response.proposals.is_empty() { + // Empty response, downgrade peer + tracing::warn!("blockstore::ResponseFromHeight : empty blocks {from}",); + } + if response.proposals.len() < self.max_blocks_in_flight { + // Partial response, downgrade peer + tracing::warn!("blockstore::ResponseFromHeight : partial blocks {from}",); + } + + // TODO: Inject proposals + tracing::debug!( + "blockstore::ResponseFromHeight : injecting proposals {:?}", + response.proposals + ); + + // Just pump the Proposals back to ourselves, and it will be picked up and processed as if it were received. + // Only issue is the timestamp skew. We should probably fix that. + for p in response.proposals { + tracing::trace!("Received proposal from height: {:?}", p); + self.message_sender + .send_external_message(self.peer_id, ExternalMessage::Proposal(p))?; + } + // ... + Ok(()) + } + /// Request blocks between the current height and the given block. /// /// The approach is to request blocks in batches of `max_blocks_in_flight` blocks. diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 9b0e481f7..305666489 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -323,22 +323,7 @@ impl Node { self.request_responses.send((response_channel, message))?; } ExternalMessage::ResponseFromHeight(response) => { - // Check that we have enough to complete the process, otherwise ignore - if response.proposals.is_empty() { - // Empty response, downgrade peer - warn!("blockstore::ResponseFromHeight : empty blocks in flight {from}",); - } - if response.proposals.len() < self.config.max_blocks_in_flight { - // Partial response, downgrade peer - warn!("blockstore::ResponseFromHeight : insufficient blocks in flight {from}",); - } - - // TODO: Inject proposals - debug!( - "blockstore::ResponseFromHeight : injecting proposals {:?}", - response - ); - + self.consensus.blockstore.handle_response_from_height(from, response)?; // Acknowledge this block response. 
This does nothing because the `BlockResponse` request was sent by // us, but we keep it here for symmetry with the other handlers. self.request_responses From b5b7c35682515efa07c30490a6834a66e8146e5b Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 17:43:44 +0800 Subject: [PATCH 011/119] feat: checkpoint, successful RequestFromHeight-ResponseFromHeight. --- zilliqa/src/blockstore.rs | 83 +++++++++++++++++++++++++++++++++++---- zilliqa/src/message.rs | 3 +- zilliqa/src/node.rs | 60 +++++----------------------- 3 files changed, 85 insertions(+), 61 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 7b14b3db7..abe076842 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -7,6 +7,7 @@ use std::{ use anyhow::Result; +use itertools::Itertools; use libp2p::PeerId; use crate::{ @@ -99,6 +100,61 @@ impl BlockStore { // ... } + fn block_to_proposal(&self, block: Block) -> Proposal { + let txs = block + .transactions + .iter() + .map(|hash| self.db.get_transaction(hash).unwrap().unwrap()) + .map(|tx| tx.verify().unwrap()) + .collect(); + + Proposal::from_parts(block, txs) + } + + pub fn handle_request_from_height( + &mut self, + from: PeerId, + request: RequestBlock, + ) -> Result { + // ... + tracing::debug!( + "blockstore::RequestFromHeight : received a block request from {}", + from + ); + + // TODO: Check if we should service this request. + // Validators shall not respond to this request. + + let Some(alpha) = self.db.get_block_by_hash(&request.from_hash)? else { + // We do not have the starting block + tracing::warn!( + "blockstore::RequestFromHeight : missing starting block {}", + request.from_hash + ); + let message: ExternalMessage = + ExternalMessage::ResponseFromHeight(ResponseBlock { proposals: vec![] }); + return Ok(message); + }; + + // TODO: Replace this with a single SQL query + let mut proposals = Vec::new(); + let batch_size = self.max_blocks_in_flight.min(request.batch_size) as u64; + for num in alpha.number().saturating_add(1)..=alpha.number().saturating_add(batch_size) { + let Some(block) = self.db.get_canonical_block_by_number(num)? else { + // that's all we have! + break; + }; + proposals.push(self.block_to_proposal(block)); + } + + let message = ExternalMessage::ResponseFromHeight(ResponseBlock { proposals }); + tracing::trace!( + ?message, + "blockstore::RequestFromHeight : responding to block request from height" + ); + Ok(message) + } + pub fn handle_response_from_height( &mut self, from: PeerId, @@ -116,17 +172,30 @@ impl BlockStore { // TODO: Inject proposals tracing::debug!( - "blockstore::ResponseFromHeight : injecting proposals {:?}", - response.proposals + "blockstore::ResponseFromHeight : injecting {} proposals", + response.proposals.len() ); + // Sort proposals by number + let proposals = response + .proposals + .into_iter() + .sorted_by_key(|p| p.number()) + .collect_vec(); + // Just pump the Proposals back to ourselves, and it will be picked up and processed as if it were received. // Only issue is the timestamp skew. We should probably fix that. - for p in response.proposals { - tracing::trace!("Received proposal from height: {:?}", p); - self.message_sender - .send_external_message(self.peer_id, ExternalMessage::Proposal(p))?; + for p in proposals { + tracing::trace!( + "Received proposal number: {} hash: {}", + p.number(), + p.hash(), + ); + // replay the proposals } + + // We're done with this peer + self.peers.push(self.in_flight.take().unwrap()); // ... 
Ok(()) } @@ -177,14 +246,12 @@ impl BlockStore { let message = if block_gap > self.max_blocks_in_flight as u64 / 2 { // we're far from latest block ExternalMessage::RequestFromHeight(RequestBlock { - from_number: alpha_block.header.number, from_hash: alpha_block.header.hash, batch_size: self.max_blocks_in_flight, }) } else { // we're close to latest block ExternalMessage::RequestFromHash(RequestBlock { - from_number: omega_block.header.number, from_hash: omega_block.header.hash, batch_size: self.max_blocks_in_flight, }) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 8689c6f80..e51fbdc7b 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -229,7 +229,6 @@ impl fmt::Debug for BlockResponse { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RequestBlock { - pub from_number: u64, pub from_hash: Hash, pub batch_size: usize, } @@ -304,7 +303,7 @@ impl Display for ExternalMessage { write!( f, "RequestFromHeight({}, num={})", - r.from_number, r.batch_size + r.from_hash, r.batch_size ) } ExternalMessage::RequestFromHash(r) => { diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 305666489..48d8fabf3 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -276,59 +276,12 @@ impl Node { .send((response_channel, ExternalMessage::Acknowledgement))?; } ExternalMessage::RequestFromHeight(request) => { - tracing::debug!( - "blockstore::RequestFromHeight : received a block request from {}", - from - ); - - if from == self.peer_id { - warn!("blockstore::RequestFromHeight : ignoring blocks from self"); - return Ok(()); - } - - // TODO: Check if we should service this request. - // Validators shall not respond to this request. - - let Some(alpha) = self.db.get_block_by_hash(&request.from_hash)? else { - // We do not have the starting block - tracing::warn!( - "blockstore::RequestFromHeight : missing starting block {}", - request.from_hash - ); - self.request_responses.send(( - response_channel, - ExternalMessage::ResponseFromHeight(ResponseBlock { proposals: vec![] }), - ))?; - return Ok(()); - }; - - // TODO: Replace this with a single SQL query - let mut proposals = Vec::new(); - let batch_size = self.config.max_blocks_in_flight.min(request.batch_size) as u64; - for num in - alpha.number().saturating_add(1)..=alpha.number().saturating_add(batch_size) - { - let Some(block) = self.db.get_canonical_block_by_number(num)? else { - // that's all we have! - break; - }; - proposals.push(self.block_to_proposal(block)); - } - - let message = ExternalMessage::ResponseFromHeight(ResponseBlock { proposals }); - tracing::trace!( - ?message, - "blockstore::RequestFromHeight : responding to block request from height" - ); + let message = self + .consensus + .blockstore + .handle_request_from_height(from, request)?; self.request_responses.send((response_channel, message))?; } - ExternalMessage::ResponseFromHeight(response) => { - self.consensus.blockstore.handle_response_from_height(from, response)?; - // Acknowledge this block response. This does nothing because the `BlockResponse` request was sent by - // us, but we keep it here for symmetry with the other handlers. 
- self.request_responses - .send((response_channel, ExternalMessage::Acknowledgement))?; - } ExternalMessage::RequestFromHash(request) => { debug!( "blockstore::RequestFromHash : received a block request from {}", @@ -497,6 +450,11 @@ impl Node { pub fn handle_response(&mut self, from: PeerId, message: ExternalMessage) -> Result<()> { debug!(%from, to = %self.peer_id, %message, "handling response"); match message { + ExternalMessage::ResponseFromHeight(response) => { + self.consensus + .blockstore + .handle_response_from_height(from, response)?; + } ExternalMessage::BlockResponse(m) => self.handle_block_response(from, m)?, ExternalMessage::Acknowledgement => {} _ => { From 3fa48405bc90ccf5ab90db782eb6910225b837bd Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 20:17:01 +0800 Subject: [PATCH 012/119] feat: direct insert into DB, without receipts/touched/state. --- zilliqa/src/blockstore.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index abe076842..dd06bf57c 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -13,7 +13,7 @@ use libp2p::PeerId; use crate::{ cfg::NodeConfig, db::Db, - message::{Block, ExternalMessage, Proposal, RequestBlock, ResponseBlock}, + message::{Block, ExternalMessage, InternalMessage, Proposal, RequestBlock, ResponseBlock}, node::MessageSender, }; @@ -187,11 +187,18 @@ impl BlockStore { // Only issue is the timestamp skew. We should probably fix that. for p in proposals { tracing::trace!( - "Received proposal number: {} hash: {}", + "Inserting proposal number: {} hash: {}", p.number(), p.hash(), ); - // replay the proposals + + let (block, transactions) = p.into_parts(); + + // TODO: Bulk SQL insert + for tx in transactions { + self.db.insert_transaction(&tx.calculate_hash(), &tx)?; + } + self.db.insert_block(&block)?; } // We're done with this peer From dcfeed1a84dd57dd1579d80e7cb0740defc1e042 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 27 Dec 2024 23:05:33 +0800 Subject: [PATCH 013/119] feat: successfully injecting blocks/state_trie --- zilliqa/src/blockstore.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index dd06bf57c..e6b6706d5 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -13,7 +13,7 @@ use libp2p::PeerId; use crate::{ cfg::NodeConfig, db::Db, - message::{Block, ExternalMessage, InternalMessage, Proposal, RequestBlock, ResponseBlock}, + message::{Block, ExternalMessage, ProcessProposal, Proposal, RequestBlock, ResponseBlock}, node::MessageSender, }; @@ -170,9 +170,9 @@ impl BlockStore { tracing::warn!("blockstore::ResponseFromHeight : partial blocks {from}",); } - // TODO: Inject proposals - tracing::debug!( - "blockstore::ResponseFromHeight : injecting {} proposals", + // TODO: Any additional checks we should do here? + tracing::info!( + "blockstore::ResponseFromHeight : injecting {} proposals from {from}", response.proposals.len() ); @@ -183,22 +183,21 @@ impl BlockStore { .sorted_by_key(|p| p.number()) .collect_vec(); - // Just pump the Proposals back to ourselves, and it will be picked up and processed as if it were received. - // Only issue is the timestamp skew. We should probably fix that. + // Just pump the Proposals back to ourselves. 
for p in proposals { tracing::trace!( - "Inserting proposal number: {} hash: {}", + "Injecting proposal number: {} hash: {}", p.number(), p.hash(), ); - let (block, transactions) = p.into_parts(); - - // TODO: Bulk SQL insert - for tx in transactions { - self.db.insert_transaction(&tx.calculate_hash(), &tx)?; - } - self.db.insert_block(&block)?; + self.message_sender.send_external_message( + self.peer_id, + ExternalMessage::ProcessProposal(ProcessProposal { + from: self.peer_id.to_bytes(), // FIXME: change this to PeerId instead of Vec + block: p, + }), + )?; } // We're done with this peer @@ -249,7 +248,7 @@ impl BlockStore { .number .saturating_sub(alpha_block.header.number); - // TODO: Double-check hysteresis logic. + // TODO: Double-check hysteresis logic - may not even be necessary to do RequestFromHash let message = if block_gap > self.max_blocks_in_flight as u64 / 2 { // we're far from latest block ExternalMessage::RequestFromHeight(RequestBlock { @@ -266,7 +265,7 @@ impl BlockStore { let peer = self.in_flight.as_ref().unwrap(); - tracing::debug!(?message, "Requesting missing blocks from {}", peer.peer_id); + tracing::info!(?message, "Requesting missing blocks from {}", peer.peer_id); self.message_sender .send_external_message(peer.peer_id, message)?; From b3fb9992145def38aad11a6eca2b664bc3b25ee6 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 28 Dec 2024 10:03:47 +0800 Subject: [PATCH 014/119] feat: small refactor blockstore.rs --- zilliqa/src/blockstore.rs | 124 +++++++++++++++++++++++++++++--------- zilliqa/src/node.rs | 5 ++ 2 files changed, 102 insertions(+), 27 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index e6b6706d5..0845baf6d 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -17,6 +17,13 @@ use crate::{ node::MessageSender, }; +enum DownGrade { + None, + Partial, + Empty, + Timeout, +} + /// Stores and manages the node's list of blocks. Also responsible for making requests for new blocks. /// /// # Syncing Algorithm @@ -155,34 +162,23 @@ impl BlockStore { Ok(message) } - pub fn handle_response_from_height( - &mut self, - from: PeerId, - response: ResponseBlock, - ) -> Result<()> { - // Check that we have enough to complete the process, otherwise ignore - if response.proposals.is_empty() { - // Empty response, downgrade peer - tracing::warn!("blockstore::ResponseFromHeight : empty blocks {from}",); - } - if response.proposals.len() < self.max_blocks_in_flight { - // Partial response, downgrade peer - tracing::warn!("blockstore::ResponseFromHeight : partial blocks {from}",); - } - - // TODO: Any additional checks we should do here? + fn inject_proposals(&mut self, proposals: Vec) -> Result> { tracing::info!( - "blockstore::ResponseFromHeight : injecting {} proposals from {from}", - response.proposals.len() + "blockstore::InjectProposals : injecting {} proposals", + proposals.len() ); + if proposals.is_empty() { + return Ok(None); + } // Sort proposals by number - let proposals = response - .proposals + let proposals = proposals .into_iter() .sorted_by_key(|p| p.number()) .collect_vec(); + let last_proposal = proposals.last().cloned(); + // Just pump the Proposals back to ourselves. for p in proposals { tracing::trace!( @@ -199,9 +195,80 @@ impl BlockStore { }), )?; } + // return last proposal + Ok(last_proposal) + } + + fn done_with_peer(&mut self, downgrade: DownGrade) { + // ... 
+ if let Some(mut peer) = self.in_flight.take() { + peer.score += downgrade as u32; + self.peers.push(peer); + } + } + + pub fn handle_response_from_height( + &mut self, + from: PeerId, + response: ResponseBlock, + ) -> Result<()> { + // Check that we have enough to complete the process, otherwise ignore + if response.proposals.is_empty() { + // Empty response, downgrade peer + tracing::warn!("blockstore::ResponseFromHeight : empty blocks {from}",); + self.done_with_peer(DownGrade::Empty); + return Ok(()); + } else if response.proposals.len() < self.max_blocks_in_flight { + // Partial response, downgrade peer + tracing::warn!("blockstore::ResponseFromHeight : partial blocks {from}",); + self.done_with_peer(DownGrade::Partial); + } else { + self.done_with_peer(DownGrade::None); + } + + tracing::info!( + "blockstore::ResponseFromHeight : received {} blocks from {}", + response.proposals.len(), + from + ); + + // TODO: Any additional checks we should do here? + self.inject_proposals(response.proposals)?; + + // Speculatively request more blocks + Ok(()) + } + + pub fn handle_response_from_hash( + &mut self, + from: PeerId, + response: ResponseBlock, + ) -> Result<()> { + if response.proposals.is_empty() { + // Empty response, downgrade peer + tracing::warn!("blockstore::ResponseFromHash : empty blocks {from}",); + self.done_with_peer(DownGrade::Empty); + return Ok(()); + } else if response.proposals.len() <= self.max_blocks_in_flight / 2 { + // Partial response, downgrade peer + tracing::warn!("blockstore::ResponseFromHash : partial blocks {from}",); + self.done_with_peer(DownGrade::Partial); + return Ok(()); + } else { + // only process full responses + self.done_with_peer(DownGrade::None); + } + + tracing::info!( + "blockstore::ResponseFromHash : received {} blocks from {}", + response.proposals.len(), + from + ); + + // TODO: Any additional checks we should do here? + + self.inject_proposals(response.proposals)?; - // We're done with this peer - self.peers.push(self.in_flight.take().unwrap()); // ... Ok(()) } @@ -220,9 +287,8 @@ impl BlockStore { "In-flight request {} timed out, requesting from new peer", peer.peer_id ); - let mut peer = self.in_flight.take().unwrap(); - peer.score += 1; // TODO: Downgrade score if we keep timing out. - self.in_flight = self.get_next_peer(Some(peer)); + self.done_with_peer(DownGrade::Timeout); + self.in_flight = self.get_next_peer(None); } else { return Ok(()); } @@ -265,7 +331,11 @@ impl BlockStore { let peer = self.in_flight.as_ref().unwrap(); - tracing::info!(?message, "Requesting missing blocks from {}", peer.peer_id); + tracing::info!( + "Requesting {} missing blocks from {}", + self.max_blocks_in_flight, + peer.peer_id, + ); self.message_sender .send_external_message(peer.peer_id, message)?; @@ -277,7 +347,7 @@ impl BlockStore { // new peers should be tried last, which gives them time to sync first. // peers do not need to be unique. 
let new_peer = PeerInfo { - score: self.peers.iter().map(|p| p.score).max().unwrap_or(0), + score: self.peers.iter().map(|p| p.score).max().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), }; diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 48d8fabf3..1086fe460 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -455,6 +455,11 @@ impl Node { .blockstore .handle_response_from_height(from, response)?; } + ExternalMessage::ResponseFromHash(response) => { + self.consensus + .blockstore + .handle_response_from_hash(from, response)?; + } ExternalMessage::BlockResponse(m) => self.handle_block_response(from, m)?, ExternalMessage::Acknowledgement => {} _ => { From ef3a8fbb4b4329645c42076d2fdf578527b6aa55 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 28 Dec 2024 10:49:12 +0800 Subject: [PATCH 015/119] sec: make RequestId random, to mitigate response injections. --- zilliqa/src/node.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 1086fe460..9d125fc15 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -84,9 +84,7 @@ impl MessageSender { } pub fn next_request_id(&mut self) -> RequestId { - let request_id = self.request_id; - self.request_id.0 = self.request_id.0.wrapping_add(1); - request_id + RequestId(rand::random()) // TODO: make this more secure, non-predictable } /// Send a message to a remote node of the same shard. From 36449fa186df54add1c35111ed8866d3be2c4580 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 28 Dec 2024 16:26:10 +0800 Subject: [PATCH 016/119] feat: minor reorg. --- zilliqa/src/blockstore.rs | 149 +++++++++++++++++++++++++++----------- zilliqa/src/node.rs | 69 ++---------------- 2 files changed, 113 insertions(+), 105 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 0845baf6d..bd82a015a 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -20,13 +20,11 @@ use crate::{ enum DownGrade { None, Partial, - Empty, Timeout, + Empty, } -/// Stores and manages the node's list of blocks. Also responsible for making requests for new blocks. -/// -/// # Syncing Algorithm +/// Syncing Algorithm /// /// We rely on [crate::consensus::Consensus] informing us of newly received block proposals via: /// * [BlockStore::process_block] for blocks that can be part of our chain, because we already have their parent. @@ -35,6 +33,9 @@ enum DownGrade { /// Both these code paths also call [BlockStore::request_missing_blocks]. This finds the greatest view of any proposal /// we've seen (whether its part of our chain or not). +// TODO: What if we receive a fork +// TODO: How to start syncing at the start + #[derive(Debug)] pub struct BlockStore { // database @@ -81,11 +82,8 @@ impl BlockStore { }) } - pub fn handle_from_hash(&mut self, _: Vec) -> Result<()> { - // ... - Ok(()) - } - + /// Process a block proposal. + /// Checks if the parent block exists, and if not, triggers a sync. pub fn process_proposal(&mut self, block: Block) -> Result<()> { // ... // check if block parent exists @@ -97,7 +95,7 @@ impl BlockStore { "blockstore::ProcessProposal : Parent block {} not found", block.parent_hash() ); - self.request_missing_blocks(block)?; + self.request_missing_blocks(Some(block))?; return Ok(()); } Ok(()) @@ -107,17 +105,65 @@ impl BlockStore { // ... } + /// Convenience function to convert a block to a proposal (add full txs) + /// NOTE: Includes intershard transactions. 
Should only be used for syncing history, + /// not for consensus messages regarding new blocks. fn block_to_proposal(&self, block: Block) -> Proposal { + // since block must be valid, unwrap(s) are safe let txs = block .transactions .iter() .map(|hash| self.db.get_transaction(hash).unwrap().unwrap()) .map(|tx| tx.verify().unwrap()) - .collect(); + .collect_vec(); Proposal::from_parts(block, txs) } + pub fn handle_request_from_hash( + &mut self, + from: PeerId, + request: RequestBlock, + ) -> Result { + tracing::debug!( + "blockstore::RequestFromHash : received a block request from {}", + from + ); + + // TODO: Check if we should service this request + // Validators could respond to this request if there is nothing else to do. + + let Some(omega_block) = self.db.get_block_by_hash(&request.from_hash)? else { + // We do not have the starting block + tracing::warn!( + "blockstore::RequestFromHash : missing starting block {}", + request.from_hash + ); + let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }); + return Ok(message); + }; + + let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send + let mut proposals: Vec = Vec::new(); + let mut hash = omega_block.parent_hash(); + while proposals.len() < batch_size { + // grab the parent + let Some(block) = self.db.get_block_by_hash(&hash)? else { + // that's all we have! + break; + }; + hash = block.parent_hash(); + proposals.push(self.block_to_proposal(block)); + } + + let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals }); + tracing::trace!( + ?message, + "blockstore::RequestFromHash : responding to block request from height" + ); + Ok(message) + } + pub fn handle_request_from_height( &mut self, from: PeerId, @@ -144,8 +190,8 @@ impl BlockStore { }; // TODO: Replace this with a single SQL query + let batch_size = self.max_blocks_in_flight.min(request.batch_size) as u64; // mitigate DOS attacks by limiting the number of blocks we send let mut proposals = Vec::new(); - let batch_size = self.max_blocks_in_flight.min(request.batch_size) as u64; for num in alpha.number().saturating_add(1)..=alpha.number().saturating_add(batch_size) { let Some(block) = self.db.get_canonical_block_by_number(num)? else { // that's all we have! @@ -162,14 +208,15 @@ impl BlockStore { Ok(message) } - fn inject_proposals(&mut self, proposals: Vec) -> Result> { + /// Pump the proposals into the chain. + fn inject_proposals(&mut self, proposals: Vec) -> Result<()> { tracing::info!( "blockstore::InjectProposals : injecting {} proposals", proposals.len() ); if proposals.is_empty() { - return Ok(None); + return Ok(()); } // Sort proposals by number let proposals = proposals @@ -177,8 +224,6 @@ impl BlockStore { .sorted_by_key(|p| p.number()) .collect_vec(); - let last_proposal = proposals.last().cloned(); - // Just pump the Proposals back to ourselves. for p in proposals { tracing::trace!( @@ -196,9 +241,10 @@ impl BlockStore { )?; } // return last proposal - Ok(last_proposal) + Ok(()) } + /// Downgrade a peer based on the response received. fn done_with_peer(&mut self, downgrade: DownGrade) { // ... if let Some(mut peer) = self.in_flight.take() { @@ -212,7 +258,7 @@ impl BlockStore { from: PeerId, response: ResponseBlock, ) -> Result<()> { - // Check that we have enough to complete the process, otherwise ignore + // Process whatever we have received. 
if response.proposals.is_empty() { // Empty response, downgrade peer tracing::warn!("blockstore::ResponseFromHeight : empty blocks {from}",); @@ -233,9 +279,29 @@ impl BlockStore { ); // TODO: Any additional checks we should do here? + + // Inject received proposals + let next_hash = response.proposals.last().unwrap().hash(); self.inject_proposals(response.proposals)?; - // Speculatively request more blocks + // Speculatively request more blocks, as there might be more + self.in_flight = self.get_next_peer(); + if let Some(peer) = self.in_flight.as_ref() { + let message = ExternalMessage::RequestFromHeight(RequestBlock { + batch_size: self.max_blocks_in_flight, + from_hash: next_hash, + }); + + tracing::info!( + "Requesting {} missing blocks from {}", + self.max_blocks_in_flight, + peer.peer_id, + ); + + self.message_sender + .send_external_message(peer.peer_id, message)?; + } + Ok(()) } @@ -244,13 +310,15 @@ impl BlockStore { from: PeerId, response: ResponseBlock, ) -> Result<()> { + // Check that we have enough to complete the process, otherwise ignore if response.proposals.is_empty() { - // Empty response, downgrade peer + // Empty response, downgrade peer, skip tracing::warn!("blockstore::ResponseFromHash : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); } else if response.proposals.len() <= self.max_blocks_in_flight / 2 { // Partial response, downgrade peer + // Skip processing because we want to ensure that we have ALL the needed blocks to sync up. tracing::warn!("blockstore::ResponseFromHash : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); return Ok(()); @@ -267,19 +335,18 @@ impl BlockStore { // TODO: Any additional checks we should do here? + // Inject the proposals self.inject_proposals(response.proposals)?; - - // ... Ok(()) } /// Request blocks between the current height and the given block. /// /// The approach is to request blocks in batches of `max_blocks_in_flight` blocks. + /// If None block is provided, we request blocks from the last known canonical block forwards. /// If the block gap is large, we request blocks from the last known canonical block forwards. /// If the block gap is small, we request blocks from the latest block backwards. - /// - pub fn request_missing_blocks(&mut self, omega_block: Block) -> Result<()> { + pub fn request_missing_blocks(&mut self, omega_block: Option) -> Result<()> { // Early exit if there's a request in-flight; and if it has not expired. if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { @@ -288,14 +355,14 @@ impl BlockStore { peer.peer_id ); self.done_with_peer(DownGrade::Timeout); - self.in_flight = self.get_next_peer(None); + self.in_flight = self.get_next_peer(); } else { return Ok(()); } } else { - self.in_flight = self.get_next_peer(None); + self.in_flight = self.get_next_peer(); if self.in_flight.is_none() { - tracing::error!("No peers available to request missing blocks"); + tracing::warn!("Insufficient peers available to request missing blocks"); return Ok(()); } } @@ -309,10 +376,15 @@ impl BlockStore { let alpha_block = self.db.get_canonical_block_by_number(height)?.unwrap(); // Compute the block gap. 
- let block_gap = omega_block - .header - .number - .saturating_sub(alpha_block.header.number); + let block_gap = if let Some(omega_block) = omega_block.as_ref() { + omega_block + .header + .number + .saturating_sub(alpha_block.header.number) + } else { + // Trigger a RequestFromHeight if the source block is None + self.max_blocks_in_flight as u64 + }; // TODO: Double-check hysteresis logic - may not even be necessary to do RequestFromHash let message = if block_gap > self.max_blocks_in_flight as u64 / 2 { @@ -324,7 +396,7 @@ impl BlockStore { } else { // we're close to latest block ExternalMessage::RequestFromHash(RequestBlock { - from_hash: omega_block.header.hash, + from_hash: omega_block.unwrap().header.hash, batch_size: self.max_blocks_in_flight, }) }; @@ -345,7 +417,6 @@ impl BlockStore { /// Add a peer to the list of peers. pub fn add_peer(&mut self, peer: PeerId) { // new peers should be tried last, which gives them time to sync first. - // peers do not need to be unique. let new_peer = PeerInfo { score: self.peers.iter().map(|p| p.score).max().unwrap_or_default(), peer_id: peer, @@ -359,18 +430,14 @@ impl BlockStore { self.peers.retain(|p| p.peer_id != peer); } - fn get_next_peer(&mut self, prev_peer: Option) -> Option { - // Push the current peer into the heap, risks spamming the same peer. + fn get_next_peer(&mut self) -> Option { // TODO: implement a better strategy for this. - if let Some(peer) = prev_peer { - self.peers.push(peer); + if self.peers.len() < 2 { + return None; } let mut peer = self.peers.pop()?; - - // used to determine stale in-flight requests. - peer.last_used = std::time::Instant::now(); - + peer.last_used = std::time::Instant::now(); // used to determine stale in-flight requests. Some(peer) } } diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 9d125fc15..2c0c730fa 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -34,7 +34,7 @@ use crate::{ inspector::{self, ScillaInspector}, message::{ Block, BlockHeader, BlockResponse, ExternalMessage, InternalMessage, IntershardCall, - ProcessProposal, Proposal, ResponseBlock, + ProcessProposal, Proposal, }, node_launcher::ResponseChannel, p2p_node::{LocalMessageTuple, OutboundMessageTuple}, @@ -281,51 +281,10 @@ impl Node { self.request_responses.send((response_channel, message))?; } ExternalMessage::RequestFromHash(request) => { - debug!( - "blockstore::RequestFromHash : received a block request from {}", - from - ); - - if from == self.peer_id { - warn!("blockstore::RequestFromHash : ignoring request from self"); - return Ok(()); - } - - // TODO: Check if we should service this request - // Validators could respond to this request if there is nothing else to do. - - let Some(omega_block) = self.db.get_block_by_hash(&request.from_hash)? else { - // We do not have the starting block - tracing::warn!( - "blockstore::RequestFromHash : missing starting block {}", - request.from_hash - ); - self.request_responses.send(( - response_channel, - ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }), - ))?; - return Ok(()); - }; - - let mut proposals = Vec::new(); - let mut hash = omega_block.parent_hash(); - // grab up to batch_size blocks - let batch_size = request.batch_size.min(self.config.max_blocks_in_flight); - while proposals.len() < batch_size { - // grab the parent - let Some(block) = self.db.get_block_by_hash(&hash)? else { - // that's all we have! 
- break; - }; - hash = block.parent_hash(); - proposals.push(self.block_to_proposal(block)); - } - - let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals }); - tracing::trace!( - ?message, - "blockstore::RequestFromHash : responding to block request from height" - ); + let message = self + .consensus + .blockstore + .handle_request_from_hash(from, request)?; self.request_responses.send((response_channel, message))?; } ExternalMessage::ResponseFromHash(response) => { @@ -1008,24 +967,6 @@ impl Node { self.peer_num.load(std::sync::atomic::Ordering::Relaxed) } - /// Convenience function to convert a block to a proposal (add full txs) - /// NOTE: Includes intershard transactions. Should only be used for syncing history, - /// not for consensus messages regarding new blocks. - fn block_to_proposal(&self, block: Block) -> Proposal { - let txs: Vec<_> = block - .transactions - .iter() - .map(|tx_hash| { - self.consensus - .get_transaction_by_hash(*tx_hash) - .unwrap() - .unwrap() - }) - .collect(); - - Proposal::from_parts(block, txs) - } - fn handle_proposal(&mut self, from: PeerId, proposal: Proposal) -> Result<()> { if let Some((to, message)) = self.consensus.proposal(from, proposal, false)? { self.reset_timeout From a8abeac1da63b98c365f79edd86e19e08b355a5b Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 28 Dec 2024 17:09:15 +0800 Subject: [PATCH 017/119] feat: disable speculative requests for now, until we have a better way to limit it. --- zilliqa/src/blockstore.rs | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index bd82a015a..a1fe815ab 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -35,6 +35,7 @@ enum DownGrade { // TODO: What if we receive a fork // TODO: How to start syncing at the start +// TODO: Do speculative fetches #[derive(Debug)] pub struct BlockStore { @@ -281,26 +282,26 @@ impl BlockStore { // TODO: Any additional checks we should do here? // Inject received proposals - let next_hash = response.proposals.last().unwrap().hash(); + // let next_hash = response.proposals.last().unwrap().hash(); self.inject_proposals(response.proposals)?; // Speculatively request more blocks, as there might be more - self.in_flight = self.get_next_peer(); - if let Some(peer) = self.in_flight.as_ref() { - let message = ExternalMessage::RequestFromHeight(RequestBlock { - batch_size: self.max_blocks_in_flight, - from_hash: next_hash, - }); - - tracing::info!( - "Requesting {} missing blocks from {}", - self.max_blocks_in_flight, - peer.peer_id, - ); - - self.message_sender - .send_external_message(peer.peer_id, message)?; - } + // self.in_flight = self.get_next_peer(); + // if let Some(peer) = self.in_flight.as_ref() { + // let message = ExternalMessage::RequestFromHeight(RequestBlock { + // batch_size: self.max_blocks_in_flight, + // from_hash: next_hash, + // }); + + // tracing::info!( + // "Requesting {} missing blocks from {}", + // self.max_blocks_in_flight, + // peer.peer_id, + // ); + + // self.message_sender + // .send_external_message(peer.peer_id, message)?; + // } Ok(()) } From 1f65f75c5963066dae06debc2b594a7422b192fe Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 28 Dec 2024 21:25:59 +0800 Subject: [PATCH 018/119] feat: re-enabled speculative fetch. 
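
Speculative fetching keeps requesting the next forward batch while earlier
responses are still being injected, bounded by `max_blocks_injected`. A rough,
standalone sketch of the bookkeeping this assumes (plain counters stand in for
the real proposal queue; the constants are illustrative, not the shipped
defaults):

    // Runnable sketch of the speculative-fetch gate.
    fn main() {
        let max_blocks_injected = 310usize; // assumed cap on queued proposals
        let batch_size = 31usize;
        let mut injected = 0usize;

        // A ResponseFromHeight arrives and its proposals are looped back to us.
        injected += batch_size;

        // Only fire the next speculative RequestFromHeight while the queue is small.
        if injected < max_blocks_injected {
            println!("request next batch of {batch_size}");
        }

        // Each proposal handled off the queue is expected to drain the counter
        // again (the handler itself is still a stub at this point in the series).
        injected = injected.saturating_sub(1);
        println!("injected = {injected}");
    }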
--- zilliqa/src/blockstore.rs | 71 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index a1fe815ab..3238bde93 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -34,8 +34,8 @@ enum DownGrade { /// we've seen (whether its part of our chain or not). // TODO: What if we receive a fork -// TODO: How to start syncing at the start -// TODO: Do speculative fetches +// TODO: How to start syncing validators +// TODO: How to handle restarting new blocks, while injected blocks are still in-queue. #[derive(Debug)] pub struct BlockStore { @@ -51,8 +51,12 @@ pub struct BlockStore { request_timeout: Duration, // how many blocks to request at once max_blocks_in_flight: usize, + // how many blocks to inject into the queue + max_blocks_injected: usize, // our peer id peer_id: PeerId, + // how many injected proposals + injected: usize, } impl BlockStore { @@ -71,18 +75,27 @@ impl BlockStore { }) .collect(); let peer_id = message_sender.our_peer_id; + let max_blocks = config.max_blocks_in_flight.max(31) as usize; // between 30 seconds and 3 days of blocks. Ok(Self { db, message_sender, peers, - in_flight: None, - request_timeout: config.consensus.consensus_timeout, - max_blocks_in_flight: config.max_blocks_in_flight.max(31) as usize, // between 30 seconds and 3 days of blocks. peer_id, + request_timeout: config.consensus.consensus_timeout, + max_blocks_in_flight: max_blocks, + max_blocks_injected: max_blocks * 10, // fire 10 speculative requests + in_flight: None, + injected: 0, }) } + /// Handle an injected proposal + /// + pub fn handle_injected_proposal(&mut self, proposal: Proposal) -> Result<()> { + Ok(()) + } + /// Process a block proposal. /// Checks if the parent block exists, and if not, triggers a sync. pub fn process_proposal(&mut self, block: Block) -> Result<()> { @@ -102,10 +115,6 @@ impl BlockStore { Ok(()) } - pub fn buffer_proposal(&self, _block: Block) { - // ... - } - /// Convenience function to convert a block to a proposal (add full txs) /// NOTE: Includes intershard transactions. Should only be used for syncing history, /// not for consensus messages regarding new blocks. @@ -121,6 +130,9 @@ impl BlockStore { Proposal::from_parts(block, txs) } + /// Request blocks from a hash, backwards. + /// + /// It will collect N blocks by following the block.parent_hash() of the requested block. pub fn handle_request_from_hash( &mut self, from: PeerId, @@ -165,6 +177,7 @@ impl BlockStore { Ok(message) } + /// Request for blocks from a height, forwards. pub fn handle_request_from_height( &mut self, from: PeerId, @@ -225,6 +238,9 @@ impl BlockStore { .sorted_by_key(|p| p.number()) .collect_vec(); + // Increment propoals injected + self.injected += proposals.len(); + // Just pump the Proposals back to ourselves. for p in proposals { tracing::trace!( @@ -282,27 +298,28 @@ impl BlockStore { // TODO: Any additional checks we should do here? 
// Inject received proposals - // let next_hash = response.proposals.last().unwrap().hash(); + let next_hash = response.proposals.last().unwrap().hash(); self.inject_proposals(response.proposals)?; // Speculatively request more blocks, as there might be more - // self.in_flight = self.get_next_peer(); - // if let Some(peer) = self.in_flight.as_ref() { - // let message = ExternalMessage::RequestFromHeight(RequestBlock { - // batch_size: self.max_blocks_in_flight, - // from_hash: next_hash, - // }); - - // tracing::info!( - // "Requesting {} missing blocks from {}", - // self.max_blocks_in_flight, - // peer.peer_id, - // ); - - // self.message_sender - // .send_external_message(peer.peer_id, message)?; - // } + if self.injected < self.max_blocks_injected { + self.in_flight = self.get_next_peer(); + if let Some(peer) = self.in_flight.as_ref() { + let message = ExternalMessage::RequestFromHeight(RequestBlock { + batch_size: self.max_blocks_in_flight, + from_hash: next_hash, + }); + + tracing::info!( + "Requesting {} future blocks from {}", + self.max_blocks_in_flight, + peer.peer_id, + ); + self.message_sender + .send_external_message(peer.peer_id, message)?; + } + } Ok(()) } @@ -432,7 +449,7 @@ impl BlockStore { } fn get_next_peer(&mut self) -> Option { - // TODO: implement a better strategy for this. + // Minimum of 2 peers to avoid single source of truth. if self.peers.len() < 2 { return None; } From 093054036a6289ca4c6324fafdd061a50686ba47 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 10:12:12 +0800 Subject: [PATCH 019/119] feat: use InjectedProposals instead of ProcessProposals. --- zilliqa/src/blockstore.rs | 33 +++++++++++++++++++-------------- zilliqa/src/cfg.rs | 4 ++-- zilliqa/src/message.rs | 12 ++++++++++++ zilliqa/src/node.rs | 13 +++++++------ 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 3238bde93..6000a8d34 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -13,7 +13,7 @@ use libp2p::PeerId; use crate::{ cfg::NodeConfig, db::Db, - message::{Block, ExternalMessage, ProcessProposal, Proposal, RequestBlock, ResponseBlock}, + message::{Block, ExternalMessage, InjectedProposal, Proposal, RequestBlock, ResponseBlock}, node::MessageSender, }; @@ -34,8 +34,6 @@ enum DownGrade { /// we've seen (whether its part of our chain or not). // TODO: What if we receive a fork -// TODO: How to start syncing validators -// TODO: How to handle restarting new blocks, while injected blocks are still in-queue. #[derive(Debug)] pub struct BlockStore { @@ -75,7 +73,6 @@ impl BlockStore { }) .collect(); let peer_id = message_sender.our_peer_id; - let max_blocks = config.max_blocks_in_flight.max(31) as usize; // between 30 seconds and 3 days of blocks. Ok(Self { db, @@ -83,16 +80,19 @@ impl BlockStore { peers, peer_id, request_timeout: config.consensus.consensus_timeout, - max_blocks_in_flight: max_blocks, - max_blocks_injected: max_blocks * 10, // fire 10 speculative requests + max_blocks_in_flight: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. 
+ max_blocks_injected: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, injected: 0, }) } - /// Handle an injected proposal - /// - pub fn handle_injected_proposal(&mut self, proposal: Proposal) -> Result<()> { + /// Match a received proposal + pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { + if prop.from != self.peer_id { + tracing::warn!("Received a foreign InjectedProposal from {}", prop.from); + } + self.injected = self.injected.saturating_sub(1); Ok(()) } @@ -238,8 +238,8 @@ impl BlockStore { .sorted_by_key(|p| p.number()) .collect_vec(); - // Increment propoals injected - self.injected += proposals.len(); + // Increment proposals injected + self.injected = self.injected.saturating_add(proposals.len()); // Just pump the Proposals back to ourselves. for p in proposals { @@ -251,8 +251,8 @@ impl BlockStore { self.message_sender.send_external_message( self.peer_id, - ExternalMessage::ProcessProposal(ProcessProposal { - from: self.peer_id.to_bytes(), // FIXME: change this to PeerId instead of Vec + ExternalMessage::InjectedProposal(InjectedProposal { + from: self.peer_id, block: p, }), )?; @@ -297,8 +297,10 @@ impl BlockStore { // TODO: Any additional checks we should do here? - // Inject received proposals + // Last known proposal let next_hash = response.proposals.last().unwrap().hash(); + + // Inject received proposals self.inject_proposals(response.proposals)?; // Speculatively request more blocks, as there might be more @@ -378,6 +380,9 @@ impl BlockStore { return Ok(()); } } else { + if self.injected > 0 { + return Ok(()); + } self.in_flight = self.get_next_peer(); if self.in_flight.is_none() { tracing::warn!("Insufficient peers available to request missing blocks"); diff --git a/zilliqa/src/cfg.rs b/zilliqa/src/cfg.rs index c08c0bc88..cf137e4a4 100644 --- a/zilliqa/src/cfg.rs +++ b/zilliqa/src/cfg.rs @@ -104,7 +104,7 @@ pub struct NodeConfig { pub max_blocks_in_flight: usize, /// The maximum number of blocks to request in a single message when syncing. #[serde(default = "block_request_batch_size_default")] - pub block_request_batch_size: u64, + pub block_request_batch_size: usize, /// The maximum number of key value pairs allowed to be returned withing the response of the `GetSmartContractState` RPC. Defaults to no limit. #[serde(default = "state_rpc_limit_default")] pub state_rpc_limit: usize, @@ -208,7 +208,7 @@ pub fn max_blocks_in_flight_default() -> usize { 1000 } -pub fn block_request_batch_size_default() -> u64 { +pub fn block_request_batch_size_default() -> usize { 100 } diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index e51fbdc7b..c2724b3cc 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -238,6 +238,14 @@ pub struct ResponseBlock { pub proposals: Vec, } +/// Used to convey proposal processing internally, to avoid blocking threads for too long. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InjectedProposal { + // An encoded PeerId + pub from: PeerId, + pub block: Proposal, +} + /// Used to convey proposal processing internally, to avoid blocking threads for too long. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProcessProposal { @@ -276,6 +284,7 @@ pub enum ExternalMessage { RequestFromHash(RequestBlock), ResponseFromHeight(ResponseBlock), ResponseFromHash(ResponseBlock), + InjectedProposal(InjectedProposal), } impl ExternalMessage { @@ -291,6 +300,9 @@ impl ExternalMessage { impl Display for ExternalMessage { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { + ExternalMessage::InjectedProposal(p) => { + write!(f, "InjectedProposal {}", p.block.number()) + } ExternalMessage::AddPeer => write!(f, "AddPeer"), ExternalMessage::RemovePeer => write!(f, "RemovePeer"), ExternalMessage::ResponseFromHeight(r) => { diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 2c0c730fa..dfe830354 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -33,8 +33,8 @@ use crate::{ exec::{PendingState, TransactionApplyResult}, inspector::{self, ScillaInspector}, message::{ - Block, BlockHeader, BlockResponse, ExternalMessage, InternalMessage, IntershardCall, - ProcessProposal, Proposal, + Block, BlockHeader, BlockResponse, ExternalMessage, InjectedProposal, InternalMessage, + IntershardCall, Proposal, }, node_launcher::ResponseChannel, p2p_node::{LocalMessageTuple, OutboundMessageTuple}, @@ -373,8 +373,8 @@ impl Node { } // This just breaks down group block messages into individual messages to stop them blocking threads // for long periods. - ExternalMessage::ProcessProposal(m) => { - self.handle_process_proposal(from, m)?; + ExternalMessage::InjectedProposal(p) => { + self.handle_injected_proposal(from, p)?; } // Handle requests which contain a block proposal. Initially sent as a broadcast, it is re-routed into // a Request by the underlying layer, with a faux request-id. This is to mitigate issues when there are @@ -1001,12 +1001,13 @@ impl Node { Ok(()) } - fn handle_process_proposal(&mut self, from: PeerId, req: ProcessProposal) -> Result<()> { + fn handle_injected_proposal(&mut self, from: PeerId, req: InjectedProposal) -> Result<()> { if from != self.consensus.peer_id() { - warn!("Someone ({from}) sent me a ProcessProposal; illegal- ignoring"); + warn!("Someone ({from}) sent me a InjectedProposal; illegal- ignoring"); return Ok(()); } trace!("Handling proposal for view {0}", req.block.header.view); + self.consensus.blockstore.mark_received_proposal(&req)?; let proposal = self.consensus.receive_block(from, req.block)?; if let Some(proposal) = proposal { trace!( From 45c202e9fcb66378f9a0a74467045d3ad949e7d1 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 10:35:38 +0800 Subject: [PATCH 020/119] chore: minor cleanups. 
--- zilliqa/src/blockstore.rs | 1 - zilliqa/src/lib.rs | 2 +- zilliqa/src/node.rs | 24 ------------------------ zilliqa/src/p2p_node.rs | 2 +- 4 files changed, 2 insertions(+), 27 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 6000a8d34..d555651ea 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -6,7 +6,6 @@ use std::{ }; use anyhow::Result; - use itertools::Itertools; use libp2p::PeerId; diff --git a/zilliqa/src/lib.rs b/zilliqa/src/lib.rs index 642e82df2..28445f822 100644 --- a/zilliqa/src/lib.rs +++ b/zilliqa/src/lib.rs @@ -1,6 +1,7 @@ pub mod api; pub mod block_store; mod blockhooks; +pub mod blockstore; pub mod cfg; pub mod consensus; pub mod constants; @@ -27,4 +28,3 @@ pub mod test_util; pub mod time; pub mod transaction; pub mod zq1_proto; -pub mod blockstore; diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index dfe830354..ce792ca5a 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -287,30 +287,6 @@ impl Node { .handle_request_from_hash(from, request)?; self.request_responses.send((response_channel, message))?; } - ExternalMessage::ResponseFromHash(response) => { - // Check that we have enough to complete the process, otherwise ignore - if response.proposals.is_empty() { - // Empty response, downgrade peer - warn!("block_store::ResponseFromHeight : empty blocks in flight {from}",); - } - // Check that we have enough to complete the process, otherwise ignore - if response.proposals.len() * 2 < self.config.max_blocks_in_flight as usize { - warn!("block_store::ResponseFromHash : insufficient blocks in flight {from}",); - return Ok(()); - } - - // TODO: Inject proposals - debug!( - "block_store::ResponseFromHash : injecting proposals {:?}", - response - ); - - // Acknowledge this block response. This does nothing because the `BlockResponse` request was sent by - // us, but we keep it here for symmetry with the other handlers. - self.request_responses - .send((response_channel, ExternalMessage::Acknowledgement))?; - } - // Respond negatively to old BlockRequests. ExternalMessage::BlockRequest(request) => { self.request_responses.send(( diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index e353d6cf5..a2632ad34 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -271,7 +271,7 @@ impl P2pNode { SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Unsubscribed { peer_id, topic })) => { let message = ExternalMessage::RemovePeer; self.send_to(&topic, |c| c.broadcasts.send((peer_id, message)))?; - } + } SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Message{ message_id: msg_id, message: gossipsub::Message { From 3f379b77439dc55c5f63ce270f479aafb8341f67 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 11:12:43 +0800 Subject: [PATCH 021/119] feat: avoid single source of truth. --- zilliqa/src/blockstore.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index d555651ea..8140b1618 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -262,9 +262,11 @@ impl BlockStore { /// Downgrade a peer based on the response received. fn done_with_peer(&mut self, downgrade: DownGrade) { - // ... if let Some(mut peer) = self.in_flight.take() { - peer.score += downgrade as u32; + // Downgrade peer, if necessary + peer.score = peer.score.saturating_add(downgrade as u32); + // Ensure that the next peer is equal or better, to avoid a single source of truth. 
+ peer.score = peer.score.max(self.peers.peek().unwrap().score); self.peers.push(peer); } } From 7a522d96d56226b1b41a884499baaf9defe4c3fa Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 14:15:14 +0800 Subject: [PATCH 022/119] fix: insufficient peers in GCP. --- zilliqa/src/p2p_node.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index a2632ad34..6fd941b23 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -133,7 +133,8 @@ impl P2pNode { // So, the nodes are unable to see each other directly and remain isolated, defeating kademlia and autonat. identify: identify::Behaviour::new( identify::Config::new("zilliqa/1.0.0".into(), key_pair.public()) - .with_hide_listen_addrs(!cfg!(debug_assertions)), + .with_hide_listen_addrs(false) + .with_push_listen_addr_updates(true), ), }) })? From 85a797532ad5e2229858f8a6dffa71cd063acd78 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 15:46:34 +0800 Subject: [PATCH 023/119] feat: only inject blocks sourced from two peers - impossible to sync, atm. --- zilliqa/src/blockstore.rs | 168 +++++++++++++++++++++++--------------- zilliqa/src/message.rs | 13 +-- zilliqa/src/node.rs | 8 +- 3 files changed, 114 insertions(+), 75 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 8140b1618..466d362c7 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -1,6 +1,6 @@ use std::{ cmp::Ordering, - collections::BinaryHeap, + collections::{BinaryHeap, HashMap}, sync::Arc, time::{Duration, Instant}, }; @@ -25,14 +25,17 @@ enum DownGrade { /// Syncing Algorithm /// -/// We rely on [crate::consensus::Consensus] informing us of newly received block proposals via: -/// * [BlockStore::process_block] for blocks that can be part of our chain, because we already have their parent. -/// * [BlockStore::buffer_proposal] for blocks that can't (yet) be part of our chain. +/// When a Proposal is received by Consensus, we check if the parent exists in our DB. +/// If not, then it triggers a syncing algorithm. +/// +/// 1. We check if the gap between our last canonical block and the latest Proposal. +/// a. If it is a small gap, we request for history, going backwards from Proposal. +/// b. If it is a big gap, we request for history, going forwards from Canonical. +/// 2. When we receive a response, we inject the Proposals into our processing pipeline. /// -/// Both these code paths also call [BlockStore::request_missing_blocks]. This finds the greatest view of any proposal -/// we've seen (whether its part of our chain or not). 
// TODO: What if we receive a fork +// TODO: How to handle adverserial history #[derive(Debug)] pub struct BlockStore { @@ -54,6 +57,9 @@ pub struct BlockStore { peer_id: PeerId, // how many injected proposals injected: usize, + // cache + cache: HashMap, + latest_block: Option, } impl BlockStore { @@ -83,13 +89,24 @@ impl BlockStore { max_blocks_injected: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, injected: 0, + cache: HashMap::new(), + latest_block: None, }) } /// Match a received proposal pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { if prop.from != self.peer_id { - tracing::warn!("Received a foreign InjectedProposal from {}", prop.from); + tracing::error!( + "blockstore::MarkReceivedProposal : foreign InjectedProposal from {}", + prop.from + ); + } + if let Some((_, p)) = self.cache.remove(&prop.block.number()) { + tracing::warn!( + "blockstore::MarkReceivedProposal : removing stale cache proposal {}", + p.number() + ); } self.injected = self.injected.saturating_sub(1); Ok(()) @@ -177,28 +194,28 @@ impl BlockStore { } /// Request for blocks from a height, forwards. - pub fn handle_request_from_height( + pub fn handle_request_from_number( &mut self, from: PeerId, request: RequestBlock, ) -> Result { // ... tracing::debug!( - "blockstore::RequestFromHeight : received a block request from {}", + "blockstore::RequestFromNumber : received a block request from {}", from ); // TODO: Check if we should service this request. // Validators shall not respond to this request. - let Some(alpha) = self.db.get_block_by_hash(&request.from_hash)? else { + let Some(alpha) = self.db.get_canonical_block_by_number(request.from_number)? else { // We do not have the starting block tracing::warn!( - "blockstore::RequestFromHeight : missing starting block {}", - request.from_hash + "blockstore::RequestFromNumber : missing starting block {}", + request.from_number ); let message: ExternalMessage = - ExternalMessage::ResponseFromHeight(ResponseBlock { proposals: vec![] }); + ExternalMessage::ResponseFromNumber(ResponseBlock { proposals: vec![] }); return Ok(message); }; @@ -213,15 +230,19 @@ impl BlockStore { proposals.push(self.block_to_proposal(block)); } - let message = ExternalMessage::ResponseFromHeight(ResponseBlock { proposals }); + let message = ExternalMessage::ResponseFromNumber(ResponseBlock { proposals }); tracing::trace!( ?message, - "blockstore::RequestFromHeight : responding to block request from height" + "blockstore::RequestFromNumber : responding to block request from height" ); Ok(message) } - /// Pump the proposals into the chain. + /// Inject the proposals into the chain. + /// + /// Besides pumping the set of Proposals into the processing pipeline, it also records the + /// last known Proposal in the pipeline. This is used for speculative fetches, and also for + /// knowing where to continue fetching from. 
fn inject_proposals(&mut self, proposals: Vec) -> Result<()> { tracing::info!( "blockstore::InjectProposals : injecting {} proposals", @@ -231,11 +252,10 @@ impl BlockStore { if proposals.is_empty() { return Ok(()); } - // Sort proposals by number - let proposals = proposals - .into_iter() - .sorted_by_key(|p| p.number()) - .collect_vec(); + + // Store the tip + let (last_block, _) = proposals.last().unwrap().clone().into_parts(); + self.latest_block = Some(last_block); // Increment proposals injected self.injected = self.injected.saturating_add(proposals.len()); @@ -271,7 +291,7 @@ impl BlockStore { } } - pub fn handle_response_from_height( + pub fn handle_response_from_number( &mut self, from: PeerId, response: ResponseBlock, @@ -279,50 +299,54 @@ impl BlockStore { // Process whatever we have received. if response.proposals.is_empty() { // Empty response, downgrade peer - tracing::warn!("blockstore::ResponseFromHeight : empty blocks {from}",); + tracing::warn!("blockstore::ResponseFromNumber : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); } else if response.proposals.len() < self.max_blocks_in_flight { // Partial response, downgrade peer - tracing::warn!("blockstore::ResponseFromHeight : partial blocks {from}",); + tracing::warn!("blockstore::ResponseFromNumber : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { self.done_with_peer(DownGrade::None); } tracing::info!( - "blockstore::ResponseFromHeight : received {} blocks from {}", + "blockstore::ResponseFromNumber : received {} blocks from {}", response.proposals.len(), from ); // TODO: Any additional checks we should do here? - // Last known proposal - let next_hash = response.proposals.last().unwrap().hash(); - - // Inject received proposals - self.inject_proposals(response.proposals)?; - - // Speculatively request more blocks, as there might be more - if self.injected < self.max_blocks_injected { - self.in_flight = self.get_next_peer(); - if let Some(peer) = self.in_flight.as_ref() { - let message = ExternalMessage::RequestFromHeight(RequestBlock { - batch_size: self.max_blocks_in_flight, - from_hash: next_hash, - }); - - tracing::info!( - "Requesting {} future blocks from {}", - self.max_blocks_in_flight, - peer.peer_id, - ); + // Sort proposals by number + let proposals = response + .proposals + .into_iter() + .sorted_by_key(|p| p.number()) + .collect_vec(); - self.message_sender - .send_external_message(peer.peer_id, message)?; + // Insert into the cache. + // If current proposal matches another one in cache, from a different peer, inject the proposal. + // Else, replace the cached values with the new ones. + let mut injections = Vec::new(); + for p in proposals { + // If the proposal already exists + if let Some((peer, proposal)) = self.cache.remove(&p.number()) { + if peer != from && proposal.hash() == p.hash() { + injections.push(proposal); + } else { + // insert the new one and; + self.cache.insert(p.number(), (from, p)); + break; // TODO: Replace the rest + } + } else { + self.cache.insert(p.number(), (from, p)); } } + + // Inject matched proposals + self.inject_proposals(injections)?; + Ok(()) } @@ -391,13 +415,18 @@ impl BlockStore { } } - // highest canonical block we have - // TODO: Replace this with a single SQL query. - let height = self - .db - .get_highest_canonical_block_number()? 
- .unwrap_or_default(); - let alpha_block = self.db.get_canonical_block_by_number(height)?.unwrap(); + // highest canonical block we know + let alpha_block = if self.latest_block.is_some() { + self.latest_block.as_ref().unwrap().clone() + } else { + // TODO: Replace this with a single SQL query. + let height = self + .db + .get_highest_canonical_block_number()? + .unwrap_or_default(); + let alpha_block = self.db.get_canonical_block_by_number(height)?.unwrap(); + alpha_block + }; // Compute the block gap. let block_gap = if let Some(omega_block) = omega_block.as_ref() { @@ -406,23 +435,32 @@ impl BlockStore { .number .saturating_sub(alpha_block.header.number) } else { - // Trigger a RequestFromHeight if the source block is None + // Trigger a RequestFromNumber if the source block is None self.max_blocks_in_flight as u64 }; // TODO: Double-check hysteresis logic - may not even be necessary to do RequestFromHash - let message = if block_gap > self.max_blocks_in_flight as u64 / 2 { + let (message, hash) = if block_gap > self.max_blocks_in_flight as u64 / 2 { // we're far from latest block - ExternalMessage::RequestFromHeight(RequestBlock { - from_hash: alpha_block.header.hash, - batch_size: self.max_blocks_in_flight, - }) + ( + ExternalMessage::RequestFromNumber(RequestBlock { + from_number: alpha_block.number(), + from_hash: alpha_block.hash(), + batch_size: self.max_blocks_in_flight, + }), + alpha_block.hash(), + ) } else { // we're close to latest block - ExternalMessage::RequestFromHash(RequestBlock { - from_hash: omega_block.unwrap().header.hash, - batch_size: self.max_blocks_in_flight, - }) + let omega_block = omega_block.unwrap(); + ( + ExternalMessage::RequestFromHash(RequestBlock { + from_hash: omega_block.hash(), + from_number: omega_block.number(), + batch_size: self.max_blocks_in_flight, + }), + omega_block.hash(), + ) }; let peer = self.in_flight.as_ref().unwrap(); @@ -430,7 +468,7 @@ impl BlockStore { tracing::info!( "Requesting {} missing blocks from {}", self.max_blocks_in_flight, - peer.peer_id, + hash, ); self.message_sender diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index c2724b3cc..6672f51ac 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -229,6 +229,7 @@ impl fmt::Debug for BlockResponse { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RequestBlock { + pub from_number: u64, pub from_hash: Hash, pub batch_size: usize, } @@ -280,9 +281,9 @@ pub enum ExternalMessage { Acknowledgement, AddPeer, RemovePeer, - RequestFromHeight(RequestBlock), + RequestFromNumber(RequestBlock), RequestFromHash(RequestBlock), - ResponseFromHeight(ResponseBlock), + ResponseFromNumber(ResponseBlock), ResponseFromHash(ResponseBlock), InjectedProposal(InjectedProposal), } @@ -305,16 +306,16 @@ impl Display for ExternalMessage { } ExternalMessage::AddPeer => write!(f, "AddPeer"), ExternalMessage::RemovePeer => write!(f, "RemovePeer"), - ExternalMessage::ResponseFromHeight(r) => { - write!(f, "ResponseFromHeight({})", r.proposals.len()) + ExternalMessage::ResponseFromNumber(r) => { + write!(f, "ResponseFromNumber({})", r.proposals.len()) } ExternalMessage::ResponseFromHash(r) => { write!(f, "ResponseFromHash({})", r.proposals.len()) } - ExternalMessage::RequestFromHeight(r) => { + ExternalMessage::RequestFromNumber(r) => { write!( f, - "RequestFromHeight({}, num={})", + "RequestFromNumber({}, num={})", r.from_hash, r.batch_size ) } diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index ce792ca5a..563312e0a 100644 --- a/zilliqa/src/node.rs +++ 
b/zilliqa/src/node.rs @@ -273,11 +273,11 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } - ExternalMessage::RequestFromHeight(request) => { + ExternalMessage::RequestFromNumber(request) => { let message = self .consensus .blockstore - .handle_request_from_height(from, request)?; + .handle_request_from_number(from, request)?; self.request_responses.send((response_channel, message))?; } ExternalMessage::RequestFromHash(request) => { @@ -383,10 +383,10 @@ impl Node { pub fn handle_response(&mut self, from: PeerId, message: ExternalMessage) -> Result<()> { debug!(%from, to = %self.peer_id, %message, "handling response"); match message { - ExternalMessage::ResponseFromHeight(response) => { + ExternalMessage::ResponseFromNumber(response) => { self.consensus .blockstore - .handle_response_from_height(from, response)?; + .handle_response_from_number(from, response)?; } ExternalMessage::ResponseFromHash(response) => { self.consensus From 486056df022f9742d7bbcb1f7d175e8c202a4c7b Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 16:29:38 +0800 Subject: [PATCH 024/119] feat: sort-of working sync with multiple sources of truth. --- zilliqa/src/blockstore.rs | 78 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 466d362c7..37dc3d2c3 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -1,6 +1,7 @@ use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, + ops::Sub, sync::Arc, time::{Duration, Instant}, }; @@ -37,6 +38,8 @@ enum DownGrade { // TODO: What if we receive a fork // TODO: How to handle adverserial history +const GAP_THRESHOLD: usize = 5; // How big is big/small gap. + #[derive(Debug)] pub struct BlockStore { // database @@ -162,19 +165,9 @@ impl BlockStore { // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. - let Some(omega_block) = self.db.get_block_by_hash(&request.from_hash)? else { - // We do not have the starting block - tracing::warn!( - "blockstore::RequestFromHash : missing starting block {}", - request.from_hash - ); - let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals: vec![] }); - return Ok(message); - }; - let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send let mut proposals: Vec = Vec::new(); - let mut hash = omega_block.parent_hash(); + let mut hash = request.from_hash; while proposals.len() < batch_size { // grab the parent let Some(block) = self.db.get_block_by_hash(&hash)? else { @@ -361,7 +354,7 @@ impl BlockStore { tracing::warn!("blockstore::ResponseFromHash : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); - } else if response.proposals.len() <= self.max_blocks_in_flight / 2 { + } else if response.proposals.len() < GAP_THRESHOLD { // Partial response, downgrade peer // Skip processing because we want to ensure that we have ALL the needed blocks to sync up. tracing::warn!("blockstore::ResponseFromHash : partial blocks {from}",); @@ -379,9 +372,15 @@ impl BlockStore { ); // TODO: Any additional checks we should do here? 
+ // Sort proposals by number + let proposals = response + .proposals + .into_iter() + .sorted_by_key(|p| p.number()) + .collect_vec(); // Inject the proposals - self.inject_proposals(response.proposals)?; + self.inject_proposals(proposals)?; Ok(()) } @@ -439,38 +438,39 @@ impl BlockStore { self.max_blocks_in_flight as u64 }; - // TODO: Double-check hysteresis logic - may not even be necessary to do RequestFromHash - let (message, hash) = if block_gap > self.max_blocks_in_flight as u64 / 2 { + let peer = self.in_flight.as_ref().unwrap(); + + let message = if block_gap > self.max_blocks_in_flight.sub(GAP_THRESHOLD) as u64 { // we're far from latest block - ( - ExternalMessage::RequestFromNumber(RequestBlock { - from_number: alpha_block.number(), - from_hash: alpha_block.hash(), - batch_size: self.max_blocks_in_flight, - }), - alpha_block.hash(), - ) + let message = RequestBlock { + from_number: alpha_block.number(), + from_hash: alpha_block.hash(), + batch_size: self.max_blocks_in_flight, + }; + tracing::info!( + "blockstore::RequestMissingBlocks : requesting {} blocks at {} from {}", + message.batch_size, + message.from_number, + peer.peer_id, + ); + ExternalMessage::RequestFromNumber(message) } else { // we're close to latest block let omega_block = omega_block.unwrap(); - ( - ExternalMessage::RequestFromHash(RequestBlock { - from_hash: omega_block.hash(), - from_number: omega_block.number(), - batch_size: self.max_blocks_in_flight, - }), - omega_block.hash(), - ) + let message = RequestBlock { + from_hash: omega_block.hash(), + from_number: omega_block.number(), + batch_size: GAP_THRESHOLD * 2, + }; + tracing::info!( + "blockstore::RequestMissingBlocks : requesting {} blocks at {} from {}", + message.batch_size, + message.from_hash, + peer.peer_id, + ); + ExternalMessage::RequestFromHash(message) }; - let peer = self.in_flight.as_ref().unwrap(); - - tracing::info!( - "Requesting {} missing blocks from {}", - self.max_blocks_in_flight, - hash, - ); - self.message_sender .send_external_message(peer.peer_id, message)?; Ok(()) From de60034f29f22901d0c7b3ec56638520cbd7dc2a Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 17:05:02 +0800 Subject: [PATCH 025/119] feat: pre-allocate enough capacity; corrected block_gap check. --- zilliqa/src/blockstore.rs | 43 ++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 37dc3d2c3..f97f66e4a 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -1,7 +1,6 @@ use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, - ops::Sub, sync::Arc, time::{Duration, Instant}, }; @@ -30,9 +29,15 @@ enum DownGrade { /// If not, then it triggers a syncing algorithm. /// /// 1. We check if the gap between our last canonical block and the latest Proposal. -/// a. If it is a small gap, we request for history, going backwards from Proposal. -/// b. If it is a big gap, we request for history, going forwards from Canonical. -/// 2. When we receive a response, we inject the Proposals into our processing pipeline. +/// a. If it is a small gap, we request for blocks, going backwards from Proposal. +/// b. If it is a big gap, we request for blocks, going forwards from Canonical. +/// 2. When we receive a forwards history response, we check for matches against the cache. +/// This means that for a proposal to be injected, it must be corroborated by 2 sources. +/// a. 
If it matches the cached Proposal, we inject the proposal into the pipeline. +/// b. If it does not exist in the cache, we cache the new value. +/// c. If it does not match the cached Proposal, something is up and we stop there and request for more. +/// 3. When we receive a backwards history response, we inject it into the pipeline. +/// a. If it does not link up with the existing Canonical, then it will be dropped. /// // TODO: What if we receive a fork @@ -165,8 +170,8 @@ impl BlockStore { // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. - let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send - let mut proposals: Vec = Vec::new(); + let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return + let mut proposals = Vec::with_capacity(batch_size); let mut hash = request.from_hash; while proposals.len() < batch_size { // grab the parent @@ -201,21 +206,12 @@ impl BlockStore { // TODO: Check if we should service this request. // Validators shall not respond to this request. - let Some(alpha) = self.db.get_canonical_block_by_number(request.from_number)? else { - // We do not have the starting block - tracing::warn!( - "blockstore::RequestFromNumber : missing starting block {}", - request.from_number - ); - let message: ExternalMessage = - ExternalMessage::ResponseFromNumber(ResponseBlock { proposals: vec![] }); - return Ok(message); - }; - // TODO: Replace this with a single SQL query - let batch_size = self.max_blocks_in_flight.min(request.batch_size) as u64; // mitigate DOS attacks by limiting the number of blocks we send - let mut proposals = Vec::new(); - for num in alpha.number().saturating_add(1)..=alpha.number().saturating_add(batch_size) { + let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send + let mut proposals = Vec::with_capacity(batch_size); + for num in request.from_number.saturating_add(1) + ..=request.from_number.saturating_add(batch_size as u64) + { let Some(block) = self.db.get_canonical_block_by_number(num)? else { // that's all we have! break; @@ -321,7 +317,7 @@ impl BlockStore { // Insert into the cache. // If current proposal matches another one in cache, from a different peer, inject the proposal. // Else, replace the cached values with the new ones. - let mut injections = Vec::new(); + let mut injections = Vec::with_capacity(proposals.len()); for p in proposals { // If the proposal already exists if let Some((peer, proposal)) = self.cache.remove(&p.number()) { @@ -404,9 +400,6 @@ impl BlockStore { return Ok(()); } } else { - if self.injected > 0 { - return Ok(()); - } self.in_flight = self.get_next_peer(); if self.in_flight.is_none() { tracing::warn!("Insufficient peers available to request missing blocks"); @@ -440,7 +433,7 @@ impl BlockStore { let peer = self.in_flight.as_ref().unwrap(); - let message = if block_gap > self.max_blocks_in_flight.sub(GAP_THRESHOLD) as u64 { + let message = if block_gap > GAP_THRESHOLD as u64 { // we're far from latest block let message = RequestBlock { from_number: alpha_block.number(), From 247c504317b4abe7b554b0997668b092f0fc7da9 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 17:31:15 +0800 Subject: [PATCH 026/119] feat: replace non-corroborated blocks in cache. 
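
A forward-sync block is now only injected once two different peers have
returned the same proposal for that height; on a mismatch the newer response
simply replaces the cached entry and we wait for further corroboration. A
simplified, self-contained sketch of the rule (u64 peer ids and raw 32-byte
hashes stand in for the real PeerId and Proposal types):

    use std::collections::HashMap;

    // Returns true when a second, distinct peer confirms the cached hash.
    fn corroborate(
        cache: &mut HashMap<u64, (u64, [u8; 32])>,
        from: u64,
        number: u64,
        hash: [u8; 32],
    ) -> bool {
        match cache.remove(&number) {
            Some((peer, cached)) if peer != from && cached == hash => true,
            _ => {
                // First sighting, or a conflicting hash: (re)cache and wait.
                cache.insert(number, (from, hash));
                false
            }
        }
    }

    fn main() {
        let mut cache = HashMap::new();
        let h = [1u8; 32];
        assert!(!corroborate(&mut cache, 1, 42, h)); // cached only
        assert!(corroborate(&mut cache, 2, 42, h)); // second peer agrees: inject
    }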
--- zilliqa/src/blockstore.rs | 56 ++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index f97f66e4a..977498bee 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -33,15 +33,14 @@ enum DownGrade { /// b. If it is a big gap, we request for blocks, going forwards from Canonical. /// 2. When we receive a forwards history response, we check for matches against the cache. /// This means that for a proposal to be injected, it must be corroborated by 2 sources. -/// a. If it matches the cached Proposal, we inject the proposal into the pipeline. -/// b. If it does not exist in the cache, we cache the new value. -/// c. If it does not match the cached Proposal, something is up and we stop there and request for more. +/// a. If it matches the cached value, we inject the proposal into the pipeline. +/// b. If it does not match, we replace the cached value and request for more. +/// b. If it does not exist in the cache, we cache the proposal. /// 3. When we receive a backwards history response, we inject it into the pipeline. -/// a. If it does not link up with the existing Canonical, then it will be dropped. +/// a. If it does not line up with the existing Canonical, then it will be dropped. /// -// TODO: What if we receive a fork -// TODO: How to handle adverserial history +// TODO: Speculative fetch, to speed things up. const GAP_THRESHOLD: usize = 5; // How big is big/small gap. @@ -58,9 +57,9 @@ pub struct BlockStore { // in-flight timeout request_timeout: Duration, // how many blocks to request at once - max_blocks_in_flight: usize, + max_batch_size: usize, // how many blocks to inject into the queue - max_blocks_injected: usize, + max_blocks_in_flight: usize, // our peer id peer_id: PeerId, // how many injected proposals @@ -93,8 +92,8 @@ impl BlockStore { peers, peer_id, request_timeout: config.consensus.consensus_timeout, - max_blocks_in_flight: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. - max_blocks_injected: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks + max_batch_size: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. + max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, injected: 0, cache: HashMap::new(), @@ -170,7 +169,7 @@ impl BlockStore { // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. - let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return + let batch_size = self.max_batch_size.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return let mut proposals = Vec::with_capacity(batch_size); let mut hash = request.from_hash; while proposals.len() < batch_size { @@ -207,7 +206,7 @@ impl BlockStore { // Validators shall not respond to this request. 
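// The loop below fetches canonical blocks one height at a time via
// get_canonical_block_by_number; the TODO that follows envisions collapsing it
// into a single range query. Purely illustrative shape, assuming a SQL schema
// with a canonical-height index (not the actual Db API):
//   SELECT block FROM blocks
//   WHERE height > :from_number AND canonical = 1
//   ORDER BY height ASC LIMIT :batch_size;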
// TODO: Replace this with a single SQL query - let batch_size = self.max_blocks_in_flight.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send + let batch_size = self.max_batch_size.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send let mut proposals = Vec::with_capacity(batch_size); for num in request.from_number.saturating_add(1) ..=request.from_number.saturating_add(batch_size as u64) @@ -291,7 +290,7 @@ impl BlockStore { tracing::warn!("blockstore::ResponseFromNumber : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); - } else if response.proposals.len() < self.max_blocks_in_flight { + } else if response.proposals.len() < self.max_batch_size { // Partial response, downgrade peer tracing::warn!("blockstore::ResponseFromNumber : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); @@ -316,25 +315,34 @@ impl BlockStore { // Insert into the cache. // If current proposal matches another one in cache, from a different peer, inject the proposal. - // Else, replace the cached values with the new ones. - let mut injections = Vec::with_capacity(proposals.len()); - for p in proposals { - // If the proposal already exists + // Else, replace the cached Proposal with the new one. + let mut corroborated_proposals = Vec::with_capacity(proposals.len()); + let mut props = proposals.into_iter(); + + // Collect corroborated proposals + for p in props.by_ref() { if let Some((peer, proposal)) = self.cache.remove(&p.number()) { + // If the proposal already exists if peer != from && proposal.hash() == p.hash() { - injections.push(proposal); + // is corroborated proposal + corroborated_proposals.push(proposal); } else { - // insert the new one and; + // insert the different one and; self.cache.insert(p.number(), (from, p)); - break; // TODO: Replace the rest + break; // replace the rest in the next loop } } else { self.cache.insert(p.number(), (from, p)); } } + // Replace/insert the rest of the proposals in the cache + for p in props { + self.cache.insert(p.number(), (from, p)); + } + // Inject matched proposals - self.inject_proposals(injections)?; + self.inject_proposals(corroborated_proposals)?; Ok(()) } @@ -382,7 +390,7 @@ impl BlockStore { /// Request blocks between the current height and the given block. /// - /// The approach is to request blocks in batches of `max_blocks_in_flight` blocks. + /// The approach is to request blocks in batches of `max_batch_size` blocks. /// If None block is provided, we request blocks from the last known canonical block forwards. /// If the block gap is large, we request blocks from the last known canonical block forwards. /// If the block gap is small, we request blocks from the latest block backwards. @@ -428,7 +436,7 @@ impl BlockStore { .saturating_sub(alpha_block.header.number) } else { // Trigger a RequestFromNumber if the source block is None - self.max_blocks_in_flight as u64 + self.max_batch_size as u64 }; let peer = self.in_flight.as_ref().unwrap(); @@ -438,7 +446,7 @@ impl BlockStore { let message = RequestBlock { from_number: alpha_block.number(), from_hash: alpha_block.hash(), - batch_size: self.max_blocks_in_flight, + batch_size: self.max_batch_size, }; tracing::info!( "blockstore::RequestMissingBlocks : requesting {} blocks at {} from {}", From d87d09ed5734704b05099ee9b94674e91d370eaa Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 17:59:21 +0800 Subject: [PATCH 027/119] chore: clippy. checkpoint. 
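
A recurring pattern in the handlers above is grading each peer response before using it: an empty response, a partial batch and a full batch lead to different downgrades of the serving peer (timeouts are handled separately when an in-flight request expires). A compact sketch with a hypothetical classify_response helper; the real code performs the same checks inline around done_with_peer:

    // Response grading used by the sync handlers, in isolation.
    #[derive(Debug, PartialEq)]
    enum DownGrade {
        None,    // full batch received
        Partial, // fewer blocks than requested
        Empty,   // nothing useful returned
    }

    fn classify_response(received: usize, requested: usize) -> DownGrade {
        if received == 0 {
            DownGrade::Empty
        } else if received < requested {
            DownGrade::Partial
        } else {
            DownGrade::None
        }
    }

    fn main() {
        assert_eq!(classify_response(0, 100), DownGrade::Empty);
        assert_eq!(classify_response(40, 100), DownGrade::Partial);
        assert_eq!(classify_response(100, 100), DownGrade::None);
    }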
[corroborate proposals] --- zilliqa/src/blockstore.rs | 40 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 977498bee..d8a5eff13 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -23,23 +23,22 @@ enum DownGrade { Empty, } -/// Syncing Algorithm -/// -/// When a Proposal is received by Consensus, we check if the parent exists in our DB. -/// If not, then it triggers a syncing algorithm. -/// -/// 1. We check if the gap between our last canonical block and the latest Proposal. -/// a. If it is a small gap, we request for blocks, going backwards from Proposal. -/// b. If it is a big gap, we request for blocks, going forwards from Canonical. -/// 2. When we receive a forwards history response, we check for matches against the cache. -/// This means that for a proposal to be injected, it must be corroborated by 2 sources. -/// a. If it matches the cached value, we inject the proposal into the pipeline. -/// b. If it does not match, we replace the cached value and request for more. -/// b. If it does not exist in the cache, we cache the proposal. -/// 3. When we receive a backwards history response, we inject it into the pipeline. -/// a. If it does not line up with the existing Canonical, then it will be dropped. -/// - +// Syncing Algorithm +// +// When a Proposal is received by Consensus, we check if the parent exists in our DB. +// If not, then it triggers a syncing algorithm. +// +// 1. We check if the gap between our last canonical block and the latest Proposal. +// a. If it is a small gap, we request for blocks, going backwards from Proposal. +// b. If it is a big gap, we request for blocks, going forwards from Canonical. +// 2. When we receive a forwards history response, we check for matches against the cache. +// This means that for a proposal to be injected, it must be corroborated by 2 sources. +// a. If it matches the cached value, we inject the proposal into the pipeline. +// b. If it does not match, we replace the cached value and request for more. +// b. If it does not exist in the cache, we cache the proposal. +// 3. When we receive a backwards history response, we inject it into the pipeline. +// a. If it does not line up with the existing Canonical, then it will be dropped. +// // TODO: Speculative fetch, to speed things up. const GAP_THRESHOLD: usize = 5; // How big is big/small gap. @@ -59,7 +58,7 @@ pub struct BlockStore { // how many blocks to request at once max_batch_size: usize, // how many blocks to inject into the queue - max_blocks_in_flight: usize, + _max_blocks_in_flight: usize, // our peer id peer_id: PeerId, // how many injected proposals @@ -93,7 +92,7 @@ impl BlockStore { peer_id, request_timeout: config.consensus.consensus_timeout, max_batch_size: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. - max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks + _max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, injected: 0, cache: HashMap::new(), @@ -424,8 +423,7 @@ impl BlockStore { .db .get_highest_canonical_block_number()? .unwrap_or_default(); - let alpha_block = self.db.get_canonical_block_by_number(height)?.unwrap(); - alpha_block + self.db.get_canonical_block_by_number(height)?.unwrap() }; // Compute the block gap. 
From eed2ee658ab0a71af22759148340ee1ef5535a0f Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 18:56:03 +0800 Subject: [PATCH 028/119] feat: [speculative fetch] --- zilliqa/src/blockstore.rs | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index d8a5eff13..67cb9a6c9 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -39,7 +39,7 @@ enum DownGrade { // 3. When we receive a backwards history response, we inject it into the pipeline. // a. If it does not line up with the existing Canonical, then it will be dropped. // -// TODO: Speculative fetch, to speed things up. +// TODO: How to handle case where only single source of truth i.e. bootstrap node? const GAP_THRESHOLD: usize = 5; // How big is big/small gap. @@ -58,7 +58,7 @@ pub struct BlockStore { // how many blocks to request at once max_batch_size: usize, // how many blocks to inject into the queue - _max_blocks_in_flight: usize, + max_blocks_in_flight: usize, // our peer id peer_id: PeerId, // how many injected proposals @@ -92,7 +92,7 @@ impl BlockStore { peer_id, request_timeout: config.consensus.consensus_timeout, max_batch_size: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. - _max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks + max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, injected: 0, cache: HashMap::new(), @@ -343,6 +343,32 @@ impl BlockStore { // Inject matched proposals self.inject_proposals(corroborated_proposals)?; + // Fire speculative request + if self.latest_block.is_some() { + if self.injected < self.max_blocks_in_flight { + if let Some(peer) = self.get_next_peer() { + // we're far from latest block + let message = RequestBlock { + from_number: self.latest_block.as_ref().unwrap().number(), + from_hash: self.latest_block.as_ref().unwrap().hash(), + batch_size: self.max_batch_size, + }; + tracing::info!( + "blockstore::RequestMissingBlocks : speculative requesting {} blocks at {} from {}", + message.batch_size, + message.from_number, + peer.peer_id, + ); + self.message_sender.send_external_message( + peer.peer_id, + ExternalMessage::RequestFromNumber(message), + )?; + + self.in_flight = Some(peer); + } + } + } + Ok(()) } From 66abff8aaf594bfb7ded62a4d3719736cbc8b281 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 21:33:12 +0800 Subject: [PATCH 029/119] feat: remove peer check, which allows it to proceed under circumstances where there is only 1 peer with the blocks. 
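
This change relaxes the corroboration rule sketched earlier: the cache now stores only the proposal, so any repeated response carrying the same hash for a height corroborates it, even when both responses come from the same peer (for example, when only the bootstrap node holds the blocks). A minimal sketch of the relaxed check, again with simplified stand-in types rather than the node's real ones:

    use std::collections::HashMap;

    type Hash = [u8; 32];

    struct Proposal {
        number: u64,
        hash: Hash,
    }

    struct Cache {
        cache: HashMap<u64, Proposal>,
    }

    impl Cache {
        // Any repeat of the same hash for a height corroborates the cached
        // proposal; a differing hash replaces it and corroboration restarts.
        fn offer(&mut self, p: Proposal) -> Option<Proposal> {
            match self.cache.remove(&p.number) {
                Some(cached) if cached.hash == p.hash => Some(cached),
                _ => {
                    self.cache.insert(p.number, p);
                    None
                }
            }
        }
    }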
--- zilliqa/src/blockstore.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 67cb9a6c9..2371ec4fd 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -64,7 +64,7 @@ pub struct BlockStore { // how many injected proposals injected: usize, // cache - cache: HashMap, + cache: HashMap, latest_block: Option, } @@ -108,7 +108,7 @@ impl BlockStore { prop.from ); } - if let Some((_, p)) = self.cache.remove(&prop.block.number()) { + if let Some(p) = self.cache.remove(&prop.block.number()) { tracing::warn!( "blockstore::MarkReceivedProposal : removing stale cache proposal {}", p.number() @@ -320,24 +320,24 @@ impl BlockStore { // Collect corroborated proposals for p in props.by_ref() { - if let Some((peer, proposal)) = self.cache.remove(&p.number()) { + if let Some(proposal) = self.cache.remove(&p.number()) { // If the proposal already exists - if peer != from && proposal.hash() == p.hash() { + if proposal.hash() == p.hash() { // is corroborated proposal corroborated_proposals.push(proposal); } else { // insert the different one and; - self.cache.insert(p.number(), (from, p)); + self.cache.insert(p.number(), p); break; // replace the rest in the next loop } } else { - self.cache.insert(p.number(), (from, p)); + self.cache.insert(p.number(), p); } } // Replace/insert the rest of the proposals in the cache for p in props { - self.cache.insert(p.number(), (from, p)); + self.cache.insert(p.number(), p); } // Inject matched proposals From 27ae3ea595c60375c760ba48e86602946024e602 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 30 Dec 2024 21:58:48 +0800 Subject: [PATCH 030/119] chore: clippy. --- zilliqa/src/blockstore.rs | 47 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 2371ec4fd..eec712fd6 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -344,28 +344,25 @@ impl BlockStore { self.inject_proposals(corroborated_proposals)?; // Fire speculative request - if self.latest_block.is_some() { - if self.injected < self.max_blocks_in_flight { - if let Some(peer) = self.get_next_peer() { - // we're far from latest block - let message = RequestBlock { - from_number: self.latest_block.as_ref().unwrap().number(), - from_hash: self.latest_block.as_ref().unwrap().hash(), - batch_size: self.max_batch_size, - }; - tracing::info!( - "blockstore::RequestMissingBlocks : speculative requesting {} blocks at {} from {}", - message.batch_size, - message.from_number, - peer.peer_id, - ); - self.message_sender.send_external_message( - peer.peer_id, - ExternalMessage::RequestFromNumber(message), - )?; - - self.in_flight = Some(peer); - } + if self.latest_block.is_some() && self.injected < self.max_blocks_in_flight { + if let Some(peer) = self.get_next_peer() { + // we're far from latest block + let message = RequestBlock { + from_number: self.latest_block.as_ref().unwrap().number(), + from_hash: self.latest_block.as_ref().unwrap().hash(), + batch_size: self.max_batch_size, + }; + tracing::info!( + "blockstore::RequestMissingBlocks : speculative fetch {} blocks at {} from {}", + message.batch_size, + message.from_number, + peer.peer_id, + ); + self.message_sender.send_external_message( + peer.peer_id, + ExternalMessage::RequestFromNumber(message), + )?; + self.in_flight = Some(peer); } } @@ -424,7 +421,7 @@ impl BlockStore { if let Some(peer) = self.in_flight.as_ref() { if 
peer.last_used.elapsed() > self.request_timeout { tracing::warn!( - "In-flight request {} timed out, requesting from new peer", + "blockstore::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", peer.peer_id ); self.done_with_peer(DownGrade::Timeout); @@ -435,7 +432,7 @@ impl BlockStore { } else { self.in_flight = self.get_next_peer(); if self.in_flight.is_none() { - tracing::warn!("Insufficient peers available to request missing blocks"); + tracing::warn!("blockstore::RequestMissingBlocks : insufficient peers to request missing blocks"); return Ok(()); } } @@ -485,7 +482,7 @@ impl BlockStore { let message = RequestBlock { from_hash: omega_block.hash(), from_number: omega_block.number(), - batch_size: GAP_THRESHOLD * 2, + batch_size: GAP_THRESHOLD + 1, }; tracing::info!( "blockstore::RequestMissingBlocks : requesting {} blocks at {} from {}", From 20ccc462327b9e7f3aff4966c7fffb5a741ef268 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 2 Jan 2025 15:21:17 +0800 Subject: [PATCH 031/119] feat: added handle_metadata_request/response(). --- zilliqa/src/blockstore.rs | 208 +++++++++++++++++++++++++++----------- zilliqa/src/message.rs | 26 +++++ zilliqa/src/node.rs | 12 +++ 3 files changed, 188 insertions(+), 58 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index eec712fd6..3800870f6 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -1,10 +1,11 @@ use std::{ cmp::Ordering, - collections::{BinaryHeap, HashMap}, + collections::{BTreeMap, BinaryHeap, HashMap}, sync::Arc, time::{Duration, Instant}, }; +use crate::crypto::Hash; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; @@ -12,7 +13,10 @@ use libp2p::PeerId; use crate::{ cfg::NodeConfig, db::Db, - message::{Block, ExternalMessage, InjectedProposal, Proposal, RequestBlock, ResponseBlock}, + message::{ + Block, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, RequestBlock, + ResponseBlock, + }, node::MessageSender, }; @@ -66,6 +70,10 @@ pub struct BlockStore { // cache cache: HashMap, latest_block: Option, + + // Chain metadata + chain_metadata: BTreeMap, + last_metadata: Option, } impl BlockStore { @@ -97,10 +105,14 @@ impl BlockStore { injected: 0, cache: HashMap::new(), latest_block: None, + chain_metadata: BTreeMap::new(), + last_metadata: None, }) } - /// Match a received proposal + /// Mark a received proposal + /// + /// Mark a proposal as received, and remove it from the cache. pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { if prop.from != self.peer_id { tracing::error!( @@ -131,7 +143,7 @@ impl BlockStore { "blockstore::ProcessProposal : Parent block {} not found", block.parent_hash() ); - self.request_missing_blocks(Some(block))?; + self.request_missing_chain(Some(block))?; return Ok(()); } Ok(()) @@ -152,6 +164,123 @@ impl BlockStore { Proposal::from_parts(block, txs) } + /// Convenience function to extract metadata from the block. + fn block_to_metadata(&self, block: Block) -> ChainMetaData { + ChainMetaData { + block_number: block.number(), + block_hash: block.hash(), + parent_hash: block.parent_hash(), + block_timestamp: block.timestamp(), + } + } + + pub fn handle_metadata_response( + &mut self, + from: PeerId, + response: Vec, + ) -> Result<()> { + // ... + tracing::info!( + "blockstore::MetadataResponse : received {} metadata from {}", + response.len(), + from + ); + + // Process whatever we have received. 
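
On the serving side, a metadata request is answered by walking parent hashes backwards from the requested hash, capped at a batch size. A sketch of that walk under simplified assumptions: Hash and ChainMetaData below are stand-ins, and lookup is a placeholder for the database query by hash used in the handler.

    // Simplified stand-ins for the node's Hash and ChainMetaData types.
    #[derive(Clone, Copy)]
    struct Hash(u64);

    struct ChainMetaData {
        block_number: u64,
        block_hash: Hash,
        parent_hash: Hash,
    }

    // Walk backwards from `from_hash` along parent_hash, collecting at most
    // `batch_size` entries and stopping early at the first unknown block.
    fn collect_metadata(
        mut from_hash: Hash,
        batch_size: usize,
        lookup: impl Fn(Hash) -> Option<ChainMetaData>,
    ) -> Vec<ChainMetaData> {
        let mut out = Vec::with_capacity(batch_size);
        while out.len() < batch_size {
            let Some(meta) = lookup(from_hash) else { break };
            from_hash = meta.parent_hash;
            out.push(meta);
        }
        out
    }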
+ if response.is_empty() { + // Empty response, downgrade peer + tracing::warn!("blockstore::MetadataResponse : empty blocks {from}",); + self.done_with_peer(DownGrade::Empty); + return Ok(()); + } else if response.len() < self.max_blocks_in_flight { + // Partial response, downgrade peer + tracing::warn!("blockstore::MetadataResponse : partial blocks {from}",); + self.done_with_peer(DownGrade::Partial); + } else { + self.done_with_peer(DownGrade::None); + } + + // Sort metadata by number, reversed + let mut metadata = response + .into_iter() + .sorted_by_key(|f| f.block_number) + .collect_vec(); + metadata.reverse(); + // mark the block + metadata.last_mut().unwrap().parent_hash = metadata.first().unwrap().block_hash; + + // Store the metadata + for meta in metadata { + // TODO: Check the linkage of the returned chain + if let Some(meta) = self.chain_metadata.insert(meta.block_hash, meta) { + self.last_metadata = Some(meta); + } + } + + // If the last block does not exist in our canonical history, fire the next request + if self.last_metadata.is_some() + && self + .db + .get_block_by_hash(&self.last_metadata.as_ref().unwrap().block_hash)? + .is_none() + { + self.request_missing_chain(None)?; + } else { + // Hit our internal history. Begin replicating chain. + self.request_missing_blocks()?; + } + + Ok(()) + } + + fn request_missing_blocks(&mut self) -> Result<()> { + // ... + tracing::info!( + "blockstore::RequestMissingBlocks : requesting missing blocks {:?}", + self.last_metadata + ); + + Ok(()) + } + + /// Returns the metadata of the chain from a given hash. + /// + /// This constructs a historical chain going backwards from a hash, by following the parent_hash. + /// It collects N blocks and returns the metadata of that particular chain. + /// This is mainly used in Phase 1 of the syncing algorithm, to construct a chain history. + pub fn handle_metadata_request( + &mut self, + from: PeerId, + request: RequestBlock, + ) -> Result { + tracing::info!( + "blockstore::MetadataRequest : received a metadata request from {}", + from + ); + + // TODO: Check if we should service this request + // Validators could respond to this request if there is nothing else to do. + + let batch_size = self.max_batch_size.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return + let mut metas = Vec::with_capacity(batch_size); + let mut hash = request.from_hash; + while metas.len() < batch_size { + // grab the parent + let Some(block) = self.db.get_block_by_hash(&hash)? else { + break; // that's all we have! + }; + hash = block.parent_hash(); + metas.push(self.block_to_metadata(block)); + } + + let message = ExternalMessage::MetaDataResponse(metas); + tracing::trace!( + ?message, + "blockstore::MetadataFromHash : responding to block request" + ); + Ok(message) + } + /// Request blocks from a hash, backwards. /// /// It will collect N blocks by following the block.parent_hash() of the requested block. @@ -416,7 +545,7 @@ impl BlockStore { /// If None block is provided, we request blocks from the last known canonical block forwards. /// If the block gap is large, we request blocks from the last known canonical block forwards. /// If the block gap is small, we request blocks from the latest block backwards. - pub fn request_missing_blocks(&mut self, omega_block: Option) -> Result<()> { + pub fn request_missing_chain(&mut self, omega_block: Option) -> Result<()> { // Early exit if there's a request in-flight; and if it has not expired. 
if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { @@ -437,62 +566,25 @@ impl BlockStore { } } - // highest canonical block we know - let alpha_block = if self.latest_block.is_some() { - self.latest_block.as_ref().unwrap().clone() - } else { - // TODO: Replace this with a single SQL query. - let height = self - .db - .get_highest_canonical_block_number()? - .unwrap_or_default(); - self.db.get_canonical_block_by_number(height)?.unwrap() - }; - - // Compute the block gap. - let block_gap = if let Some(omega_block) = omega_block.as_ref() { - omega_block - .header - .number - .saturating_sub(alpha_block.header.number) + let message = if let Some(omega_block) = omega_block { + ExternalMessage::MetaDataRequest(RequestBlock { + from_number: omega_block.number(), + from_hash: omega_block.hash(), + batch_size: self.max_blocks_in_flight, + }) } else { - // Trigger a RequestFromNumber if the source block is None - self.max_batch_size as u64 + ExternalMessage::MetaDataRequest(RequestBlock { + from_number: self.last_metadata.as_ref().unwrap().block_number, + from_hash: self.last_metadata.as_ref().unwrap().block_hash, + batch_size: self.max_blocks_in_flight, + }) }; - let peer = self.in_flight.as_ref().unwrap(); - - let message = if block_gap > GAP_THRESHOLD as u64 { - // we're far from latest block - let message = RequestBlock { - from_number: alpha_block.number(), - from_hash: alpha_block.hash(), - batch_size: self.max_batch_size, - }; - tracing::info!( - "blockstore::RequestMissingBlocks : requesting {} blocks at {} from {}", - message.batch_size, - message.from_number, - peer.peer_id, - ); - ExternalMessage::RequestFromNumber(message) - } else { - // we're close to latest block - let omega_block = omega_block.unwrap(); - let message = RequestBlock { - from_hash: omega_block.hash(), - from_number: omega_block.number(), - batch_size: GAP_THRESHOLD + 1, - }; - tracing::info!( - "blockstore::RequestMissingBlocks : requesting {} blocks at {} from {}", - message.batch_size, - message.from_hash, - peer.peer_id, - ); - ExternalMessage::RequestFromHash(message) - }; - + tracing::info!( + ?message, + "blockstore::RequestMissingBlocks : requesting missing chain from {}", + peer.peer_id + ); self.message_sender .send_external_message(peer.peer_id, message)?; Ok(()) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 6672f51ac..15ff364aa 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -247,6 +247,16 @@ pub struct InjectedProposal { pub block: Proposal, } +/// Used to hold metadata about the chain +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChainMetaData { + // An encoded PeerId + pub block_hash: Hash, + pub parent_hash: Hash, + pub block_number: u64, + pub block_timestamp: SystemTime, +} + /// Used to convey proposal processing internally, to avoid blocking threads for too long. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProcessProposal { @@ -286,6 +296,10 @@ pub enum ExternalMessage { ResponseFromNumber(ResponseBlock), ResponseFromHash(ResponseBlock), InjectedProposal(InjectedProposal), + MetaDataRequest(RequestBlock), + MetaDataResponse(Vec), + MultiBlockRequest(Vec), + MultiBlockResponse(Vec), } impl ExternalMessage { @@ -301,6 +315,18 @@ impl ExternalMessage { impl Display for ExternalMessage { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { + ExternalMessage::MultiBlockRequest(r) => { + write!(f, "MultiBlockRequest({})", r.len()) + } + ExternalMessage::MultiBlockResponse(r) => { + write!(f, "MultiBlockResponse({})", r.len()) + } + ExternalMessage::MetaDataResponse(r) => { + write!(f, "MetaDataResponse({})", r.len()) + } + ExternalMessage::MetaDataRequest(r) => { + write!(f, "MetaDataRequest({}, num={})", r.from_hash, r.batch_size) + } ExternalMessage::InjectedProposal(p) => { write!(f, "InjectedProposal {}", p.block.number()) } diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 563312e0a..440656849 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -273,6 +273,13 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } + ExternalMessage::MetaDataRequest(request) => { + let message = self + .consensus + .blockstore + .handle_metadata_request(from, request)?; + self.request_responses.send((response_channel, message))?; + } ExternalMessage::RequestFromNumber(request) => { let message = self .consensus @@ -383,6 +390,11 @@ impl Node { pub fn handle_response(&mut self, from: PeerId, message: ExternalMessage) -> Result<()> { debug!(%from, to = %self.peer_id, %message, "handling response"); match message { + ExternalMessage::MetaDataResponse(response) => { + self.consensus + .blockstore + .handle_metadata_response(from, response)?; + } ExternalMessage::ResponseFromNumber(response) => { self.consensus .blockstore From 7580b399b452303dcd0bdc6b4423f75cf9ab249a Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 3 Jan 2025 09:58:23 +0800 Subject: [PATCH 032/119] feat: [checkpoint - retrieve chain metadata]. --- zilliqa/src/blockstore.rs | 83 ++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 3800870f6..319d6b5e9 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -74,6 +74,7 @@ pub struct BlockStore { // Chain metadata chain_metadata: BTreeMap, last_metadata: Option, + landmark_metadata: Vec, } impl BlockStore { @@ -107,6 +108,7 @@ impl BlockStore { latest_block: None, chain_metadata: BTreeMap::new(), last_metadata: None, + landmark_metadata: Vec::new(), }) } @@ -174,6 +176,46 @@ impl BlockStore { } } + /// Request missing blocks from the chain. + /// + /// It constructs a set of hashes, which constitute the series of blocks that are missing. + /// These hashes are then sent to a Peer for retrieval. + fn request_missing_blocks(&mut self) -> Result<()> { + // ... 
+ tracing::info!("blockstore::RequestMissingBlocks : requesting missing blocks"); + + // If we have no landmarks, we have nothing to do + if let Some(mut hash) = self.landmark_metadata.pop() { + let mut request_hashes = Vec::with_capacity(self.max_batch_size); + request_hashes.push(hash); + while let Some(meta) = self.chain_metadata.remove(&hash) { + request_hashes.push(meta.block_hash); + hash = meta.parent_hash; + // re-insert the metadata so as not to lose it + // self.chain_metadata.insert(hash, meta); + } + // Fire request + if let Some(peer) = self.get_next_peer() { + tracing::debug!( + "blockstore::RequestMissingBlocks : requesting {} blocks from {}", + request_hashes.len(), + peer.peer_id + ); + self.message_sender.send_external_message( + peer.peer_id, + ExternalMessage::MultiBlockRequest(request_hashes), + )?; + self.in_flight = Some(peer); + } + } + + Ok(()) + } + + /// Handle a response to a metadata request. + /// + /// This is the first step in the syncing algorithm, where we receive a set of metadata and use it to + /// construct a chain history. We then request the missing blocks from the chain. pub fn handle_metadata_response( &mut self, from: PeerId, @@ -201,13 +243,10 @@ impl BlockStore { } // Sort metadata by number, reversed - let mut metadata = response + let metadata = response .into_iter() - .sorted_by_key(|f| f.block_number) + .sorted_by(|a, b| b.block_number.cmp(&a.block_number)) .collect_vec(); - metadata.reverse(); - // mark the block - metadata.last_mut().unwrap().parent_hash = metadata.first().unwrap().block_hash; // Store the metadata for meta in metadata { @@ -218,31 +257,18 @@ impl BlockStore { } // If the last block does not exist in our canonical history, fire the next request - if self.last_metadata.is_some() - && self - .db - .get_block_by_hash(&self.last_metadata.as_ref().unwrap().block_hash)? - .is_none() - { - self.request_missing_chain(None)?; - } else { - // Hit our internal history. Begin replicating chain. - self.request_missing_blocks()?; + if let Some(meta) = self.last_metadata.as_ref() { + if self.db.get_block_by_hash(&meta.block_hash)?.is_none() { + self.request_missing_chain(None)?; + } else { + // Hit our internal history. Begin replicating chain. + self.request_missing_blocks()?; + } } Ok(()) } - fn request_missing_blocks(&mut self) -> Result<()> { - // ... - tracing::info!( - "blockstore::RequestMissingBlocks : requesting missing blocks {:?}", - self.last_metadata - ); - - Ok(()) - } - /// Returns the metadata of the chain from a given hash. /// /// This constructs a historical chain going backwards from a hash, by following the parent_hash. @@ -283,7 +309,7 @@ impl BlockStore { /// Request blocks from a hash, backwards. /// - /// It will collect N blocks by following the block.parent_hash() of the requested block. + /// It will collect N blocks by following the block.parent_hash() of each requested block. 
pub fn handle_request_from_hash( &mut self, from: PeerId, @@ -567,15 +593,18 @@ impl BlockStore { } let message = if let Some(omega_block) = omega_block { + self.landmark_metadata.push(omega_block.hash()); ExternalMessage::MetaDataRequest(RequestBlock { from_number: omega_block.number(), from_hash: omega_block.hash(), batch_size: self.max_blocks_in_flight, }) } else { + let hash = self.last_metadata.as_ref().unwrap().parent_hash; + self.landmark_metadata.push(hash); ExternalMessage::MetaDataRequest(RequestBlock { from_number: self.last_metadata.as_ref().unwrap().block_number, - from_hash: self.last_metadata.as_ref().unwrap().block_hash, + from_hash: hash, batch_size: self.max_blocks_in_flight, }) }; From abfa6d474df153196e6c6bc98073ac3b2442f514 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 3 Jan 2025 10:13:31 +0800 Subject: [PATCH 033/119] feat: added handle_multiblock_request/response(). --- zilliqa/src/blockstore.rs | 133 +++++++++++++++++++++++++++++++------- zilliqa/src/node.rs | 12 ++++ 2 files changed, 121 insertions(+), 24 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 319d6b5e9..7fbd8a0c1 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -176,10 +176,85 @@ impl BlockStore { } } + /// Handle a multi-block response. + /// + /// This is the final step in the syncing algorithm, where we receive a set of blocks and inject them into + /// the pipeline. We also remove the blocks from the chain metadata, because they are now in the pipeline. + pub fn handle_multiblock_response( + &mut self, + from: PeerId, + response: Vec, + ) -> Result<()> { + // ... + tracing::info!( + "blockstore::MultiBlockResponse : received {} blocks from {}", + response.len(), + from + ); + + // Process whatever we received + if response.is_empty() { + // Empty response, downgrade peer + tracing::warn!("blockstore::MultiBlockResponse : empty blocks {from}",); + self.done_with_peer(DownGrade::Empty); + } else if response.len() < self.max_blocks_in_flight { + // Partial response, downgrade peer + tracing::warn!("blockstore::MultiBlockResponse : partial blocks {from}",); + self.done_with_peer(DownGrade::Partial); + } else { + self.done_with_peer(DownGrade::None); + } + + let proposals = response + .into_iter() + .sorted_by_key(|p| p.number()) + .collect_vec(); + + // Remove the blocks from the chain metadata, if they exist + for p in &proposals { + self.chain_metadata.remove(&p.hash()); + } + + self.inject_proposals(proposals)?; + + // Request for next bunch + if !self.landmark_metadata.is_empty() { + self.request_missing_blocks()?; + } + + Ok(()) + } + + pub fn handle_multiblock_request( + &mut self, + from: PeerId, + request: Vec, + ) -> Result { + // ... + tracing::info!( + "blockstore::MultiBlockRequest : received a {} multiblock request from {}", + request.len(), + from + ); + + let batch_size: usize = self.max_batch_size.min(request.len()); // mitigate DOS by limiting the number of blocks we return + let mut proposals = Vec::with_capacity(batch_size); + for hash in request { + let Some(block) = self.db.get_block_by_hash(&hash)? else { + break; // that's all we have! + }; + proposals.push(self.block_to_proposal(block)); + } + + let message = ExternalMessage::MultiBlockResponse(proposals); + Ok(message) + } + /// Request missing blocks from the chain. /// /// It constructs a set of hashes, which constitute the series of blocks that are missing. /// These hashes are then sent to a Peer for retrieval. + /// This is Part 2 of the syncing algorithm. 
fn request_missing_blocks(&mut self) -> Result<()> { // ... tracing::info!("blockstore::RequestMissingBlocks : requesting missing blocks"); @@ -187,7 +262,6 @@ impl BlockStore { // If we have no landmarks, we have nothing to do if let Some(mut hash) = self.landmark_metadata.pop() { let mut request_hashes = Vec::with_capacity(self.max_batch_size); - request_hashes.push(hash); while let Some(meta) = self.chain_metadata.remove(&hash) { request_hashes.push(meta.block_hash); hash = meta.parent_hash; @@ -197,9 +271,9 @@ impl BlockStore { // Fire request if let Some(peer) = self.get_next_peer() { tracing::debug!( - "blockstore::RequestMissingBlocks : requesting {} blocks from {}", + "blockstore::RequestMissingBlocks : requesting {} blocks of {}", request_hashes.len(), - peer.peer_id + self.landmark_metadata.len(), ); self.message_sender.send_external_message( peer.peer_id, @@ -213,7 +287,7 @@ impl BlockStore { } /// Handle a response to a metadata request. - /// + /// /// This is the first step in the syncing algorithm, where we receive a set of metadata and use it to /// construct a chain history. We then request the missing blocks from the chain. pub fn handle_metadata_response( @@ -221,7 +295,6 @@ impl BlockStore { from: PeerId, response: Vec, ) -> Result<()> { - // ... tracing::info!( "blockstore::MetadataResponse : received {} metadata from {}", response.len(), @@ -234,7 +307,7 @@ impl BlockStore { tracing::warn!("blockstore::MetadataResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); - } else if response.len() < self.max_blocks_in_flight { + } else if response.len() < self.max_batch_size { // Partial response, downgrade peer tracing::warn!("blockstore::MetadataResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); @@ -248,17 +321,18 @@ impl BlockStore { .sorted_by(|a, b| b.block_number.cmp(&a.block_number)) .collect_vec(); + self.last_metadata = Some(metadata.last().unwrap().clone()); + // Store the metadata for meta in metadata { // TODO: Check the linkage of the returned chain - if let Some(meta) = self.chain_metadata.insert(meta.block_hash, meta) { - self.last_metadata = Some(meta); - } + self.chain_metadata.insert(meta.block_hash, meta); } // If the last block does not exist in our canonical history, fire the next request if let Some(meta) = self.last_metadata.as_ref() { if self.db.get_block_by_hash(&meta.block_hash)?.is_none() { + // TODO: store the peer that provided this metadata self.request_missing_chain(None)?; } else { // Hit our internal history. Begin replicating chain. @@ -287,7 +361,7 @@ impl BlockStore { // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. - let batch_size = self.max_batch_size.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return + let batch_size: usize = self.max_batch_size.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return let mut metas = Vec::with_capacity(batch_size); let mut hash = request.from_hash; while metas.len() < batch_size { @@ -386,11 +460,6 @@ impl BlockStore { /// last known Proposal in the pipeline. This is used for speculative fetches, and also for /// knowing where to continue fetching from. 
fn inject_proposals(&mut self, proposals: Vec) -> Result<()> { - tracing::info!( - "blockstore::InjectProposals : injecting {} proposals", - proposals.len() - ); - if proposals.is_empty() { return Ok(()); } @@ -401,6 +470,7 @@ impl BlockStore { // Increment proposals injected self.injected = self.injected.saturating_add(proposals.len()); + let len = proposals.len(); // Just pump the Proposals back to ourselves. for p in proposals { @@ -418,6 +488,12 @@ impl BlockStore { }), )?; } + + tracing::info!( + "blockstore::InjectProposals : injected {}/{} proposals", + len, + self.injected + ); // return last proposal Ok(()) } @@ -576,7 +652,7 @@ impl BlockStore { if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { tracing::warn!( - "blockstore::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", + "blockstore::RequestMissingChain : in-flight request {} timed out, requesting from new peer", peer.peer_id ); self.done_with_peer(DownGrade::Timeout); @@ -585,33 +661,42 @@ impl BlockStore { return Ok(()); } } else { + if self.injected > 0 { + tracing::warn!( + "blockstore::RequestMissingChain : too many {} blocks in flight", + self.injected + ); + return Ok(()); + } self.in_flight = self.get_next_peer(); if self.in_flight.is_none() { - tracing::warn!("blockstore::RequestMissingBlocks : insufficient peers to request missing blocks"); + tracing::warn!("blockstore::RequestMissingChain : insufficient peers to request missing blocks"); return Ok(()); } } let message = if let Some(omega_block) = omega_block { - self.landmark_metadata.push(omega_block.hash()); + let num = omega_block.number(); + let hash = omega_block.hash(); + self.landmark_metadata.push(hash); ExternalMessage::MetaDataRequest(RequestBlock { - from_number: omega_block.number(), - from_hash: omega_block.hash(), - batch_size: self.max_blocks_in_flight, + from_number: num, + from_hash: hash, + batch_size: self.max_batch_size, }) } else { let hash = self.last_metadata.as_ref().unwrap().parent_hash; self.landmark_metadata.push(hash); ExternalMessage::MetaDataRequest(RequestBlock { - from_number: self.last_metadata.as_ref().unwrap().block_number, + from_number: 0, from_hash: hash, - batch_size: self.max_blocks_in_flight, + batch_size: self.max_batch_size, }) }; let peer = self.in_flight.as_ref().unwrap(); tracing::info!( ?message, - "blockstore::RequestMissingBlocks : requesting missing chain from {}", + "blockstore::RequestMissingChain : requesting missing chain from {}", peer.peer_id ); self.message_sender diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 440656849..816d9a5f4 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -273,6 +273,13 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } + ExternalMessage::MultiBlockRequest(request) => { + let message = self + .consensus + .blockstore + .handle_multiblock_request(from, request)?; + self.request_responses.send((response_channel, message))?; + } ExternalMessage::MetaDataRequest(request) => { let message = self .consensus @@ -390,6 +397,11 @@ impl Node { pub fn handle_response(&mut self, from: PeerId, message: ExternalMessage) -> Result<()> { debug!(%from, to = %self.peer_id, %message, "handling response"); match message { + ExternalMessage::MultiBlockResponse(response) => { + self.consensus + .blockstore + .handle_multiblock_response(from, response)?; + } ExternalMessage::MetaDataResponse(response) => { self.consensus .blockstore From 
936b0a426d69538df3417b8d74a33fa0c93647af Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 3 Jan 2025 13:22:14 +0800 Subject: [PATCH 034/119] feat: [checkpoint - multi_block_request/response; never quite catching up.] --- zilliqa/src/blockstore.rs | 252 +++++++++++++++++++++++--------------- 1 file changed, 156 insertions(+), 96 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index 7fbd8a0c1..b4597963e 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -46,6 +46,7 @@ enum DownGrade { // TODO: How to handle case where only single source of truth i.e. bootstrap node? const GAP_THRESHOLD: usize = 5; // How big is big/small gap. +const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks, allowing it to catch up. #[derive(Debug)] pub struct BlockStore { @@ -73,8 +74,9 @@ pub struct BlockStore { // Chain metadata chain_metadata: BTreeMap, - last_metadata: Option, - landmark_metadata: Vec, + p1_metadata: Option, + p2_metadata: Option, + landmarks: Vec, } impl BlockStore { @@ -107,8 +109,9 @@ impl BlockStore { cache: HashMap::new(), latest_block: None, chain_metadata: BTreeMap::new(), - last_metadata: None, - landmark_metadata: Vec::new(), + p1_metadata: None, + landmarks: Vec::new(), + p2_metadata: None, }) } @@ -145,7 +148,18 @@ impl BlockStore { "blockstore::ProcessProposal : Parent block {} not found", block.parent_hash() ); - self.request_missing_chain(Some(block))?; + if self.p2_metadata.is_some() { + // Continue phase 2 + self.request_missing_blocks()?; + } else { + if self.p1_metadata.is_none() { + // Start phase 1 + self.request_missing_chain(Some(block))?; + } else { + // Continue phase 1 + self.request_missing_chain(None)?; + } + } return Ok(()); } Ok(()) @@ -197,7 +211,7 @@ impl BlockStore { // Empty response, downgrade peer tracing::warn!("blockstore::MultiBlockResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); - } else if response.len() < self.max_blocks_in_flight { + } else if response.len() < self.max_batch_size { // Partial response, downgrade peer tracing::warn!("blockstore::MultiBlockResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); @@ -210,6 +224,19 @@ impl BlockStore { .sorted_by_key(|p| p.number()) .collect_vec(); + if let Some(landmark) = self.landmarks.pop() { + // remove the last landmark, should match proposals.last() + let hash = proposals.last().as_ref().unwrap().hash(); + if hash != landmark { + tracing::warn!( + "blockstore::MultiBlockResponse : mismatched landmark {} != {}", + landmark, + hash, + ); + self.landmarks.push(landmark); // put it back + } + } + // Remove the blocks from the chain metadata, if they exist for p in &proposals { self.chain_metadata.remove(&p.hash()); @@ -217,8 +244,12 @@ impl BlockStore { self.inject_proposals(proposals)?; - // Request for next bunch - if !self.landmark_metadata.is_empty() { + // Done with phase 2, allow phase 1 to restart. + if self.landmarks.is_empty() { + self.p1_metadata = None; + self.chain_metadata.clear(); + } else if DO_SPECULATIVE { + // Speculatively request more blocks self.request_missing_blocks()?; } @@ -256,33 +287,60 @@ impl BlockStore { /// These hashes are then sent to a Peer for retrieval. /// This is Part 2 of the syncing algorithm. fn request_missing_blocks(&mut self) -> Result<()> { - // ... 
- tracing::info!("blockstore::RequestMissingBlocks : requesting missing blocks"); - - // If we have no landmarks, we have nothing to do - if let Some(mut hash) = self.landmark_metadata.pop() { - let mut request_hashes = Vec::with_capacity(self.max_batch_size); - while let Some(meta) = self.chain_metadata.remove(&hash) { - request_hashes.push(meta.block_hash); - hash = meta.parent_hash; - // re-insert the metadata so as not to lose it - // self.chain_metadata.insert(hash, meta); + // Early exit if there's a request in-flight; and if it has not expired. + if let Some(peer) = self.in_flight.as_ref() { + if peer.last_used.elapsed() > self.request_timeout { + tracing::warn!( + "blockstore::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", + peer.peer_id + ); + self.done_with_peer(DownGrade::Timeout); + } else { + return Ok(()); } - // Fire request - if let Some(peer) = self.get_next_peer() { + } else { + if self.p2_metadata.is_none() { + tracing::warn!( + "blockstore::RequestMissingBlocks : no metadata to request missing blocks" + ); + return Ok(()); + } + } + + if let Some(peer) = self.get_next_peer() { + // If we have no landmarks, we have nothing to do + self.p2_metadata = None; + if let Some(mut hash) = self.landmarks.pop() { + self.landmarks.push(hash); // we actually need to peek() at the last element + let mut request_hashes = Vec::with_capacity(self.max_batch_size); + while let Some(meta) = self.chain_metadata.remove(&hash) { + request_hashes.push(meta.block_hash); + hash = meta.parent_hash; + // re-insert the metadata so as not to lose it + // self.chain_metadata.insert(hash, meta); + self.p2_metadata = Some(meta); + } + + // Fire request tracing::debug!( - "blockstore::RequestMissingBlocks : requesting {} blocks of {}", + "blockstore::RequestMissingBlocks : requesting {} blocks of set #{}", request_hashes.len(), - self.landmark_metadata.len(), + self.landmarks.len(), ); self.message_sender.send_external_message( peer.peer_id, ExternalMessage::MultiBlockRequest(request_hashes), )?; self.in_flight = Some(peer); + } else { + // No more landmarks, we're done + self.peers.push(peer); } + } else { + tracing::warn!( + "blockstore::RequestMissingBlocks : insufficient peers to request missing blocks" + ); } - Ok(()) } @@ -321,7 +379,12 @@ impl BlockStore { .sorted_by(|a, b| b.block_number.cmp(&a.block_number)) .collect_vec(); - self.last_metadata = Some(metadata.last().unwrap().clone()); + let p1_metadata = metadata.last().unwrap().clone(); + let last_hash = p1_metadata.block_hash; + self.p1_metadata = Some(p1_metadata); + + self.landmarks + .push(metadata.first().as_ref().unwrap().block_hash); // Store the metadata for meta in metadata { @@ -330,14 +393,11 @@ impl BlockStore { } // If the last block does not exist in our canonical history, fire the next request - if let Some(meta) = self.last_metadata.as_ref() { - if self.db.get_block_by_hash(&meta.block_hash)?.is_none() { - // TODO: store the peer that provided this metadata - self.request_missing_chain(None)?; - } else { - // Hit our internal history. Begin replicating chain. - self.request_missing_blocks()?; - } + if self.db.get_block_by_hash(&last_hash)?.is_some() { + // Hit our internal history. Start phase 2. + self.p2_metadata = self.p1_metadata.clone(); + } else if DO_SPECULATIVE { + self.request_missing_chain(None)?; } Ok(()) @@ -381,6 +441,69 @@ impl BlockStore { Ok(message) } + /// Request missing chain from a peer. 
+ /// + /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. + /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. + /// Otherwise, it requests blocks from the given omega_block. + pub fn request_missing_chain(&mut self, omega_block: Option) -> Result<()> { + // Early exit if there's a request in-flight; and if it has not expired. + if let Some(peer) = self.in_flight.as_ref() { + if peer.last_used.elapsed() > self.request_timeout { + tracing::warn!( + "blockstore::RequestMissingChain : in-flight request {} timed out, requesting from new peer", + peer.peer_id + ); + self.done_with_peer(DownGrade::Timeout); + } else { + return Ok(()); + } + } else { + if self.injected > 0 { + tracing::warn!( + "blockstore::RequestMissingChain : too many {} blocks in flight", + self.injected + ); + return Ok(()); + } + } + + if let Some(peer) = self.get_next_peer() { + let message = if let Some(meta) = self.p1_metadata.as_ref() { + ExternalMessage::MetaDataRequest(RequestBlock { + from_number: 0, + from_hash: meta.parent_hash, + batch_size: self.max_batch_size, + }) + } else if let Some(omega_block) = omega_block { + let num = omega_block.number(); + let hash = omega_block.hash(); + ExternalMessage::MetaDataRequest(RequestBlock { + from_number: num, + from_hash: hash, + batch_size: self.max_batch_size, + }) + } else { + todo!("blockstore::RequestMissingChain : no metadata to request missing blocks"); + }; + + tracing::info!( + ?message, + "blockstore::RequestMissingChain : requesting missing chain from {}", + peer.peer_id + ); + self.message_sender + .send_external_message(peer.peer_id, message)?; + + self.in_flight = Some(peer); + } else { + tracing::warn!( + "blockstore::RequestMissingChain : insufficient peers to request missing blocks" + ); + } + Ok(()) + } + /// Request blocks from a hash, backwards. /// /// It will collect N blocks by following the block.parent_hash() of each requested block. @@ -641,69 +764,6 @@ impl BlockStore { Ok(()) } - /// Request blocks between the current height and the given block. - /// - /// The approach is to request blocks in batches of `max_batch_size` blocks. - /// If None block is provided, we request blocks from the last known canonical block forwards. - /// If the block gap is large, we request blocks from the last known canonical block forwards. - /// If the block gap is small, we request blocks from the latest block backwards. - pub fn request_missing_chain(&mut self, omega_block: Option) -> Result<()> { - // Early exit if there's a request in-flight; and if it has not expired. 
- if let Some(peer) = self.in_flight.as_ref() { - if peer.last_used.elapsed() > self.request_timeout { - tracing::warn!( - "blockstore::RequestMissingChain : in-flight request {} timed out, requesting from new peer", - peer.peer_id - ); - self.done_with_peer(DownGrade::Timeout); - self.in_flight = self.get_next_peer(); - } else { - return Ok(()); - } - } else { - if self.injected > 0 { - tracing::warn!( - "blockstore::RequestMissingChain : too many {} blocks in flight", - self.injected - ); - return Ok(()); - } - self.in_flight = self.get_next_peer(); - if self.in_flight.is_none() { - tracing::warn!("blockstore::RequestMissingChain : insufficient peers to request missing blocks"); - return Ok(()); - } - } - - let message = if let Some(omega_block) = omega_block { - let num = omega_block.number(); - let hash = omega_block.hash(); - self.landmark_metadata.push(hash); - ExternalMessage::MetaDataRequest(RequestBlock { - from_number: num, - from_hash: hash, - batch_size: self.max_batch_size, - }) - } else { - let hash = self.last_metadata.as_ref().unwrap().parent_hash; - self.landmark_metadata.push(hash); - ExternalMessage::MetaDataRequest(RequestBlock { - from_number: 0, - from_hash: hash, - batch_size: self.max_batch_size, - }) - }; - let peer = self.in_flight.as_ref().unwrap(); - tracing::info!( - ?message, - "blockstore::RequestMissingChain : requesting missing chain from {}", - peer.peer_id - ); - self.message_sender - .send_external_message(peer.peer_id, message)?; - Ok(()) - } - /// Add a peer to the list of peers. pub fn add_peer(&mut self, peer: PeerId) { // new peers should be tried last, which gives them time to sync first. From 9ea6e41279ba9b1c6540def96cae47745a1e787e Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 3 Jan 2025 15:52:15 +0800 Subject: [PATCH 035/119] chore: clippy. --- zilliqa/src/blockstore.rs | 83 +++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index b4597963e..f6bfd46fa 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -5,13 +5,13 @@ use std::{ time::{Duration, Instant}, }; -use crate::crypto::Hash; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; use crate::{ cfg::NodeConfig, + crypto::Hash, db::Db, message::{ Block, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, RequestBlock, @@ -32,18 +32,22 @@ enum DownGrade { // When a Proposal is received by Consensus, we check if the parent exists in our DB. // If not, then it triggers a syncing algorithm. // -// 1. We check if the gap between our last canonical block and the latest Proposal. -// a. If it is a small gap, we request for blocks, going backwards from Proposal. -// b. If it is a big gap, we request for blocks, going forwards from Canonical. -// 2. When we receive a forwards history response, we check for matches against the cache. -// This means that for a proposal to be injected, it must be corroborated by 2 sources. -// a. If it matches the cached value, we inject the proposal into the pipeline. -// b. If it does not match, we replace the cached value and request for more. -// b. If it does not exist in the cache, we cache the proposal. -// 3. When we receive a backwards history response, we inject it into the pipeline. -// a. If it does not line up with the existing Canonical, then it will be dropped. +// Phase 1: Request missing chain metadata. +// The entire chain metadata is stored in-memory, and is used to construct a chain of metadata. +// 1. 
We start with the latest Proposal and request the chain of metadata from a peer. +// 2. We construct the chain of metadata, based on the response received. +// 3. If the last block does not exist in our canonical history, we repeat from 1. +// 4. If the last block exists, we have hit our canonical history, we move to Phase 2. // -// TODO: How to handle case where only single source of truth i.e. bootstrap node? +// Phase 2: Request missing blocks. +// 1. We construct a set of hashes, from the in-memory chain metadata. +// 2. We send these block hashes to a Peer for retrieval. +// 3. We inject the Proposals into the pipeline, when the response is received. +// 4. If there are still missing blocks, we repeat from 1. +// 5. If there are no more missing blocks, we are done. +// +// Subsequent missing Proposals are treated as a new sync algorithm. +// Eventually, we get up to 99.9% of the chain. const GAP_THRESHOLD: usize = 5; // How big is big/small gap. const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks, allowing it to catch up. @@ -151,14 +155,12 @@ impl BlockStore { if self.p2_metadata.is_some() { // Continue phase 2 self.request_missing_blocks()?; + } else if self.p1_metadata.is_none() { + // Start phase 1 + self.request_missing_chain(Some(block))?; } else { - if self.p1_metadata.is_none() { - // Start phase 1 - self.request_missing_chain(Some(block))?; - } else { - // Continue phase 1 - self.request_missing_chain(None)?; - } + // Continue phase 1 + self.request_missing_chain(None)?; } return Ok(()); } @@ -298,20 +300,18 @@ impl BlockStore { } else { return Ok(()); } - } else { - if self.p2_metadata.is_none() { - tracing::warn!( - "blockstore::RequestMissingBlocks : no metadata to request missing blocks" - ); - return Ok(()); - } + } else if self.p2_metadata.is_none() { + tracing::warn!( + "blockstore::RequestMissingBlocks : no metadata to request missing blocks" + ); + return Ok(()); } if let Some(peer) = self.get_next_peer() { // If we have no landmarks, we have nothing to do self.p2_metadata = None; - if let Some(mut hash) = self.landmarks.pop() { - self.landmarks.push(hash); // we actually need to peek() at the last element + if let Some(hash) = self.landmarks.last() { + let mut hash = *hash; // peek at the last value let mut request_hashes = Vec::with_capacity(self.max_batch_size); while let Some(meta) = self.chain_metadata.remove(&hash) { request_hashes.push(meta.block_hash); @@ -353,12 +353,6 @@ impl BlockStore { from: PeerId, response: Vec, ) -> Result<()> { - tracing::info!( - "blockstore::MetadataResponse : received {} metadata from {}", - response.len(), - from - ); - // Process whatever we have received. 
if response.is_empty() { // Empty response, downgrade peer @@ -386,6 +380,13 @@ impl BlockStore { self.landmarks .push(metadata.first().as_ref().unwrap().block_hash); + tracing::info!( + "blockstore::MetadataResponse : received {} metadata set #{} from {}", + metadata.len(), + self.landmarks.len(), + from + ); + // Store the metadata for meta in metadata { // TODO: Check the linkage of the returned chain @@ -458,14 +459,12 @@ impl BlockStore { } else { return Ok(()); } - } else { - if self.injected > 0 { - tracing::warn!( - "blockstore::RequestMissingChain : too many {} blocks in flight", - self.injected - ); - return Ok(()); - } + } else if self.injected > 0 { + tracing::warn!( + "blockstore::RequestMissingChain : too many {} blocks in flight", + self.injected + ); + return Ok(()); } if let Some(peer) = self.get_next_peer() { From c4c89febd6abcfdcfbbd85997b9d4a18f149c1b2 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 4 Jan 2025 11:53:16 +0800 Subject: [PATCH 036/119] feat: sync phase#3 - zip it up. works for syncing new nodes. --- zilliqa/src/blockstore.rs | 111 ++++++++++++++++++++++++-------------- zilliqa/src/consensus.rs | 1 - zilliqa/src/node.rs | 8 +-- 3 files changed, 76 insertions(+), 44 deletions(-) diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/blockstore.rs index f6bfd46fa..a6de12d5c 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/blockstore.rs @@ -1,6 +1,6 @@ use std::{ cmp::Ordering, - collections::{BTreeMap, BinaryHeap, HashMap}, + collections::{BTreeMap, BinaryHeap, HashMap, VecDeque}, sync::Arc, time::{Duration, Instant}, }; @@ -40,16 +40,21 @@ enum DownGrade { // 4. If the last block exists, we have hit our canonical history, we move to Phase 2. // // Phase 2: Request missing blocks. +// Once the chain metadata is constructed, we request the missing blocks to replay the history. // 1. We construct a set of hashes, from the in-memory chain metadata. // 2. We send these block hashes to a Peer for retrieval. // 3. We inject the Proposals into the pipeline, when the response is received. // 4. If there are still missing blocks, we repeat from 1. -// 5. If there are no more missing blocks, we are done. +// 5. If there are no more missing blocks, we are done, ready for Phase 3. // -// Subsequent missing Proposals are treated as a new sync algorithm. -// Eventually, we get up to 99.9% of the chain. - -const GAP_THRESHOLD: usize = 5; // How big is big/small gap. +// Phase 3: Zip it up. +// Phase 1 & 2 brings up to 99% of the chain. This step closes the last gap. +// 1. We queue all newly received Proposals, while Phase 1 & 2 were in progress. +// 2. We check the head of the queue if it's parent exists in our canonical history. +// 3. If it does not, we trigger Phase 1. +// 4. If it does, we inject the entire queue into the pipeline. We are done. + +const GAP_THRESHOLD: usize = 10; // How big is big/small gap. const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks, allowing it to catch up. #[derive(Debug)] @@ -81,6 +86,7 @@ pub struct BlockStore { p1_metadata: Option, p2_metadata: Option, landmarks: Vec, + zip_queue: VecDeque, } impl BlockStore { @@ -116,6 +122,7 @@ impl BlockStore { p1_metadata: None, landmarks: Vec::new(), p2_metadata: None, + zip_queue: VecDeque::with_capacity(GAP_THRESHOLD), }) } @@ -139,30 +146,55 @@ impl BlockStore { Ok(()) } - /// Process a block proposal. - /// Checks if the parent block exists, and if not, triggers a sync. - pub fn process_proposal(&mut self, block: Block) -> Result<()> { - // ... 
- // check if block parent exists - let parent_block = self.db.get_block_by_hash(&block.parent_hash())?; - - // no parent block, trigger sync - if parent_block.is_none() { - tracing::warn!( - "blockstore::ProcessProposal : Parent block {} not found", - block.parent_hash() - ); - if self.p2_metadata.is_some() { - // Continue phase 2 - self.request_missing_blocks()?; - } else if self.p1_metadata.is_none() { - // Start phase 1 - self.request_missing_chain(Some(block))?; + /// Sync a block proposal. + /// + /// This is the main entry point for syncing a block proposal. + /// We start by enqueuing all proposals, and then check if the parent block exists in history. + /// If the parent block exists, we do nothing. Ttherwise, we check the oldest one in the queue. + /// If we find its parent in history, we inject the entire queue. + /// + /// We do not perform checks on the Proposal here. This is done in the consensus layer. + pub fn sync_proposal(&mut self, proposal: Proposal) -> Result<()> { + // just stuff the latest proposal into the fixed-size queue. + while self.zip_queue.len() >= GAP_THRESHOLD { + self.zip_queue.pop_front(); + } + self.zip_queue.push_back(proposal); + + // TODO: Replace with single SQL query + // Check if block parent exist in history + let parent_hash = self.zip_queue.back().unwrap().header.qc.block_hash; + if self.db.get_block_by_hash(&parent_hash)?.is_none() { + // Check if oldes block exists in the history. If it does, we have synced up 99% of the chain. + let ancestor_hash = self.zip_queue.front().unwrap().header.qc.block_hash; + if self.zip_queue.len() == 1 || self.db.get_block_by_hash(&ancestor_hash)?.is_none() { + // No ancestor block, trigger sync + tracing::warn!( + "blockstore::SyncProposal : parent block {} not found", + parent_hash + ); + if self.p2_metadata.is_some() { + // Continue phase 2 + self.request_missing_blocks()?; + } else if self.p1_metadata.is_some() { + // Continue phase 1 + self.request_missing_chain(None)?; + } else { + // Start phase 1 + self.request_missing_chain(Some(parent_hash))?; + } } else { - // Continue phase 1 - self.request_missing_chain(None)?; + // 99% synced, zip it up! + tracing::info!( + "blockstore::SyncProposal : zip up {} blocks from {}", + self.zip_queue.len(), + ancestor_hash + ); + // parent block exists, inject the proposal + let proposals = self.zip_queue.drain(..).collect_vec(); + self.inject_proposals(proposals)?; + // we're done } - return Ok(()); } Ok(()) } @@ -201,13 +233,6 @@ impl BlockStore { from: PeerId, response: Vec, ) -> Result<()> { - // ... - tracing::info!( - "blockstore::MultiBlockResponse : received {} blocks from {}", - response.len(), - from - ); - // Process whatever we received if response.is_empty() { // Empty response, downgrade peer @@ -221,11 +246,19 @@ impl BlockStore { self.done_with_peer(DownGrade::None); } + // Sort proposals by number, ascending let proposals = response .into_iter() .sorted_by_key(|p| p.number()) .collect_vec(); + tracing::info!( + "blockstore::MultiBlockResponse : received {} blocks for set #{} from {}", + proposals.len(), + self.landmarks.len(), + from + ); + if let Some(landmark) = self.landmarks.pop() { // remove the last landmark, should match proposals.last() let hash = proposals.last().as_ref().unwrap().hash(); @@ -447,7 +480,7 @@ impl BlockStore { /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. 
/// Otherwise, it requests blocks from the given omega_block. - pub fn request_missing_chain(&mut self, omega_block: Option) -> Result<()> { + pub fn request_missing_chain(&mut self, parent_hash: Option) -> Result<()> { // Early exit if there's a request in-flight; and if it has not expired. if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { @@ -474,11 +507,9 @@ impl BlockStore { from_hash: meta.parent_hash, batch_size: self.max_batch_size, }) - } else if let Some(omega_block) = omega_block { - let num = omega_block.number(); - let hash = omega_block.hash(); + } else if let Some(hash) = parent_hash { ExternalMessage::MetaDataRequest(RequestBlock { - from_number: num, + from_number: 0, from_hash: hash, batch_size: self.max_batch_size, }) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index e1f5db700..3ffc64204 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -651,7 +651,6 @@ impl Consensus { ); // FIXME: Cleanup - self.blockstore.process_proposal(block.clone())?; if self.block_store.contains_block(&block.hash())? { trace!("ignoring block proposal, block store contains this block already"); diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 816d9a5f4..6ba0dc917 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -376,8 +376,8 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } - _ => { - warn!("unexpected message type"); + msg => { + warn!(%msg, "unexpected message type"); } } @@ -968,7 +968,7 @@ impl Node { } fn handle_proposal(&mut self, from: PeerId, proposal: Proposal) -> Result<()> { - if let Some((to, message)) = self.consensus.proposal(from, proposal, false)? { + if let Some((to, message)) = self.consensus.proposal(from, proposal.clone(), false)? { self.reset_timeout .send(self.config.consensus.consensus_timeout)?; if let Some(to) = to { @@ -976,6 +976,8 @@ impl Node { } else { self.message_sender.broadcast_proposal(message)?; } + } else { + self.consensus.blockstore.sync_proposal(proposal)?; // proposal is already verified } Ok(()) From 39d2cd43fd59d4fefb81b9536a9bf0d3e6ebac31 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 08:56:30 +0800 Subject: [PATCH 037/119] feat: rename blockstore.rs to sync.rs - makes clear that its job is to sync. 
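Patch 036 above gates the zip-up phase on a fixed-size queue of the most recent proposals: each new proposal is pushed at the back and the oldest entries are evicted once GAP_THRESHOLD is reached. A minimal, self-contained sketch of that bounded-queue behaviour using std's VecDeque; plain block numbers stand in for full Proposal structs and push_bounded is an illustrative helper, not a function from the patch.

    use std::collections::VecDeque;

    const GAP_THRESHOLD: usize = 10; // bound mirroring GAP_THRESHOLD in the patch

    /// Push a new item, evicting the oldest entries so the queue never
    /// holds more than GAP_THRESHOLD elements.
    fn push_bounded(queue: &mut VecDeque<u64>, item: u64) {
        while queue.len() >= GAP_THRESHOLD {
            queue.pop_front();
        }
        queue.push_back(item);
    }

    fn main() {
        let mut recent = VecDeque::with_capacity(GAP_THRESHOLD);
        for block_number in 0..25u64 {
            push_bounded(&mut recent, block_number);
        }
        // Only the 10 most recent block numbers remain: 15..=24.
        assert_eq!(recent.front(), Some(&15));
        assert_eq!(recent.back(), Some(&24));
        assert_eq!(recent.len(), GAP_THRESHOLD);
    }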
--- zilliqa/src/consensus.rs | 8 +- zilliqa/src/lib.rs | 2 +- zilliqa/src/node.rs | 125 ++++--------------------- zilliqa/src/{blockstore.rs => sync.rs} | 4 +- 4 files changed, 25 insertions(+), 114 deletions(-) rename zilliqa/src/{blockstore.rs => sync.rs} (99%) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 3ffc64204..cca2c722e 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -22,7 +22,6 @@ use tracing::*; use crate::{ block_store::BlockStore, blockhooks, - blockstore::BlockStore as BlockStore2, cfg::{ConsensusConfig, NodeConfig}, constants::TIME_TO_ALLOW_PROPOSAL_BROADCAST, contracts, @@ -39,6 +38,7 @@ use crate::{ pool::{TransactionPool, TxAddResult, TxPoolContent}, range_map::RangeMap, state::State, + sync::Sync, time::SystemTime, transaction::{EvmGas, SignedTransaction, TransactionReceipt, VerifiedTransaction}, }; @@ -152,7 +152,7 @@ pub struct Consensus { config: NodeConfig, message_sender: MessageSender, reset_timeout: UnboundedSender, - pub blockstore: BlockStore2, + pub sync: Sync, pub block_store: BlockStore, latest_leader_cache: RefCell>, votes: BTreeMap, @@ -208,7 +208,7 @@ impl Consensus { )?; } - let blockstore = BlockStore2::new(&config, db.clone(), message_sender.clone(), Vec::new())?; + let sync = Sync::new(&config, db.clone(), message_sender.clone(), Vec::new())?; // It is important to create the `BlockStore` after the checkpoint has been loaded into the DB. The // `BlockStore` pre-loads and caches information about the currently stored blocks. @@ -328,7 +328,7 @@ impl Consensus { let mut consensus = Consensus { secret_key, config, - blockstore, + sync, block_store, latest_leader_cache: RefCell::new(None), message_sender, diff --git a/zilliqa/src/lib.rs b/zilliqa/src/lib.rs index 28445f822..bbb360644 100644 --- a/zilliqa/src/lib.rs +++ b/zilliqa/src/lib.rs @@ -1,7 +1,6 @@ pub mod api; pub mod block_store; mod blockhooks; -pub mod blockstore; pub mod cfg; pub mod consensus; pub mod constants; @@ -25,6 +24,7 @@ mod scilla_proto; pub mod serde_util; pub mod state; pub mod test_util; +pub mod sync; pub mod time; pub mod transaction; pub mod zq1_proto; diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 6ba0dc917..b8d2f535f 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -226,10 +226,10 @@ impl Node { } } ExternalMessage::AddPeer => { - self.consensus.blockstore.add_peer(from); + self.consensus.sync.add_peer(from); } ExternalMessage::RemovePeer => { - self.consensus.blockstore.remove_peer(from); + self.consensus.sync.remove_peer(from); } // `Proposals` are re-routed to `handle_request()` _ => { @@ -276,91 +276,14 @@ impl Node { ExternalMessage::MultiBlockRequest(request) => { let message = self .consensus - .blockstore + .sync .handle_multiblock_request(from, request)?; self.request_responses.send((response_channel, message))?; } ExternalMessage::MetaDataRequest(request) => { - let message = self - .consensus - .blockstore - .handle_metadata_request(from, request)?; - self.request_responses.send((response_channel, message))?; - } - ExternalMessage::RequestFromNumber(request) => { - let message = self - .consensus - .blockstore - .handle_request_from_number(from, request)?; + let message = self.consensus.sync.handle_metadata_request(from, request)?; self.request_responses.send((response_channel, message))?; } - ExternalMessage::RequestFromHash(request) => { - let message = self - .consensus - .blockstore - .handle_request_from_hash(from, request)?; - self.request_responses.send((response_channel, message))?; 
- } - // Respond negatively to old BlockRequests. - ExternalMessage::BlockRequest(request) => { - self.request_responses.send(( - response_channel, - ExternalMessage::BlockResponse(BlockResponse { - proposals: vec![], - from_view: request.from_view, - availability: None, - }), - ))?; - return Ok(()); - - // if from == self.peer_id { - // debug!("block_store::BlockRequest : ignoring blocks request to self"); - // return Ok(()); - // } - - // trace!( - // "block_store::BlockRequest : received a block request - {}", - // self.peer_id - // ); - // // Note that it is very important that we limit this by number of blocks - // // returned, _not_ by max view range returned. If we don't, then any - // // view gap larger than block_request_limit will never be filliable - // // because no node will ever be prepared to return the block after it. - // let proposals: Vec = (request.from_view..=request.to_view) - // .take(self.config.block_request_limit) - // .filter_map(|view| { - // self.consensus - // .get_block_by_view(view) - // .transpose() - // .map(|block| Ok(self.block_to_proposal(block?))) - // }) - // .collect::>()?; - - // let availability = self.consensus.block_store.availability()?; - // trace!("block_store::BlockRequest - responding to new blocks request {id:?} from {from:?} of {request:?} with props {0:?} availability {availability:?}", - // proposals.iter().fold("".to_string(), |state, x| format!("{},{}", state, x.header.view))); - - // // Send the response to this block request. - // self.request_responses.send(( - // response_channel, - // ExternalMessage::BlockResponse(BlockResponse { - // proposals, - // from_view: request.from_view, - // availability, - // }), - // ))?; - } - // We don't usually expect a [BlockResponse] to be received as a request, however this can occur when our - // [BlockStore] has re-sent a previously unusable block because we didn't (yet) have the block's parent. - // Having knowledge of this here breaks our abstraction boundaries slightly, but it also keeps things - // simple. - ExternalMessage::BlockResponse(m) => { - self.handle_block_response(from, m)?; - // Acknowledge this block response. This does nothing because the `BlockResponse` request was sent by - // us, but we keep it here for symmetry with the other handlers. - self.request_responses - .send((response_channel, ExternalMessage::Acknowledgement))?; - } // This just breaks down group block messages into individual messages to stop them blocking threads // for long periods. 
ExternalMessage::InjectedProposal(p) => { @@ -397,30 +320,18 @@ impl Node { pub fn handle_response(&mut self, from: PeerId, message: ExternalMessage) -> Result<()> { debug!(%from, to = %self.peer_id, %message, "handling response"); match message { - ExternalMessage::MultiBlockResponse(response) => { - self.consensus - .blockstore - .handle_multiblock_response(from, response)?; - } - ExternalMessage::MetaDataResponse(response) => { - self.consensus - .blockstore - .handle_metadata_response(from, response)?; - } - ExternalMessage::ResponseFromNumber(response) => { - self.consensus - .blockstore - .handle_response_from_number(from, response)?; - } - ExternalMessage::ResponseFromHash(response) => { - self.consensus - .blockstore - .handle_response_from_hash(from, response)?; - } - ExternalMessage::BlockResponse(m) => self.handle_block_response(from, m)?, + ExternalMessage::MultiBlockResponse(response) => self + .consensus + .sync + .handle_multiblock_response(from, response)?, + + ExternalMessage::MetaDataResponse(response) => self + .consensus + .sync + .handle_metadata_response(from, response)?, ExternalMessage::Acknowledgement => {} - _ => { - warn!("unexpected message type"); + msg => { + warn!(%msg, "unexpected message type"); } } @@ -977,13 +888,13 @@ impl Node { self.message_sender.broadcast_proposal(message)?; } } else { - self.consensus.blockstore.sync_proposal(proposal)?; // proposal is already verified + self.consensus.sync.sync_proposal(proposal)?; // proposal is already verified } Ok(()) } - fn handle_block_response(&mut self, from: PeerId, response: BlockResponse) -> Result<()> { + fn _handle_block_response(&mut self, from: PeerId, response: BlockResponse) -> Result<()> { trace!( "block_store::handle_block_response - received blocks response of length {}", response.proposals.len() @@ -1009,7 +920,7 @@ impl Node { return Ok(()); } trace!("Handling proposal for view {0}", req.block.header.view); - self.consensus.blockstore.mark_received_proposal(&req)?; + self.consensus.sync.mark_received_proposal(&req)?; let proposal = self.consensus.receive_block(from, req.block)?; if let Some(proposal) = proposal { trace!( diff --git a/zilliqa/src/blockstore.rs b/zilliqa/src/sync.rs similarity index 99% rename from zilliqa/src/blockstore.rs rename to zilliqa/src/sync.rs index a6de12d5c..5a88e2e8e 100644 --- a/zilliqa/src/blockstore.rs +++ b/zilliqa/src/sync.rs @@ -58,7 +58,7 @@ const GAP_THRESHOLD: usize = 10; // How big is big/small gap. const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks, allowing it to catch up. #[derive(Debug)] -pub struct BlockStore { +pub struct Sync { // database db: Arc, // message bus @@ -89,7 +89,7 @@ pub struct BlockStore { zip_queue: VecDeque, } -impl BlockStore { +impl Sync { pub fn new( config: &NodeConfig, db: Arc, From 6ccc7ca7d592c67c5a67253486019a4b640eb875 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 09:34:44 +0800 Subject: [PATCH 038/119] nit: minor refactor - removing previous strategy. 
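Patches 036-037 above push historical blocks back through the node's own message queue as InjectedProposal messages so that a large batch never blocks a thread: the injected counter goes up when a batch is queued, goes back down as each proposal is marked received, and further block requests are gated on it staying below max_blocks_in_flight. A rough sketch of that back-pressure idea; InFlightGate and its methods are illustrative names, only the injected / max_blocks_in_flight pairing mirrors the fields shown above.

    /// Illustrative back-pressure gate: track how many injected proposals are
    /// still pending and refuse to request more work past a fixed limit.
    struct InFlightGate {
        injected: usize,
        max_blocks_in_flight: usize,
    }

    impl InFlightGate {
        fn new(max_blocks_in_flight: usize) -> Self {
            Self { injected: 0, max_blocks_in_flight }
        }

        /// Called when a batch of proposals is pushed into the pipeline.
        fn on_injected(&mut self, count: usize) {
            self.injected = self.injected.saturating_add(count);
        }

        /// Called when one injected proposal has been processed.
        fn on_received(&mut self) {
            self.injected = self.injected.saturating_sub(1);
        }

        /// Only issue the next request when there is room in the pipeline.
        fn may_request_more(&self) -> bool {
            self.injected < self.max_blocks_in_flight
        }
    }

    fn main() {
        let mut gate = InFlightGate::new(100);
        gate.on_injected(100);
        assert!(!gate.may_request_more());
        for _ in 0..40 {
            gate.on_received();
        }
        assert!(gate.may_request_more()); // 60 pending < 100 allowed
    }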
--- zilliqa/src/sync.rs | 313 ++++++++------------------------------------ 1 file changed, 52 insertions(+), 261 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 5a88e2e8e..decee9fb8 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -1,6 +1,6 @@ use std::{ cmp::Ordering, - collections::{BTreeMap, BinaryHeap, HashMap, VecDeque}, + collections::{BTreeMap, BinaryHeap, VecDeque}, sync::Arc, time::{Duration, Instant}, }; @@ -13,10 +13,7 @@ use crate::{ cfg::NodeConfig, crypto::Hash, db::Db, - message::{ - Block, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, RequestBlock, - ResponseBlock, - }, + message::{Block, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, RequestBlock}, node::MessageSender, }; @@ -32,30 +29,33 @@ enum DownGrade { // When a Proposal is received by Consensus, we check if the parent exists in our DB. // If not, then it triggers a syncing algorithm. // -// Phase 1: Request missing chain metadata. +// PHASE 1: Request missing chain metadata. // The entire chain metadata is stored in-memory, and is used to construct a chain of metadata. // 1. We start with the latest Proposal and request the chain of metadata from a peer. // 2. We construct the chain of metadata, based on the response received. -// 3. If the last block does not exist in our canonical history, we repeat from 1. -// 4. If the last block exists, we have hit our canonical history, we move to Phase 2. +// 3. If the last block does not exist in our canonical history, we request for additional metadata. +// 4. If the last block exists, we have hit our canonical history. +// 5. Move to Phase 2. // -// Phase 2: Request missing blocks. +// PHASE 2: Request missing blocks. // Once the chain metadata is constructed, we request the missing blocks to replay the history. // 1. We construct a set of hashes, from the in-memory chain metadata. -// 2. We send these block hashes to a Peer for retrieval. +// 2. We send these block hashes to the same Peer (that sent the metadata) for retrieval. // 3. We inject the Proposals into the pipeline, when the response is received. -// 4. If there are still missing blocks, we repeat from 1. -// 5. If there are no more missing blocks, we are done, ready for Phase 3. +// 4. If there are still missing blocks, we ask for more, from 1. +// 5. If there are no more missing blocks, we have filled up all blocks from the chain metadata. +// 6. Ready for Phase 3. // -// Phase 3: Zip it up. -// Phase 1 & 2 brings up to 99% of the chain. This step closes the last gap. +// PHASE 3: Zip it up. +// Phase 1&2 may run several times that brings up 99% of the chain. This closes the final gap. // 1. We queue all newly received Proposals, while Phase 1 & 2 were in progress. -// 2. We check the head of the queue if it's parent exists in our canonical history. -// 3. If it does not, we trigger Phase 1. -// 4. If it does, we inject the entire queue into the pipeline. We are done. +// 2. We check the head of the queue if its parent exists in our canonical history. +// 3. If it does not, we trigger Phase 1&2. +// 4. If it does, we inject the entire queue into the pipeline. +// 5. We are caught up. -const GAP_THRESHOLD: usize = 10; // How big is big/small gap. -const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks, allowing it to catch up. +const GAP_THRESHOLD: usize = 20; // Size of internal Proposal cache. +const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks. 
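The three phases described above are not tracked with an explicit state field; which request to fire next is inferred from which cursors are currently populated when a proposal arrives. A compact sketch of that decision logic, with the cursors reduced to booleans; NextAction and next_action are illustrative names only.

    /// Illustrative reduction of the phase selection in sync_proposal():
    /// the next step is derived from which cursors are populated.
    #[derive(Debug, PartialEq)]
    enum NextAction {
        ZipUp,          // ancestor of the queued proposals is known: inject the queue
        ContinuePhase2, // phase-2 cursor set: keep fetching missing blocks
        ContinuePhase1, // phase-1 cursor set: keep extending the metadata chain
        StartPhase1,    // nothing set yet: start from the latest proposal
    }

    fn next_action(
        parent_known: bool,   // parent of the newest proposal is already in the DB
        ancestor_known: bool, // parent of the oldest queued proposal is in the DB
        p1_cursor_set: bool,
        p2_cursor_set: bool,
    ) -> Option<NextAction> {
        if parent_known {
            return None; // nothing to do, we are in sync
        }
        if ancestor_known {
            return Some(NextAction::ZipUp);
        }
        Some(if p2_cursor_set {
            NextAction::ContinuePhase2
        } else if p1_cursor_set {
            NextAction::ContinuePhase1
        } else {
            NextAction::StartPhase1
        })
    }

    fn main() {
        assert_eq!(next_action(true, false, false, false), None);
        assert_eq!(next_action(false, true, false, false), Some(NextAction::ZipUp));
        assert_eq!(next_action(false, false, false, true), Some(NextAction::ContinuePhase2));
        assert_eq!(next_action(false, false, true, false), Some(NextAction::ContinuePhase1));
        assert_eq!(next_action(false, false, false, false), Some(NextAction::StartPhase1));
    }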
#[derive(Debug)] pub struct Sync { @@ -77,9 +77,6 @@ pub struct Sync { peer_id: PeerId, // how many injected proposals injected: usize, - // cache - cache: HashMap, - latest_block: Option, // Chain metadata chain_metadata: BTreeMap, @@ -116,8 +113,6 @@ impl Sync { max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, injected: 0, - cache: HashMap::new(), - latest_block: None, chain_metadata: BTreeMap::new(), p1_metadata: None, landmarks: Vec::new(), @@ -132,14 +127,14 @@ impl Sync { pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { if prop.from != self.peer_id { tracing::error!( - "blockstore::MarkReceivedProposal : foreign InjectedProposal from {}", + "sync::MarkReceivedProposal : foreign InjectedProposal from {}", prop.from ); } - if let Some(p) = self.cache.remove(&prop.block.number()) { + if let Some(p) = self.chain_metadata.remove(&prop.block.hash()) { tracing::warn!( - "blockstore::MarkReceivedProposal : removing stale cache proposal {}", - p.number() + "sync::MarkReceivedProposal : removing stale metadata {}", + p.block_hash ); } self.injected = self.injected.saturating_sub(1); @@ -170,7 +165,7 @@ impl Sync { if self.zip_queue.len() == 1 || self.db.get_block_by_hash(&ancestor_hash)?.is_none() { // No ancestor block, trigger sync tracing::warn!( - "blockstore::SyncProposal : parent block {} not found", + "sync::SyncProposal : parent block {} not found", parent_hash ); if self.p2_metadata.is_some() { @@ -186,7 +181,7 @@ impl Sync { } else { // 99% synced, zip it up! tracing::info!( - "blockstore::SyncProposal : zip up {} blocks from {}", + "sync::SyncProposal : zip up {} blocks from {}", self.zip_queue.len(), ancestor_hash ); @@ -236,11 +231,12 @@ impl Sync { // Process whatever we received if response.is_empty() { // Empty response, downgrade peer - tracing::warn!("blockstore::MultiBlockResponse : empty blocks {from}",); + tracing::warn!("sync::MultiBlockResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); } else if response.len() < self.max_batch_size { // Partial response, downgrade peer - tracing::warn!("blockstore::MultiBlockResponse : partial blocks {from}",); + // TODO: Match against request numbers + tracing::warn!("sync::MultiBlockResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { self.done_with_peer(DownGrade::None); @@ -253,7 +249,7 @@ impl Sync { .collect_vec(); tracing::info!( - "blockstore::MultiBlockResponse : received {} blocks for set #{} from {}", + "sync::MultiBlockResponse : received {} blocks for set #{} from {}", proposals.len(), self.landmarks.len(), from @@ -264,7 +260,7 @@ impl Sync { let hash = proposals.last().as_ref().unwrap().hash(); if hash != landmark { tracing::warn!( - "blockstore::MultiBlockResponse : mismatched landmark {} != {}", + "sync::MultiBlockResponse : mismatched landmark {} != {}", landmark, hash, ); @@ -283,7 +279,7 @@ impl Sync { if self.landmarks.is_empty() { self.p1_metadata = None; self.chain_metadata.clear(); - } else if DO_SPECULATIVE { + } else if DO_SPECULATIVE && self.injected < self.max_blocks_in_flight { // Speculatively request more blocks self.request_missing_blocks()?; } @@ -298,7 +294,7 @@ impl Sync { ) -> Result { // ... 
tracing::info!( - "blockstore::MultiBlockRequest : received a {} multiblock request from {}", + "sync::MultiBlockRequest : received a {} multiblock request from {}", request.len(), from ); @@ -326,7 +322,7 @@ impl Sync { if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { tracing::warn!( - "blockstore::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", + "sync::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", peer.peer_id ); self.done_with_peer(DownGrade::Timeout); @@ -335,11 +331,12 @@ impl Sync { } } else if self.p2_metadata.is_none() { tracing::warn!( - "blockstore::RequestMissingBlocks : no metadata to request missing blocks" + "sync::RequestMissingBlocks : no metadata to request missing blocks" ); return Ok(()); } + // TODO: Use original peer, which would have the set of blocks if let Some(peer) = self.get_next_peer() { // If we have no landmarks, we have nothing to do self.p2_metadata = None; @@ -356,7 +353,7 @@ impl Sync { // Fire request tracing::debug!( - "blockstore::RequestMissingBlocks : requesting {} blocks of set #{}", + "sync::RequestMissingBlocks : requesting {} blocks of set #{}", request_hashes.len(), self.landmarks.len(), ); @@ -371,7 +368,7 @@ impl Sync { } } else { tracing::warn!( - "blockstore::RequestMissingBlocks : insufficient peers to request missing blocks" + "sync::RequestMissingBlocks : insufficient peers to request missing blocks" ); } Ok(()) @@ -389,17 +386,19 @@ impl Sync { // Process whatever we have received. if response.is_empty() { // Empty response, downgrade peer - tracing::warn!("blockstore::MetadataResponse : empty blocks {from}",); + tracing::warn!("sync::MetadataResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); } else if response.len() < self.max_batch_size { // Partial response, downgrade peer - tracing::warn!("blockstore::MetadataResponse : partial blocks {from}",); + tracing::warn!("sync::MetadataResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { self.done_with_peer(DownGrade::None); } + // TODO: Check the linkage of the returned chain + // Sort metadata by number, reversed let metadata = response .into_iter() @@ -410,11 +409,13 @@ impl Sync { let last_hash = p1_metadata.block_hash; self.p1_metadata = Some(p1_metadata); + // TODO: Store peer id. 
+ // TODO: Insert intermediate landmarks self.landmarks .push(metadata.first().as_ref().unwrap().block_hash); tracing::info!( - "blockstore::MetadataResponse : received {} metadata set #{} from {}", + "sync::MetadataResponse : received {} metadata set #{} from {}", metadata.len(), self.landmarks.len(), from @@ -422,7 +423,6 @@ impl Sync { // Store the metadata for meta in metadata { - // TODO: Check the linkage of the returned chain self.chain_metadata.insert(meta.block_hash, meta); } @@ -448,7 +448,7 @@ impl Sync { request: RequestBlock, ) -> Result { tracing::info!( - "blockstore::MetadataRequest : received a metadata request from {}", + "sync::MetadataRequest : received a metadata request from {}", from ); @@ -470,7 +470,7 @@ impl Sync { let message = ExternalMessage::MetaDataResponse(metas); tracing::trace!( ?message, - "blockstore::MetadataFromHash : responding to block request" + "sync::MetadataFromHash : responding to block request" ); Ok(message) } @@ -485,7 +485,7 @@ impl Sync { if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { tracing::warn!( - "blockstore::RequestMissingChain : in-flight request {} timed out, requesting from new peer", + "sync::RequestMissingChain : in-flight request {} timed out, requesting from new peer", peer.peer_id ); self.done_with_peer(DownGrade::Timeout); @@ -494,7 +494,7 @@ impl Sync { } } else if self.injected > 0 { tracing::warn!( - "blockstore::RequestMissingChain : too many {} blocks in flight", + "sync::RequestMissingChain : too many {} blocks in flight", self.injected ); return Ok(()); @@ -514,12 +514,12 @@ impl Sync { batch_size: self.max_batch_size, }) } else { - todo!("blockstore::RequestMissingChain : no metadata to request missing blocks"); + todo!("sync::RequestMissingChain : no metadata to request missing blocks"); }; tracing::info!( ?message, - "blockstore::RequestMissingChain : requesting missing chain from {}", + "sync::RequestMissingChain : requesting missing chain from {}", peer.peer_id ); self.message_sender @@ -528,85 +528,12 @@ impl Sync { self.in_flight = Some(peer); } else { tracing::warn!( - "blockstore::RequestMissingChain : insufficient peers to request missing blocks" + "sync::RequestMissingChain : insufficient peers to request missing blocks" ); } Ok(()) } - /// Request blocks from a hash, backwards. - /// - /// It will collect N blocks by following the block.parent_hash() of each requested block. - pub fn handle_request_from_hash( - &mut self, - from: PeerId, - request: RequestBlock, - ) -> Result { - tracing::debug!( - "blockstore::RequestFromHash : received a block request from {}", - from - ); - - // TODO: Check if we should service this request - // Validators could respond to this request if there is nothing else to do. - - let batch_size = self.max_batch_size.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return - let mut proposals = Vec::with_capacity(batch_size); - let mut hash = request.from_hash; - while proposals.len() < batch_size { - // grab the parent - let Some(block) = self.db.get_block_by_hash(&hash)? else { - // that's all we have! - break; - }; - hash = block.parent_hash(); - proposals.push(self.block_to_proposal(block)); - } - - let message = ExternalMessage::ResponseFromHash(ResponseBlock { proposals }); - tracing::trace!( - ?message, - "blockstore::RequestFromHash : responding to block request from height" - ); - Ok(message) - } - - /// Request for blocks from a height, forwards. 
- pub fn handle_request_from_number( - &mut self, - from: PeerId, - request: RequestBlock, - ) -> Result { - // ... - tracing::debug!( - "blockstore::RequestFromNumber : received a block request from {}", - from - ); - - // TODO: Check if we should service this request. - // Validators shall not respond to this request. - - // TODO: Replace this with a single SQL query - let batch_size = self.max_batch_size.min(request.batch_size); // mitigate DOS attacks by limiting the number of blocks we send - let mut proposals = Vec::with_capacity(batch_size); - for num in request.from_number.saturating_add(1) - ..=request.from_number.saturating_add(batch_size as u64) - { - let Some(block) = self.db.get_canonical_block_by_number(num)? else { - // that's all we have! - break; - }; - proposals.push(self.block_to_proposal(block)); - } - - let message = ExternalMessage::ResponseFromNumber(ResponseBlock { proposals }); - tracing::trace!( - ?message, - "blockstore::RequestFromNumber : responding to block request from height" - ); - Ok(message) - } - /// Inject the proposals into the chain. /// /// Besides pumping the set of Proposals into the processing pipeline, it also records the @@ -617,10 +544,6 @@ impl Sync { return Ok(()); } - // Store the tip - let (last_block, _) = proposals.last().unwrap().clone().into_parts(); - self.latest_block = Some(last_block); - // Increment proposals injected self.injected = self.injected.saturating_add(proposals.len()); let len = proposals.len(); @@ -643,7 +566,7 @@ impl Sync { } tracing::info!( - "blockstore::InjectProposals : injected {}/{} proposals", + "sync::InjectProposals : injected {}/{} proposals", len, self.injected ); @@ -662,138 +585,6 @@ impl Sync { } } - pub fn handle_response_from_number( - &mut self, - from: PeerId, - response: ResponseBlock, - ) -> Result<()> { - // Process whatever we have received. - if response.proposals.is_empty() { - // Empty response, downgrade peer - tracing::warn!("blockstore::ResponseFromNumber : empty blocks {from}",); - self.done_with_peer(DownGrade::Empty); - return Ok(()); - } else if response.proposals.len() < self.max_batch_size { - // Partial response, downgrade peer - tracing::warn!("blockstore::ResponseFromNumber : partial blocks {from}",); - self.done_with_peer(DownGrade::Partial); - } else { - self.done_with_peer(DownGrade::None); - } - - tracing::info!( - "blockstore::ResponseFromNumber : received {} blocks from {}", - response.proposals.len(), - from - ); - - // TODO: Any additional checks we should do here? - - // Sort proposals by number - let proposals = response - .proposals - .into_iter() - .sorted_by_key(|p| p.number()) - .collect_vec(); - - // Insert into the cache. - // If current proposal matches another one in cache, from a different peer, inject the proposal. - // Else, replace the cached Proposal with the new one. 
- let mut corroborated_proposals = Vec::with_capacity(proposals.len()); - let mut props = proposals.into_iter(); - - // Collect corroborated proposals - for p in props.by_ref() { - if let Some(proposal) = self.cache.remove(&p.number()) { - // If the proposal already exists - if proposal.hash() == p.hash() { - // is corroborated proposal - corroborated_proposals.push(proposal); - } else { - // insert the different one and; - self.cache.insert(p.number(), p); - break; // replace the rest in the next loop - } - } else { - self.cache.insert(p.number(), p); - } - } - - // Replace/insert the rest of the proposals in the cache - for p in props { - self.cache.insert(p.number(), p); - } - - // Inject matched proposals - self.inject_proposals(corroborated_proposals)?; - - // Fire speculative request - if self.latest_block.is_some() && self.injected < self.max_blocks_in_flight { - if let Some(peer) = self.get_next_peer() { - // we're far from latest block - let message = RequestBlock { - from_number: self.latest_block.as_ref().unwrap().number(), - from_hash: self.latest_block.as_ref().unwrap().hash(), - batch_size: self.max_batch_size, - }; - tracing::info!( - "blockstore::RequestMissingBlocks : speculative fetch {} blocks at {} from {}", - message.batch_size, - message.from_number, - peer.peer_id, - ); - self.message_sender.send_external_message( - peer.peer_id, - ExternalMessage::RequestFromNumber(message), - )?; - self.in_flight = Some(peer); - } - } - - Ok(()) - } - - pub fn handle_response_from_hash( - &mut self, - from: PeerId, - response: ResponseBlock, - ) -> Result<()> { - // Check that we have enough to complete the process, otherwise ignore - if response.proposals.is_empty() { - // Empty response, downgrade peer, skip - tracing::warn!("blockstore::ResponseFromHash : empty blocks {from}",); - self.done_with_peer(DownGrade::Empty); - return Ok(()); - } else if response.proposals.len() < GAP_THRESHOLD { - // Partial response, downgrade peer - // Skip processing because we want to ensure that we have ALL the needed blocks to sync up. - tracing::warn!("blockstore::ResponseFromHash : partial blocks {from}",); - self.done_with_peer(DownGrade::Partial); - return Ok(()); - } else { - // only process full responses - self.done_with_peer(DownGrade::None); - } - - tracing::info!( - "blockstore::ResponseFromHash : received {} blocks from {}", - response.proposals.len(), - from - ); - - // TODO: Any additional checks we should do here? - // Sort proposals by number - let proposals = response - .proposals - .into_iter() - .sorted_by_key(|p| p.number()) - .collect_vec(); - - // Inject the proposals - self.inject_proposals(proposals)?; - Ok(()) - } - /// Add a peer to the list of peers. pub fn add_peer(&mut self, peer: PeerId) { // new peers should be tried last, which gives them time to sync first. From 1ee96851a9afa429d43b9f57059020fd3b97730c Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 10:17:52 +0800 Subject: [PATCH 039/119] feat: request multi-blocks from original meta-data peer. 
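The change below keeps, for every chain segment gathered in phase 1, the peer that supplied its metadata, so the phase-2 multi-block request is sent back to that same peer, which is the one most likely to still hold those blocks. A small sketch of that bookkeeping; Segments is an illustrative stand-in, with a string in place of the real libp2p PeerId and u64 in place of block hashes.

    /// Illustrative segment bookkeeping: each landmark remembers which peer
    /// supplied that stretch of metadata, so the block request goes back to it.
    struct Segments {
        landmarks: Vec<(u64 /* landmark hash */, String /* source peer */)>,
    }

    impl Segments {
        fn new() -> Self {
            Self { landmarks: Vec::new() }
        }

        /// Phase 1: record a new segment and the peer that served it.
        fn record(&mut self, landmark_hash: u64, peer: &str) {
            self.landmarks.push((landmark_hash, peer.to_string()));
        }

        /// Phase 2: peek at the most recent segment and the peer to ask for its blocks.
        fn next_request(&self) -> Option<(u64, &str)> {
            self.landmarks.last().map(|(hash, peer)| (*hash, peer.as_str()))
        }

        /// Once the blocks arrive and are injected, the segment is done.
        fn finish(&mut self) {
            self.landmarks.pop();
        }
    }

    fn main() {
        let mut segs = Segments::new();
        segs.record(0xAAAA, "peer-1");
        segs.record(0xBBBB, "peer-2"); // most recent segment came from peer-2
        assert_eq!(segs.next_request(), Some((0xBBBB, "peer-2")));
        segs.finish();
        assert_eq!(segs.next_request(), Some((0xAAAA, "peer-1")));
    }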
--- zilliqa/src/sync.rs | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index decee9fb8..7fee18b7f 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -82,7 +82,7 @@ pub struct Sync { chain_metadata: BTreeMap, p1_metadata: Option, p2_metadata: Option, - landmarks: Vec, + landmarks: Vec<(Hash, PeerId)>, zip_queue: VecDeque, } @@ -255,16 +255,16 @@ impl Sync { from ); - if let Some(landmark) = self.landmarks.pop() { + if let Some((hash, peer_id)) = self.landmarks.pop() { // remove the last landmark, should match proposals.last() - let hash = proposals.last().as_ref().unwrap().hash(); - if hash != landmark { + let prop_hash = proposals.last().as_ref().unwrap().hash(); + if hash != prop_hash { tracing::warn!( "sync::MultiBlockResponse : mismatched landmark {} != {}", - landmark, hash, + prop_hash, ); - self.landmarks.push(landmark); // put it back + self.landmarks.push((hash, peer_id)); // put it back } } @@ -329,18 +329,18 @@ impl Sync { } else { return Ok(()); } + } else if self.injected > self.max_blocks_in_flight { + return Ok(()); } else if self.p2_metadata.is_none() { - tracing::warn!( - "sync::RequestMissingBlocks : no metadata to request missing blocks" - ); + tracing::warn!("sync::RequestMissingBlocks : no metadata to request missing blocks"); return Ok(()); } - // TODO: Use original peer, which would have the set of blocks + // Use original peer, which should have the blocks in the metadata if let Some(peer) = self.get_next_peer() { - // If we have no landmarks, we have nothing to do self.p2_metadata = None; - if let Some(hash) = self.landmarks.last() { + // If we have no landmarks, we have nothing to do + if let Some((hash, peer_id)) = self.landmarks.last() { let mut hash = *hash; // peek at the last value let mut request_hashes = Vec::with_capacity(self.max_batch_size); while let Some(meta) = self.chain_metadata.remove(&hash) { @@ -358,10 +358,15 @@ impl Sync { self.landmarks.len(), ); self.message_sender.send_external_message( - peer.peer_id, + *peer_id, ExternalMessage::MultiBlockRequest(request_hashes), )?; - self.in_flight = Some(peer); + self.peers.push(peer); // reinsert peer, as we will be using a faux peer below + self.in_flight = Some(PeerInfo { + peer_id: *peer_id, + last_used: std::time::Instant::now(), + score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers + }); } else { // No more landmarks, we're done self.peers.push(peer); @@ -412,7 +417,7 @@ impl Sync { // TODO: Store peer id. // TODO: Insert intermediate landmarks self.landmarks - .push(metadata.first().as_ref().unwrap().block_hash); + .push((metadata.first().as_ref().unwrap().block_hash, from)); tracing::info!( "sync::MetadataResponse : received {} metadata set #{} from {}", @@ -581,7 +586,10 @@ impl Sync { peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better, to avoid a single source of truth. peer.score = peer.score.max(self.peers.peek().unwrap().score); - self.peers.push(peer); + // Reinsert peers that are good + if peer.score < u32::MAX { + self.peers.push(peer); + } } } From d016c05eaa7605912bbf5ee15ca353d757424060 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 14:55:07 +0800 Subject: [PATCH 040/119] feat: validates the chain metadata as it is retrieved. 
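The validation added below walks each returned metadata segment and checks that every entry is the parent of the one before it: the hash must match the expected parent hash and the block number must decrease by exactly one, otherwise the segment is rejected and the request retried. A standalone sketch of that linkage check; Meta is a simplified stand-in for ChainMetaData with u64 hashes, and segment_links is an illustrative helper.

    /// Simplified stand-in for ChainMetaData: only the linkage-relevant fields.
    struct Meta {
        block_hash: u64,
        parent_hash: u64,
        block_number: u64,
    }

    /// Check that `segment` (newest block first) forms an unbroken parent chain
    /// starting from the block we already know about.
    fn segment_links(expected_hash: u64, expected_number: u64, segment: &[Meta]) -> bool {
        let mut parent_hash = expected_hash;
        let mut parent_num = expected_number;
        for meta in segment {
            if meta.block_hash != parent_hash || parent_num != meta.block_number + 1 {
                return false;
            }
            parent_hash = meta.parent_hash;
            parent_num = meta.block_number;
        }
        true
    }

    fn main() {
        // Blocks 9, 8, 7 in descending order; hashes equal the block numbers here.
        let segment = [
            Meta { block_hash: 9, parent_hash: 8, block_number: 9 },
            Meta { block_hash: 8, parent_hash: 7, block_number: 8 },
            Meta { block_hash: 7, parent_hash: 6, block_number: 7 },
        ];
        // We previously stopped at block 10, whose parent is block 9.
        assert!(segment_links(9, 10, &segment));
        // A wrong starting number (a gap) breaks the chain.
        assert!(!segment_links(9, 11, &segment));
    }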
--- zilliqa/src/sync.rs | 118 +++++++++++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 7fee18b7f..39413c827 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -15,6 +15,7 @@ use crate::{ db::Db, message::{Block, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, RequestBlock}, node::MessageSender, + time::SystemTime, }; enum DownGrade { @@ -77,12 +78,15 @@ pub struct Sync { peer_id: PeerId, // how many injected proposals injected: usize, - - // Chain metadata + // complete chain metadata chain_metadata: BTreeMap, + // phase 1 cursor p1_metadata: Option, + // phase 2 cursor p2_metadata: Option, + // stack of chain landmarks landmarks: Vec<(Hash, PeerId)>, + // fixed-size queue of latest proposals zip_queue: VecDeque, } @@ -176,7 +180,8 @@ impl Sync { self.request_missing_chain(None)?; } else { // Start phase 1 - self.request_missing_chain(Some(parent_hash))?; + let block_number = self.zip_queue.back().unwrap().number(); + self.request_missing_chain(Some((parent_hash, block_number)))?; } } else { // 99% synced, zip it up! @@ -249,12 +254,13 @@ impl Sync { .collect_vec(); tracing::info!( - "sync::MultiBlockResponse : received {} blocks for set #{} from {}", + "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", proposals.len(), self.landmarks.len(), from ); + // Check that this segment is for the expected landmark if let Some((hash, peer_id)) = self.landmarks.pop() { // remove the last landmark, should match proposals.last() let prop_hash = proposals.last().as_ref().unwrap().hash(); @@ -287,18 +293,24 @@ impl Sync { Ok(()) } + /// Returns a list of Proposals + /// + /// Given a set of block hashes, retrieve the list of proposals from its history. + /// Returns this list of proposals to the requestor. pub fn handle_multiblock_request( &mut self, from: PeerId, request: Vec, ) -> Result { - // ... - tracing::info!( + tracing::debug!( "sync::MultiBlockRequest : received a {} multiblock request from {}", request.len(), from ); + // TODO: Any additional checks + // Validators should not respond to this, unless they are free e.g. stuck in an exponential backoff. 
+ let batch_size: usize = self.max_batch_size.min(request.len()); // mitigate DOS by limiting the number of blocks we return let mut proposals = Vec::with_capacity(batch_size); for hash in request { @@ -336,7 +348,7 @@ impl Sync { return Ok(()); } - // Use original peer, which should have the blocks in the metadata + // will be re-inserted below if let Some(peer) = self.get_next_peer() { self.p2_metadata = None; // If we have no landmarks, we have nothing to do @@ -346,16 +358,17 @@ impl Sync { while let Some(meta) = self.chain_metadata.remove(&hash) { request_hashes.push(meta.block_hash); hash = meta.parent_hash; - // re-insert the metadata so as not to lose it + // TODO: Allow retry of multi-block request // self.chain_metadata.insert(hash, meta); self.p2_metadata = Some(meta); } - // Fire request - tracing::debug!( - "sync::RequestMissingBlocks : requesting {} blocks of set #{}", + // Fire request, to the original peer that sent the segment metadata + tracing::info!( + "sync::RequestMissingBlocks : requesting {} blocks of segment #{} from {}", request_hashes.len(), self.landmarks.len(), + peer_id, ); self.message_sender.send_external_message( *peer_id, @@ -402,37 +415,69 @@ impl Sync { self.done_with_peer(DownGrade::None); } - // TODO: Check the linkage of the returned chain + // Check the linkage of the returned chain + let Some(p1) = self.p1_metadata.as_ref() else { + tracing::error!( + "no way to check chain linkage from {}", + response.first().unwrap().block_hash + ); + return Ok(()); + }; + let mut parent_hash = p1.parent_hash; + let mut parent_num = p1.block_number; + for meta in response.iter() { + // check that the block hash and number is as expected. + if meta.block_hash != Hash::ZERO + && meta.block_hash == parent_hash + && parent_num == meta.block_number + 1 + { + parent_hash = meta.parent_hash; + parent_num = meta.block_number; + } else { + // if something does not match, we will retry the request with the next peer. + // TODO: possibly, discard and rebuild entire chain + tracing::error!( + "sync::MetadataResponse : retry metadata history for {}", + parent_hash + ); + return Ok(()); + } + if meta.block_hash == response.last().unwrap().block_hash { + break; // done, we do not check the last parent, because that's outside this segment + } + } - // Sort metadata by number, reversed - let metadata = response - .into_iter() - .sorted_by(|a, b| b.block_number.cmp(&a.block_number)) - .collect_vec(); + // Chain segment is sane + let segment = response; - let p1_metadata = metadata.last().unwrap().clone(); - let last_hash = p1_metadata.block_hash; - self.p1_metadata = Some(p1_metadata); + // Record the oldest block in the chain + self.p1_metadata = Some(segment.last().unwrap().clone()); - // TODO: Store peer id. 
// TODO: Insert intermediate landmarks + // Record landmark, including peer that has this set of blocks self.landmarks - .push((metadata.first().as_ref().unwrap().block_hash, from)); + .push((segment.first().as_ref().unwrap().block_hash, from)); tracing::info!( - "sync::MetadataResponse : received {} metadata set #{} from {}", - metadata.len(), + "sync::MetadataResponse : received {} metadata segment #{} from {}", + segment.len(), self.landmarks.len(), from ); - // Store the metadata - for meta in metadata { - self.chain_metadata.insert(meta.block_hash, meta); + // Record the actual chain metadata + for meta in segment { + if self.chain_metadata.insert(meta.block_hash, meta).is_some() { + anyhow::bail!("loop in chain!"); // there is a possible loop in the chain + } } - // If the last block does not exist in our canonical history, fire the next request - if self.db.get_block_by_hash(&last_hash)?.is_some() { + // If the segment does not link to our canonical history, fire the next request + if self + .db + .get_block_by_hash(&self.p1_metadata.as_ref().unwrap().block_hash)? + .is_some() + { // Hit our internal history. Start phase 2. self.p2_metadata = self.p1_metadata.clone(); } else if DO_SPECULATIVE { @@ -452,7 +497,7 @@ impl Sync { from: PeerId, request: RequestBlock, ) -> Result { - tracing::info!( + tracing::debug!( "sync::MetadataRequest : received a metadata request from {}", from ); @@ -485,7 +530,7 @@ impl Sync { /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. /// Otherwise, it requests blocks from the given omega_block. - pub fn request_missing_chain(&mut self, parent_hash: Option) -> Result<()> { + pub fn request_missing_chain(&mut self, block: Option<(Hash, u64)>) -> Result<()> { // Early exit if there's a request in-flight; and if it has not expired. if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { @@ -512,7 +557,14 @@ impl Sync { from_hash: meta.parent_hash, batch_size: self.max_batch_size, }) - } else if let Some(hash) = parent_hash { + } else if let Some((hash, number)) = block { + // insert the starting point for phase 1 + self.p1_metadata = Some(ChainMetaData { + block_hash: Hash::ZERO, // invalid block hash + block_number: number, + parent_hash: hash, + block_timestamp: SystemTime::UNIX_EPOCH, + }); ExternalMessage::MetaDataRequest(RequestBlock { from_number: 0, from_hash: hash, @@ -570,7 +622,7 @@ impl Sync { )?; } - tracing::info!( + tracing::debug!( "sync::InjectProposals : injected {}/{} proposals", len, self.injected From d777a8a6272c8de9aeb0bec91899cc7ff70bb913 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 16:17:30 +0800 Subject: [PATCH 041/119] chore: minor cleanup. --- zilliqa/src/message.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 15ff364aa..26720461d 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -234,11 +234,6 @@ pub struct RequestBlock { pub batch_size: usize, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ResponseBlock { - pub proposals: Vec, -} - /// Used to convey proposal processing internally, to avoid blocking threads for too long. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct InjectedProposal { @@ -291,10 +286,6 @@ pub enum ExternalMessage { Acknowledgement, AddPeer, RemovePeer, - RequestFromNumber(RequestBlock), - RequestFromHash(RequestBlock), - ResponseFromNumber(ResponseBlock), - ResponseFromHash(ResponseBlock), InjectedProposal(InjectedProposal), MetaDataRequest(RequestBlock), MetaDataResponse(Vec), @@ -332,22 +323,6 @@ impl Display for ExternalMessage { } ExternalMessage::AddPeer => write!(f, "AddPeer"), ExternalMessage::RemovePeer => write!(f, "RemovePeer"), - ExternalMessage::ResponseFromNumber(r) => { - write!(f, "ResponseFromNumber({})", r.proposals.len()) - } - ExternalMessage::ResponseFromHash(r) => { - write!(f, "ResponseFromHash({})", r.proposals.len()) - } - ExternalMessage::RequestFromNumber(r) => { - write!( - f, - "RequestFromNumber({}, num={})", - r.from_hash, r.batch_size - ) - } - ExternalMessage::RequestFromHash(r) => { - write!(f, "RequestFromHash({}, num={})", r.from_hash, r.batch_size) - } ExternalMessage::Proposal(p) => write!(f, "Proposal({})", p.view()), ExternalMessage::Vote(v) => write!(f, "Vote({})", v.view), ExternalMessage::NewView(n) => write!(f, "NewView({})", n.view), From 5c339e9abe92cdf551e384f0b16bca817e61f848 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 17:40:00 +0800 Subject: [PATCH 042/119] feat: perform checks to ensure multi-block response matches multi-block request. --- zilliqa/src/sync.rs | 80 ++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 39413c827..af2f31def 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -64,7 +64,7 @@ pub struct Sync { db: Arc, // message bus message_sender: MessageSender, - // internal peers + // internal list of peers, maintained with add_peer/remove_peer. peers: BinaryHeap, // in-flight in_flight: Option, @@ -83,7 +83,7 @@ pub struct Sync { // phase 1 cursor p1_metadata: Option, // phase 2 cursor - p2_metadata: Option, + p2_metadata: Option, // stack of chain landmarks landmarks: Vec<(Hash, PeerId)>, // fixed-size queue of latest proposals @@ -240,45 +240,65 @@ impl Sync { self.done_with_peer(DownGrade::Empty); } else if response.len() < self.max_batch_size { // Partial response, downgrade peer - // TODO: Match against request numbers tracing::warn!("sync::MultiBlockResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { self.done_with_peer(DownGrade::None); } - // Sort proposals by number, ascending - let proposals = response - .into_iter() - .sorted_by_key(|p| p.number()) - .collect_vec(); - tracing::info!( "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", - proposals.len(), + response.len(), self.landmarks.len(), from ); - // Check that this segment is for the expected landmark - if let Some((hash, peer_id)) = self.landmarks.pop() { - // remove the last landmark, should match proposals.last() - let prop_hash = proposals.last().as_ref().unwrap().hash(); - if hash != prop_hash { - tracing::warn!( - "sync::MultiBlockResponse : mismatched landmark {} != {}", - hash, - prop_hash, - ); - self.landmarks.push((hash, peer_id)); // put it back - } + let Some((hash, peer_id)) = self.landmarks.last() else { + tracing::error!("sync::MultiBlockResponse: no more landmarks!"); + return Ok(()); + }; + + // Check that this segment is from the requested peer. 
+ if *peer_id != from { + tracing::error!("sync::MultiBlockResponse: response received from unknown peer {from}"); + return Ok(()); + } + + // Check that this segment starts at the expected landmark + let prop_hash = response.first().as_ref().unwrap().hash(); + if *hash != prop_hash { + tracing::warn!( + "sync::MultiBlockResponse : mismatched landmark {} != {}", + hash, + prop_hash, + ); + return Ok(()); + } + + // Check it matches request hashes + let checksum = response + .iter() + .fold(Hash::builder().with(Hash::ZERO.as_bytes()), |sum, p| { + sum.with(p.hash().as_bytes()) + }) + .finalize(); + if self.p2_metadata.unwrap_or_else(|| Hash::ZERO) != checksum { + tracing::error!("sync::MultiBlockResponse : mismatch request checksum {checksum}"); + return Ok(()); } + // Sort proposals by number, ascending + let proposals = response + .into_iter() + .sorted_by_key(|p| p.number()) + .collect_vec(); + // Remove the blocks from the chain metadata, if they exist for p in &proposals { self.chain_metadata.remove(&p.hash()); } + self.landmarks.pop(); self.inject_proposals(proposals)?; // Done with phase 2, allow phase 1 to restart. @@ -358,11 +378,19 @@ impl Sync { while let Some(meta) = self.chain_metadata.remove(&hash) { request_hashes.push(meta.block_hash); hash = meta.parent_hash; - // TODO: Allow retry of multi-block request - // self.chain_metadata.insert(hash, meta); - self.p2_metadata = Some(meta); + // TODO: Implement retry mechanism + // self.chain_metadata.insert(hash, meta); // reinsert, for retries } + // Checksum of the request hashes + let checksum = request_hashes + .iter() + .fold(Hash::builder().with(Hash::ZERO.as_bytes()), |sum, h| { + sum.with(h.as_bytes()) + }) + .finalize(); + self.p2_metadata = Some(checksum); + // Fire request, to the original peer that sent the segment metadata tracing::info!( "sync::RequestMissingBlocks : requesting {} blocks of segment #{} from {}", @@ -479,7 +507,7 @@ impl Sync { .is_some() { // Hit our internal history. Start phase 2. - self.p2_metadata = self.p1_metadata.clone(); + self.p2_metadata = Some(self.p1_metadata.as_ref().unwrap().block_hash); } else if DO_SPECULATIVE { self.request_missing_chain(None)?; } From b7bc13bd84216541ab0ff2a14fcf74acb1444303 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 6 Jan 2025 17:54:15 +0800 Subject: [PATCH 043/119] feat: allow retries of request_missing_blocks(). --- zilliqa/src/sync.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index af2f31def..75c0ccbaf 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -282,20 +282,22 @@ impl Sync { sum.with(p.hash().as_bytes()) }) .finalize(); - if self.p2_metadata.unwrap_or_else(|| Hash::ZERO) != checksum { + if self.p2_metadata.unwrap_or(Hash::ZERO) != checksum { tracing::error!("sync::MultiBlockResponse : mismatch request checksum {checksum}"); return Ok(()); } - // Sort proposals by number, ascending + // Response seems sane. let proposals = response .into_iter() .sorted_by_key(|p| p.number()) .collect_vec(); - // Remove the blocks from the chain metadata, if they exist + // Remove the blocks from the chain metadata for p in &proposals { - self.chain_metadata.remove(&p.hash()); + if self.chain_metadata.remove(&p.hash()).is_none() { + anyhow::bail!("missing chain data for proposal"); // this should never happen! 
+ } } self.landmarks.pop(); @@ -362,6 +364,10 @@ impl Sync { return Ok(()); } } else if self.injected > self.max_blocks_in_flight { + tracing::warn!( + "sync::RequestMissingBlocks : too many {} blocks in flight", + self.injected + ); return Ok(()); } else if self.p2_metadata.is_none() { tracing::warn!("sync::RequestMissingBlocks : no metadata to request missing blocks"); @@ -373,13 +379,12 @@ impl Sync { self.p2_metadata = None; // If we have no landmarks, we have nothing to do if let Some((hash, peer_id)) = self.landmarks.last() { - let mut hash = *hash; // peek at the last value let mut request_hashes = Vec::with_capacity(self.max_batch_size); - while let Some(meta) = self.chain_metadata.remove(&hash) { + let mut key = *hash; // start from this block + while let Some(meta) = self.chain_metadata.remove(&key) { request_hashes.push(meta.block_hash); - hash = meta.parent_hash; - // TODO: Implement retry mechanism - // self.chain_metadata.insert(hash, meta); // reinsert, for retries + key = meta.parent_hash; + self.chain_metadata.insert(meta.block_hash, meta); // reinsert, for retries } // Checksum of the request hashes From 2c35504c0f8be8384c5a7872580a18ef0014b616 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 7 Jan 2025 15:35:44 +0800 Subject: [PATCH 044/119] feat: added ability to retry phase 1, during phase 2 error. --- zilliqa/src/message.rs | 3 +- zilliqa/src/sync.rs | 254 +++++++++++++++++++++++------------------ 2 files changed, 141 insertions(+), 116 deletions(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 26720461d..4fbcbc6d5 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -229,7 +229,7 @@ impl fmt::Debug for BlockResponse { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RequestBlock { - pub from_number: u64, + pub request_at: SystemTime, pub from_hash: Hash, pub batch_size: usize, } @@ -249,7 +249,6 @@ pub struct ChainMetaData { pub block_hash: Hash, pub parent_hash: Hash, pub block_number: u64, - pub block_timestamp: SystemTime, } /// Used to convey proposal processing internally, to avoid blocking threads for too long. diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 75c0ccbaf..15528d0da 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -66,28 +66,28 @@ pub struct Sync { message_sender: MessageSender, // internal list of peers, maintained with add_peer/remove_peer. peers: BinaryHeap, - // in-flight + // peer handling an in-flight request in_flight: Option, - // in-flight timeout + // in-flight request timeout, before retry request_timeout: Duration, // how many blocks to request at once max_batch_size: usize, // how many blocks to inject into the queue max_blocks_in_flight: usize, + // count of injected proposals pending processing + injected: usize, // our peer id peer_id: PeerId, - // how many injected proposals - injected: usize, - // complete chain metadata + // complete chain metadata, in-memory chain_metadata: BTreeMap, - // phase 1 cursor - p1_metadata: Option, - // phase 2 cursor - p2_metadata: Option, - // stack of chain landmarks - landmarks: Vec<(Hash, PeerId)>, - // fixed-size queue of latest proposals - zip_queue: VecDeque, + // markers to segments in the chain, and the source peer for that segment. + chain_segments: Vec<(PeerId, Hash, u64)>, + // phase 1 cursor containing parent hash, and block number. + p1_cursor: Option<(Hash, u64)>, + // phase 2 cursor containing a hash of a set of hashes. 
+ p2_cursor: Option, + // fixed-size queue of the most recent proposals + recent_proposals: VecDeque, } impl Sync { @@ -118,10 +118,10 @@ impl Sync { in_flight: None, injected: 0, chain_metadata: BTreeMap::new(), - p1_metadata: None, - landmarks: Vec::new(), - p2_metadata: None, - zip_queue: VecDeque::with_capacity(GAP_THRESHOLD), + p1_cursor: None, + chain_segments: Vec::new(), + p2_cursor: None, + recent_proposals: VecDeque::with_capacity(GAP_THRESHOLD), }) } @@ -155,43 +155,44 @@ impl Sync { /// We do not perform checks on the Proposal here. This is done in the consensus layer. pub fn sync_proposal(&mut self, proposal: Proposal) -> Result<()> { // just stuff the latest proposal into the fixed-size queue. - while self.zip_queue.len() >= GAP_THRESHOLD { - self.zip_queue.pop_front(); + while self.recent_proposals.len() >= GAP_THRESHOLD { + self.recent_proposals.pop_front(); } - self.zip_queue.push_back(proposal); + self.recent_proposals.push_back(proposal); // TODO: Replace with single SQL query // Check if block parent exist in history - let parent_hash = self.zip_queue.back().unwrap().header.qc.block_hash; + let parent_hash = self.recent_proposals.back().unwrap().header.qc.block_hash; if self.db.get_block_by_hash(&parent_hash)?.is_none() { // Check if oldes block exists in the history. If it does, we have synced up 99% of the chain. - let ancestor_hash = self.zip_queue.front().unwrap().header.qc.block_hash; - if self.zip_queue.len() == 1 || self.db.get_block_by_hash(&ancestor_hash)?.is_none() { + let ancestor_hash = self.recent_proposals.front().unwrap().header.qc.block_hash; + if self.recent_proposals.len() == 1 + || self.db.get_block_by_hash(&ancestor_hash)?.is_none() + { // No ancestor block, trigger sync tracing::warn!( "sync::SyncProposal : parent block {} not found", parent_hash ); - if self.p2_metadata.is_some() { + if self.p2_cursor.is_some() { // Continue phase 2 self.request_missing_blocks()?; - } else if self.p1_metadata.is_some() { + } else if self.p1_cursor.is_some() { // Continue phase 1 - self.request_missing_chain(None)?; + self.request_missing_metadata(None)?; } else { // Start phase 1 - let block_number = self.zip_queue.back().unwrap().number(); - self.request_missing_chain(Some((parent_hash, block_number)))?; + let block_number = self.recent_proposals.back().unwrap().number(); + self.request_missing_metadata(Some((parent_hash, block_number)))?; } } else { // 99% synced, zip it up! tracing::info!( - "sync::SyncProposal : zip up {} blocks from {}", - self.zip_queue.len(), - ancestor_hash + "sync::SyncProposal : finishing up {} blocks for segment #0 from {ancestor_hash}", + self.recent_proposals.len() ); // parent block exists, inject the proposal - let proposals = self.zip_queue.drain(..).collect_vec(); + let proposals = self.recent_proposals.drain(..).collect_vec(); self.inject_proposals(proposals)?; // we're done } @@ -220,26 +221,53 @@ impl Sync { block_number: block.number(), block_hash: block.hash(), parent_hash: block.parent_hash(), - block_timestamp: block.timestamp(), } } + /// Retry phase 1 + /// + /// If something went wrong, phase 1 may need to be retried for the most recent segment. + /// Pop the segment from the landmark, and continue phase 1. 
+ fn retry_phase1(&mut self) -> Result<()> { + if self.chain_segments.is_empty() { + tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain_segments!"); + return Ok(()); + } + + // remove the last segment from the chain metadata + let (peer, hash, num) = self.chain_segments.pop().unwrap(); + let mut key = hash; + while let Some(p) = self.chain_metadata.remove(&key) { + key = p.parent_hash; + } + + // set the p1/p2 cursor value, to allow retry from p1 + self.p1_cursor = Some((hash, num)); + self.p2_cursor = None; + tracing::info!("sync::RetryPhase1 : retrying block {hash} from {peer}"); + if DO_SPECULATIVE { + self.request_missing_metadata(None)?; + } + Ok(()) + } + /// Handle a multi-block response. /// - /// This is the final step in the syncing algorithm, where we receive a set of blocks and inject them into - /// the pipeline. We also remove the blocks from the chain metadata, because they are now in the pipeline. + /// This is phase 2 in the syncing algorithm, where we receive a set of blocks and inject them into the pipeline. + /// We also remove the blocks from the chain metadata, because they are now in the pipeline. pub fn handle_multiblock_response( &mut self, from: PeerId, response: Vec, ) -> Result<()> { - // Process whatever we received + // Process only a full response if response.is_empty() { - // Empty response, downgrade peer + // Empty response, downgrade peer and retry phase 1. tracing::warn!("sync::MultiBlockResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); + return self.retry_phase1(); } else if response.len() < self.max_batch_size { - // Partial response, downgrade peer + // Partial response, downgrade peer but process the block. tracing::warn!("sync::MultiBlockResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { @@ -249,42 +277,38 @@ impl Sync { tracing::info!( "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", response.len(), - self.landmarks.len(), + self.chain_segments.len(), from ); - let Some((hash, peer_id)) = self.landmarks.last() else { - tracing::error!("sync::MultiBlockResponse: no more landmarks!"); - return Ok(()); + // Spurious response + let Some((peer_id, hash, _)) = self.chain_segments.last() else { + anyhow::bail!("sync::MultiBlockResponse: no more chain_segments!"); }; - // Check that this segment is from the requested peer. + // If the response is not from the expected peer, retry phase 2. if *peer_id != from { - tracing::error!("sync::MultiBlockResponse: response received from unknown peer {from}"); + tracing::warn!("sync::MultiBlockResponse: unknown peer {from}, will retry"); return Ok(()); } - // Check that this segment starts at the expected landmark + // Segment history does not match, retry phase 1. let prop_hash = response.first().as_ref().unwrap().hash(); if *hash != prop_hash { - tracing::warn!( - "sync::MultiBlockResponse : mismatched landmark {} != {}", - hash, - prop_hash, - ); - return Ok(()); + tracing::error!("sync::MultiBlockResponse : mismatched landmark {hash} != {prop_hash}"); + return self.retry_phase1(); } - // Check it matches request hashes + // If the checksum does not match, retry phase 1. Maybe the node has pruned the segment. 
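// A minimal illustrative sketch, not from the patch: the same order-sensitive
// digest can be computed by the requester over the hashes it asks for (stored
// in p2_cursor) and over the proposals that come back. Because the digest is
// built by folding the hashes in order, a response with the right blocks in the
// wrong order, or a stale/partial segment, fails the comparison below and
// triggers a retry of phase 1. `segment_checksum` is a hypothetical helper
// name; the patch computes this inline, using the crate's `Hash` builder
// exactly as shown here.
fn segment_checksum(hashes: impl IntoIterator<Item = Hash>) -> Hash {
    hashes
        .into_iter()
        .fold(Hash::builder().with(Hash::ZERO.as_bytes()), |sum, h| {
            sum.with(h.as_bytes())
        })
        .finalize()
}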
let checksum = response .iter() .fold(Hash::builder().with(Hash::ZERO.as_bytes()), |sum, p| { sum.with(p.hash().as_bytes()) }) .finalize(); - if self.p2_metadata.unwrap_or(Hash::ZERO) != checksum { - tracing::error!("sync::MultiBlockResponse : mismatch request checksum {checksum}"); - return Ok(()); + if self.p2_cursor.unwrap_or(Hash::ZERO) != checksum { + tracing::error!("sync::MultiBlockResponse : mismatch history {checksum}"); + return self.retry_phase1(); } // Response seems sane. @@ -300,12 +324,13 @@ impl Sync { } } - self.landmarks.pop(); + // Done with this segment + self.chain_segments.pop(); self.inject_proposals(proposals)?; // Done with phase 2, allow phase 1 to restart. - if self.landmarks.is_empty() { - self.p1_metadata = None; + if self.chain_segments.is_empty() { + self.p1_cursor = None; self.chain_metadata.clear(); } else if DO_SPECULATIVE && self.injected < self.max_blocks_in_flight { // Speculatively request more blocks @@ -369,16 +394,16 @@ impl Sync { self.injected ); return Ok(()); - } else if self.p2_metadata.is_none() { + } else if self.p2_cursor.is_none() { tracing::warn!("sync::RequestMissingBlocks : no metadata to request missing blocks"); return Ok(()); } // will be re-inserted below if let Some(peer) = self.get_next_peer() { - self.p2_metadata = None; - // If we have no landmarks, we have nothing to do - if let Some((hash, peer_id)) = self.landmarks.last() { + self.p2_cursor = None; + // If we have no chain_segments, we have nothing to do + if let Some((peer_id, hash, _)) = self.chain_segments.last() { let mut request_hashes = Vec::with_capacity(self.max_batch_size); let mut key = *hash; // start from this block while let Some(meta) = self.chain_metadata.remove(&key) { @@ -394,13 +419,13 @@ impl Sync { sum.with(h.as_bytes()) }) .finalize(); - self.p2_metadata = Some(checksum); + self.p2_cursor = Some(checksum); // Fire request, to the original peer that sent the segment metadata tracing::info!( "sync::RequestMissingBlocks : requesting {} blocks of segment #{} from {}", request_hashes.len(), - self.landmarks.len(), + self.chain_segments.len(), peer_id, ); self.message_sender.send_external_message( @@ -414,7 +439,7 @@ impl Sync { score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers }); } else { - // No more landmarks, we're done + // No more chain_segments, we're done self.peers.push(peer); } } else { @@ -428,7 +453,7 @@ impl Sync { /// Handle a response to a metadata request. /// /// This is the first step in the syncing algorithm, where we receive a set of metadata and use it to - /// construct a chain history. We then request the missing blocks from the chain. + /// construct a chain history. pub fn handle_metadata_response( &mut self, from: PeerId, @@ -436,12 +461,12 @@ impl Sync { ) -> Result<()> { // Process whatever we have received. if response.is_empty() { - // Empty response, downgrade peer + // Empty response, downgrade peer and retry with a new peer. tracing::warn!("sync::MetadataResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); } else if response.len() < self.max_batch_size { - // Partial response, downgrade peer + // Partial response, downgrade peer but accept the response. 
tracing::warn!("sync::MetadataResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { @@ -449,29 +474,28 @@ impl Sync { } // Check the linkage of the returned chain - let Some(p1) = self.p1_metadata.as_ref() else { + let Some((p1_hash, p1_num)) = self.p1_cursor.as_ref() else { tracing::error!( - "no way to check chain linkage from {}", + "synce::MetadataResponse : no way to check chain history from {}", response.first().unwrap().block_hash ); return Ok(()); }; - let mut parent_hash = p1.parent_hash; - let mut parent_num = p1.block_number; + let mut block_hash = *p1_hash; + let mut block_num = *p1_num; for meta in response.iter() { // check that the block hash and number is as expected. if meta.block_hash != Hash::ZERO - && meta.block_hash == parent_hash - && parent_num == meta.block_number + 1 + && block_hash == meta.block_hash + && block_num == meta.block_number + 1 { - parent_hash = meta.parent_hash; - parent_num = meta.block_number; + block_hash = meta.parent_hash; + block_num = meta.block_number; } else { - // if something does not match, we will retry the request with the next peer. // TODO: possibly, discard and rebuild entire chain + // if something does not match, do nothing and retry the request with the next peer. tracing::error!( - "sync::MetadataResponse : retry metadata history for {}", - parent_hash + "sync::MetadataResponse : retry metadata history for {block_hash}/{block_num}" ); return Ok(()); } @@ -483,22 +507,24 @@ impl Sync { // Chain segment is sane let segment = response; - // Record the oldest block in the chain - self.p1_metadata = Some(segment.last().unwrap().clone()); - - // TODO: Insert intermediate landmarks // Record landmark, including peer that has this set of blocks - self.landmarks - .push((segment.first().as_ref().unwrap().block_hash, from)); + self.chain_segments.push((from, *p1_hash, *p1_num)); + + // Record the oldest block in the chain's parent + self.p1_cursor = Some(( + segment.last().unwrap().parent_hash, + segment.last().unwrap().block_number, + )); tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", segment.len(), - self.landmarks.len(), + self.chain_segments.len(), from ); // Record the actual chain metadata + let last_block_hash = segment.last().as_ref().unwrap().block_hash; for meta in segment { if self.chain_metadata.insert(meta.block_hash, meta).is_some() { anyhow::bail!("loop in chain!"); // there is a possible loop in the chain @@ -506,15 +532,11 @@ impl Sync { } // If the segment does not link to our canonical history, fire the next request - if self - .db - .get_block_by_hash(&self.p1_metadata.as_ref().unwrap().block_hash)? - .is_some() - { - // Hit our internal history. Start phase 2. - self.p2_metadata = Some(self.p1_metadata.as_ref().unwrap().block_hash); + if self.db.get_block_by_hash(&last_block_hash)?.is_some() { + // Hit our internal history. Next, phase 2. + self.p2_cursor = Some(Hash::ZERO); } else if DO_SPECULATIVE { - self.request_missing_chain(None)?; + self.request_missing_metadata(None)?; } Ok(()) @@ -535,6 +557,12 @@ impl Sync { from ); + // Do not respond to stale requests + if request.request_at.elapsed()? > self.request_timeout { + tracing::warn!("sync::MetadataRequest : stale request"); + return Ok(ExternalMessage::Acknowledgement); + } + // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. 
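// A minimal illustrative sketch, not from the patch: one possible shape for the
// TODO above. The staleness test mirrors the `request_at.elapsed()` check in
// `handle_metadata_request`; the `is_busy` input is a hypothetical signal (for
// example, an active validator with a proposal to build) that the patch has not
// defined. Types are the same `SystemTime`/`Duration` already used in this file.
fn should_serve_metadata_request(
    request_at: SystemTime,
    request_timeout: Duration,
    is_busy: bool,
) -> bool {
    // A request older than the requester's own retry timeout will be re-issued
    // to another peer anyway, so answering it only wastes bandwidth.
    let stale = request_at
        .elapsed()
        .map(|age| age > request_timeout)
        .unwrap_or(true);
    !stale && !is_busy
}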
@@ -563,53 +591,51 @@ impl Sync { /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. /// Otherwise, it requests blocks from the given omega_block. - pub fn request_missing_chain(&mut self, block: Option<(Hash, u64)>) -> Result<()> { + pub fn request_missing_metadata(&mut self, block: Option<(Hash, u64)>) -> Result<()> { // Early exit if there's a request in-flight; and if it has not expired. if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { tracing::warn!( - "sync::RequestMissingChain : in-flight request {} timed out, requesting from new peer", + "sync::RequestMissingMetadata : in-flight request {} timed out, requesting from new peer", peer.peer_id ); self.done_with_peer(DownGrade::Timeout); } else { return Ok(()); } - } else if self.injected > 0 { - tracing::warn!( - "sync::RequestMissingChain : too many {} blocks in flight", - self.injected - ); + } else if self.p2_cursor.is_some() { + tracing::warn!("sync::RequestMissingMetadata : phase 2 in progress"); return Ok(()); + // } else if self.injected > 0 { + // tracing::warn!( + // "sync::RequestMissingMetadata : too many {} blocks in flight", + // self.injected + // ); + // return Ok(()); } if let Some(peer) = self.get_next_peer() { - let message = if let Some(meta) = self.p1_metadata.as_ref() { + let message = if let Some((hash, _)) = self.p1_cursor.as_ref() { ExternalMessage::MetaDataRequest(RequestBlock { - from_number: 0, - from_hash: meta.parent_hash, + request_at: SystemTime::now(), + from_hash: *hash, batch_size: self.max_batch_size, }) } else if let Some((hash, number)) = block { // insert the starting point for phase 1 - self.p1_metadata = Some(ChainMetaData { - block_hash: Hash::ZERO, // invalid block hash - block_number: number, - parent_hash: hash, - block_timestamp: SystemTime::UNIX_EPOCH, - }); + self.p1_cursor = Some((hash, number)); ExternalMessage::MetaDataRequest(RequestBlock { - from_number: 0, + request_at: SystemTime::now(), from_hash: hash, batch_size: self.max_batch_size, }) } else { - todo!("sync::RequestMissingChain : no metadata to request missing blocks"); + todo!("sync::RequestMissingMetadata : no metadata to request missing blocks"); }; tracing::info!( ?message, - "sync::RequestMissingChain : requesting missing chain from {}", + "sync::RequestMissingMetadata : requesting missing chain from {}", peer.peer_id ); self.message_sender @@ -618,7 +644,7 @@ impl Sync { self.in_flight = Some(peer); } else { tracing::warn!( - "sync::RequestMissingChain : insufficient peers to request missing blocks" + "sync::RequestMissingMetadata : insufficient peers to request missing blocks" ); } Ok(()) From 3fae169c6ab222d7380d680943eb6ba7af3090ba Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 7 Jan 2025 21:28:19 +0800 Subject: [PATCH 045/119] feat: combined p1_cursor/p2_cursor into a self.state value. --- zilliqa/src/sync.rs | 141 +++++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 66 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 15528d0da..2f33bebc9 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -18,13 +18,6 @@ use crate::{ time::SystemTime, }; -enum DownGrade { - None, - Partial, - Timeout, - Empty, -} - // Syncing Algorithm // // When a Proposal is received by Consensus, we check if the parent exists in our DB. 
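// Illustrative summary, not from the patch: the intended happy path through the
// state machine that this commit introduces (the SyncState enum is added at the
// bottom of this file). Variant payloads are elided here.
//
//   Phase0            --(proposal arrives whose parent is unknown)--> Phase1(start block)
//   Phase1(cursor)    --(metadata segment links to local history)-->  Phase2(request checksum)
//   Phase2(checksum)  --(last queued segment fetched and injected)--> Phase3
//   Phase3            --(recent-proposal queue zipped up)-->          Phase0
//
// Failures walk backwards instead: a bad multi-block response pops the newest
// segment marker and drops back to Phase1 via retry_phase1().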
@@ -78,14 +71,12 @@ pub struct Sync { injected: usize, // our peer id peer_id: PeerId, + // internal sync state + state: SyncState, // complete chain metadata, in-memory chain_metadata: BTreeMap, // markers to segments in the chain, and the source peer for that segment. chain_segments: Vec<(PeerId, Hash, u64)>, - // phase 1 cursor containing parent hash, and block number. - p1_cursor: Option<(Hash, u64)>, - // phase 2 cursor containing a hash of a set of hashes. - p2_cursor: Option, // fixed-size queue of the most recent proposals recent_proposals: VecDeque, } @@ -118,9 +109,8 @@ impl Sync { in_flight: None, injected: 0, chain_metadata: BTreeMap::new(), - p1_cursor: None, chain_segments: Vec::new(), - p2_cursor: None, + state: SyncState::Phase0, recent_proposals: VecDeque::with_capacity(GAP_THRESHOLD), }) } @@ -174,16 +164,22 @@ impl Sync { "sync::SyncProposal : parent block {} not found", parent_hash ); - if self.p2_cursor.is_some() { - // Continue phase 2 - self.request_missing_blocks()?; - } else if self.p1_cursor.is_some() { - // Continue phase 1 - self.request_missing_metadata(None)?; - } else { - // Start phase 1 - let block_number = self.recent_proposals.back().unwrap().number(); - self.request_missing_metadata(Some((parent_hash, block_number)))?; + // TODO: Move this up + match self.state { + SyncState::Phase0 => { + // Start phase 1 + let block_number = self.recent_proposals.back().unwrap().number(); + self.request_missing_metadata(Some((parent_hash, block_number)))?; + } + SyncState::Phase1(_, _) => { + // Continue phase 1 + self.request_missing_metadata(None)?; + } + SyncState::Phase2(_) => { + // Continue phase 2 + self.request_missing_blocks()?; + } + SyncState::Phase3 => {} } } else { // 99% synced, zip it up! @@ -241,9 +237,8 @@ impl Sync { key = p.parent_hash; } - // set the p1/p2 cursor value, to allow retry from p1 - self.p1_cursor = Some((hash, num)); - self.p2_cursor = None; + // allow retry from p1 + self.state = SyncState::Phase1(hash, num); tracing::info!("sync::RetryPhase1 : retrying block {hash} from {peer}"); if DO_SPECULATIVE { self.request_missing_metadata(None)?; @@ -274,6 +269,10 @@ impl Sync { self.done_with_peer(DownGrade::None); } + let SyncState::Phase2(p2_hash) = self.state else { + anyhow::bail!("sync::MultiBlockResponse : invalid state"); + }; + tracing::info!( "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", response.len(), @@ -306,7 +305,8 @@ impl Sync { sum.with(p.hash().as_bytes()) }) .finalize(); - if self.p2_cursor.unwrap_or(Hash::ZERO) != checksum { + + if p2_hash != checksum { tracing::error!("sync::MultiBlockResponse : mismatch history {checksum}"); return self.retry_phase1(); } @@ -330,8 +330,7 @@ impl Sync { // Done with phase 2, allow phase 1 to restart. 
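// Illustrative note, not from the patch: an empty `chain_segments` stack means
// every segment recorded during phase 1 has now been fetched and injected, so
// the sync can move to Phase3, where the recent-proposal queue is zipped up
// once its oldest ancestor appears in the local DB. Otherwise the next segment
// is requested, speculatively when DO_SPECULATIVE is enabled.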
if self.chain_segments.is_empty() { - self.p1_cursor = None; - self.chain_metadata.clear(); + self.state = SyncState::Phase3; } else if DO_SPECULATIVE && self.injected < self.max_blocks_in_flight { // Speculatively request more blocks self.request_missing_blocks()?; @@ -394,14 +393,15 @@ impl Sync { self.injected ); return Ok(()); - } else if self.p2_cursor.is_none() { + }; + + let SyncState::Phase2(_) = self.state else { tracing::warn!("sync::RequestMissingBlocks : no metadata to request missing blocks"); return Ok(()); - } + }; // will be re-inserted below if let Some(peer) = self.get_next_peer() { - self.p2_cursor = None; // If we have no chain_segments, we have nothing to do if let Some((peer_id, hash, _)) = self.chain_segments.last() { let mut request_hashes = Vec::with_capacity(self.max_batch_size); @@ -419,7 +419,7 @@ impl Sync { sum.with(h.as_bytes()) }) .finalize(); - self.p2_cursor = Some(checksum); + self.state = SyncState::Phase2(checksum); // Fire request, to the original peer that sent the segment metadata tracing::info!( @@ -474,15 +474,12 @@ impl Sync { } // Check the linkage of the returned chain - let Some((p1_hash, p1_num)) = self.p1_cursor.as_ref() else { - tracing::error!( - "synce::MetadataResponse : no way to check chain history from {}", - response.first().unwrap().block_hash - ); - return Ok(()); + let SyncState::Phase1(p1_hash, p1_num) = self.state else { + anyhow::bail!("sync::MetadataResponse : invalid state"); }; - let mut block_hash = *p1_hash; - let mut block_num = *p1_num; + + let mut block_hash = p1_hash; + let mut block_num = p1_num; for meta in response.iter() { // check that the block hash and number is as expected. if meta.block_hash != Hash::ZERO @@ -508,13 +505,13 @@ impl Sync { let segment = response; // Record landmark, including peer that has this set of blocks - self.chain_segments.push((from, *p1_hash, *p1_num)); + self.chain_segments.push((from, p1_hash, p1_num)); // Record the oldest block in the chain's parent - self.p1_cursor = Some(( + self.state = SyncState::Phase1( segment.last().unwrap().parent_hash, segment.last().unwrap().block_number, - )); + ); tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", @@ -534,7 +531,7 @@ impl Sync { // If the segment does not link to our canonical history, fire the next request if self.db.get_block_by_hash(&last_block_hash)?.is_some() { // Hit our internal history. Next, phase 2. 
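// Illustrative note, not from the patch: `Hash::ZERO` appears to act as a
// placeholder checksum here, marking "ready for phase 2" before any multi-block
// request has been sent; `request_missing_blocks()` then overwrites it with the
// digest of the hashes it actually requests.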
- self.p2_cursor = Some(Hash::ZERO); + self.state = SyncState::Phase2(Hash::ZERO); } else if DO_SPECULATIVE { self.request_missing_metadata(None)?; } @@ -603,39 +600,33 @@ impl Sync { } else { return Ok(()); } - } else if self.p2_cursor.is_some() { + } else if let SyncState::Phase2(_) = self.state { tracing::warn!("sync::RequestMissingMetadata : phase 2 in progress"); return Ok(()); - // } else if self.injected > 0 { - // tracing::warn!( - // "sync::RequestMissingMetadata : too many {} blocks in flight", - // self.injected - // ); - // return Ok(()); } if let Some(peer) = self.get_next_peer() { - let message = if let Some((hash, _)) = self.p1_cursor.as_ref() { - ExternalMessage::MetaDataRequest(RequestBlock { - request_at: SystemTime::now(), - from_hash: *hash, - batch_size: self.max_batch_size, - }) - } else if let Some((hash, number)) = block { - // insert the starting point for phase 1 - self.p1_cursor = Some((hash, number)); - ExternalMessage::MetaDataRequest(RequestBlock { + let message = match self.state { + SyncState::Phase1(hash, _) => ExternalMessage::MetaDataRequest(RequestBlock { request_at: SystemTime::now(), from_hash: hash, batch_size: self.max_batch_size, - }) - } else { - todo!("sync::RequestMissingMetadata : no metadata to request missing blocks"); + }), + SyncState::Phase0 if block.is_some() => { + let (hash, number) = block.unwrap(); + self.state = SyncState::Phase1(hash, number); + ExternalMessage::MetaDataRequest(RequestBlock { + request_at: SystemTime::now(), + from_hash: hash, + batch_size: self.max_batch_size, + }) + } + _ => anyhow::bail!("sync::MissingMetadata : invalid state"), }; tracing::info!( ?message, - "sync::RequestMissingMetadata : requesting missing chain from {}", + "sync::RequestMissingMetadata : requesting {} missing chain from {}", peer.peer_id ); self.message_sender @@ -753,3 +744,21 @@ impl PartialOrd for PeerInfo { Some(self.cmp(other)) } } + +/// Peer downgrade states/values, for downgrading an internal peer from selection. +#[derive(Debug)] +enum DownGrade { + None, + Partial, + Timeout, + Empty, +} + +/// Sync state +#[derive(Debug)] +enum SyncState { + Phase0, + Phase1(Hash, u64), + Phase2(Hash), + Phase3, +} From 4c0a274d928b63cf76bd4e93a6bebaa2b20430f6 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 7 Jan 2025 22:29:28 +0800 Subject: [PATCH 046/119] feat: restructure sync_proposal() to make it legible. --- zilliqa/src/sync.rs | 102 ++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 2f33bebc9..ba1110b0d 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -150,49 +150,44 @@ impl Sync { } self.recent_proposals.push_back(proposal); - // TODO: Replace with single SQL query - // Check if block parent exist in history - let parent_hash = self.recent_proposals.back().unwrap().header.qc.block_hash; - if self.db.get_block_by_hash(&parent_hash)?.is_none() { - // Check if oldes block exists in the history. If it does, we have synced up 99% of the chain. 
- let ancestor_hash = self.recent_proposals.front().unwrap().header.qc.block_hash; - if self.recent_proposals.len() == 1 - || self.db.get_block_by_hash(&ancestor_hash)?.is_none() - { - // No ancestor block, trigger sync - tracing::warn!( - "sync::SyncProposal : parent block {} not found", - parent_hash - ); - // TODO: Move this up - match self.state { - SyncState::Phase0 => { - // Start phase 1 - let block_number = self.recent_proposals.back().unwrap().number(); - self.request_missing_metadata(Some((parent_hash, block_number)))?; - } - SyncState::Phase1(_, _) => { - // Continue phase 1 - self.request_missing_metadata(None)?; - } - SyncState::Phase2(_) => { - // Continue phase 2 - self.request_missing_blocks()?; - } - SyncState::Phase3 => {} + match self.state { + // Check if we are out of sync + SyncState::Phase0 if self.injected == 0 => { + let parent_hash = self.recent_proposals.back().unwrap().header.qc.block_hash; + if self.db.get_block_by_hash(&parent_hash)?.is_none() { + // No parent block, trigger sync + tracing::warn!("sync::SyncProposal : syncing from {parent_hash}",); + let block_number = self.recent_proposals.back().unwrap().number(); + self.request_missing_metadata(Some((parent_hash, block_number)))?; } - } else { - // 99% synced, zip it up! - tracing::info!( - "sync::SyncProposal : finishing up {} blocks for segment #0 from {ancestor_hash}", - self.recent_proposals.len() - ); - // parent block exists, inject the proposal - let proposals = self.recent_proposals.drain(..).collect_vec(); - self.inject_proposals(proposals)?; - // we're done + } + // Continue phase 1, until we hit history/genesis. + SyncState::Phase1(_, _) if self.injected < self.max_batch_size => { + self.request_missing_metadata(None)?; + } + // Continue phase 2, until we have all segments. + SyncState::Phase2(_) if self.injected < self.max_blocks_in_flight => { + self.request_missing_blocks()?; + } + // Wait till 99% synced, zip it up! + SyncState::Phase3 if self.injected == 0 => { + let ancestor_hash = self.recent_proposals.front().unwrap().header.qc.block_hash; + if self.db.get_block_by_hash(&ancestor_hash)?.is_some() { + tracing::info!( + "sync::SyncProposal : finishing up {} blocks for segment #0 from {ancestor_hash}", + self.recent_proposals.len() + ); + // inject the proposals + let proposals = self.recent_proposals.drain(..).collect_vec(); + self.inject_proposals(proposals)?; + } + self.state = SyncState::Phase0; + } + _ => { + tracing::debug!("sync::SyncProposal : syncing {} blocks", self.injected); } } + Ok(()) } @@ -328,7 +323,7 @@ impl Sync { self.chain_segments.pop(); self.inject_proposals(proposals)?; - // Done with phase 2, allow phase 1 to restart. + // Done with phase 2 if self.chain_segments.is_empty() { self.state = SyncState::Phase3; } else if DO_SPECULATIVE && self.injected < self.max_blocks_in_flight { @@ -376,6 +371,9 @@ impl Sync { /// These hashes are then sent to a Peer for retrieval. /// This is Part 2 of the syncing algorithm. fn request_missing_blocks(&mut self) -> Result<()> { + if !matches!(self.state, SyncState::Phase2(_)) { + anyhow::bail!("sync::RequestMissingBlocks : invalid state"); + } // Early exit if there's a request in-flight; and if it has not expired. 
if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { @@ -389,16 +387,11 @@ impl Sync { } } else if self.injected > self.max_blocks_in_flight { tracing::warn!( - "sync::RequestMissingBlocks : too many {} blocks in flight", + "sync::RequestMissingBlocks : syncing {} blocks in flight", self.injected ); return Ok(()); - }; - - let SyncState::Phase2(_) = self.state else { - tracing::warn!("sync::RequestMissingBlocks : no metadata to request missing blocks"); - return Ok(()); - }; + } // will be re-inserted below if let Some(peer) = self.get_next_peer() { @@ -589,6 +582,9 @@ impl Sync { /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. /// Otherwise, it requests blocks from the given omega_block. pub fn request_missing_metadata(&mut self, block: Option<(Hash, u64)>) -> Result<()> { + if matches!(self.state, SyncState::Phase2(_)) || matches!(self.state, SyncState::Phase3) { + anyhow::bail!("sync::RequestMissingMetadata : invalid state"); + } // Early exit if there's a request in-flight; and if it has not expired. if let Some(peer) = self.in_flight.as_ref() { if peer.last_used.elapsed() > self.request_timeout { @@ -600,8 +596,12 @@ impl Sync { } else { return Ok(()); } - } else if let SyncState::Phase2(_) = self.state { - tracing::warn!("sync::RequestMissingMetadata : phase 2 in progress"); + } else if self.injected > self.max_batch_size { + // anything more than this and we cannot check whether the segment hits history + tracing::warn!( + "sync::RequestMissingMetadata : syncing {} blocks in flight", + self.injected + ); return Ok(()); } @@ -626,7 +626,7 @@ impl Sync { tracing::info!( ?message, - "sync::RequestMissingMetadata : requesting {} missing chain from {}", + "sync::RequestMissingMetadata : requesting missing chain from {}", peer.peer_id ); self.message_sender From 2d1b044d8dccadb76fa2eabd9214880dd504a51a Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 8 Jan 2025 00:00:38 +0800 Subject: [PATCH 047/119] checkpoint: working sync with state machine. --- zilliqa/src/sync.rs | 79 +++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index ba1110b0d..cb428a38f 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -67,8 +67,8 @@ pub struct Sync { max_batch_size: usize, // how many blocks to inject into the queue max_blocks_in_flight: usize, - // count of injected proposals pending processing - injected: usize, + // count of proposals pending in the pipeline + in_pipeline: usize, // our peer id peer_id: PeerId, // internal sync state @@ -107,7 +107,7 @@ impl Sync { max_batch_size: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks in_flight: None, - injected: 0, + in_pipeline: usize::MIN, chain_metadata: BTreeMap::new(), chain_segments: Vec::new(), state: SyncState::Phase0, @@ -115,26 +115,6 @@ impl Sync { }) } - /// Mark a received proposal - /// - /// Mark a proposal as received, and remove it from the cache. 
- pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { - if prop.from != self.peer_id { - tracing::error!( - "sync::MarkReceivedProposal : foreign InjectedProposal from {}", - prop.from - ); - } - if let Some(p) = self.chain_metadata.remove(&prop.block.hash()) { - tracing::warn!( - "sync::MarkReceivedProposal : removing stale metadata {}", - p.block_hash - ); - } - self.injected = self.injected.saturating_sub(1); - Ok(()) - } - /// Sync a block proposal. /// /// This is the main entry point for syncing a block proposal. @@ -152,7 +132,7 @@ impl Sync { match self.state { // Check if we are out of sync - SyncState::Phase0 if self.injected == 0 => { + SyncState::Phase0 if self.in_pipeline == 0 => { let parent_hash = self.recent_proposals.back().unwrap().header.qc.block_hash; if self.db.get_block_by_hash(&parent_hash)?.is_none() { // No parent block, trigger sync @@ -162,15 +142,15 @@ impl Sync { } } // Continue phase 1, until we hit history/genesis. - SyncState::Phase1(_, _) if self.injected < self.max_batch_size => { + SyncState::Phase1(_, _) if self.in_pipeline < self.max_batch_size => { self.request_missing_metadata(None)?; } // Continue phase 2, until we have all segments. - SyncState::Phase2(_) if self.injected < self.max_blocks_in_flight => { + SyncState::Phase2(_) if self.in_pipeline < self.max_blocks_in_flight => { self.request_missing_blocks()?; } // Wait till 99% synced, zip it up! - SyncState::Phase3 if self.injected == 0 => { + SyncState::Phase3 if self.in_pipeline == 0 => { let ancestor_hash = self.recent_proposals.front().unwrap().header.qc.block_hash; if self.db.get_block_by_hash(&ancestor_hash)?.is_some() { tracing::info!( @@ -184,7 +164,10 @@ impl Sync { self.state = SyncState::Phase0; } _ => { - tracing::debug!("sync::SyncProposal : syncing {} blocks", self.injected); + tracing::debug!( + "sync::SyncProposal : syncing {} blocks in pipeline", + self.in_pipeline + ); } } @@ -326,7 +309,7 @@ impl Sync { // Done with phase 2 if self.chain_segments.is_empty() { self.state = SyncState::Phase3; - } else if DO_SPECULATIVE && self.injected < self.max_blocks_in_flight { + } else if DO_SPECULATIVE { // Speculatively request more blocks self.request_missing_blocks()?; } @@ -385,10 +368,10 @@ impl Sync { } else { return Ok(()); } - } else if self.injected > self.max_blocks_in_flight { + } else if self.in_pipeline > self.max_blocks_in_flight { tracing::warn!( - "sync::RequestMissingBlocks : syncing {} blocks in flight", - self.injected + "sync::RequestMissingBlocks : syncing {} blocks in pipeline", + self.in_pipeline ); return Ok(()); } @@ -459,7 +442,7 @@ impl Sync { self.done_with_peer(DownGrade::Empty); return Ok(()); } else if response.len() < self.max_batch_size { - // Partial response, downgrade peer but accept the response. + // Partial response, downgrade peer but process the response. 
tracing::warn!("sync::MetadataResponse : partial blocks {from}",); self.done_with_peer(DownGrade::Partial); } else { @@ -596,11 +579,11 @@ impl Sync { } else { return Ok(()); } - } else if self.injected > self.max_batch_size { + } else if self.in_pipeline > self.max_batch_size { // anything more than this and we cannot check whether the segment hits history tracing::warn!( - "sync::RequestMissingMetadata : syncing {} blocks in flight", - self.injected + "sync::RequestMissingMetadata : syncing {} blocks in pipeline", + self.in_pipeline ); return Ok(()); } @@ -652,7 +635,7 @@ impl Sync { } // Increment proposals injected - self.injected = self.injected.saturating_add(proposals.len()); + self.in_pipeline = self.in_pipeline.saturating_add(proposals.len()); let len = proposals.len(); // Just pump the Proposals back to ourselves. @@ -675,12 +658,32 @@ impl Sync { tracing::debug!( "sync::InjectProposals : injected {}/{} proposals", len, - self.injected + self.in_pipeline ); // return last proposal Ok(()) } + /// Mark a received proposal + /// + /// Mark a proposal as received, and remove it from the cache. + pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { + if prop.from != self.peer_id { + tracing::error!( + "sync::MarkReceivedProposal : foreign InjectedProposal from {}", + prop.from + ); + } + if let Some(p) = self.chain_metadata.remove(&prop.block.hash()) { + tracing::warn!( + "sync::MarkReceivedProposal : removing stale metadata {}", + p.block_hash + ); + } + self.in_pipeline = self.in_pipeline.saturating_sub(1); + Ok(()) + } + /// Downgrade a peer based on the response received. fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { From b583758adcb6582a6c643d68e196494aa72ac63e Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 8 Jan 2025 16:07:54 +0800 Subject: [PATCH 048/119] Revert "sec: make RequestId random, to mitigate response injections." This reverts commit 33d45f6d516f9cb4247d224ebfc948a6259d89c9. --- zilliqa/src/node.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index b8d2f535f..2e712ad52 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -84,7 +84,9 @@ impl MessageSender { } pub fn next_request_id(&mut self) -> RequestId { - RequestId(rand::random()) // TODO: make this more secure, non-predictable + let request_id = self.request_id; + self.request_id.0 = self.request_id.0.wrapping_add(1); + request_id } /// Send a message to a remote node of the same shard. From bbfaf530bd6d04f3a26163087c036e957c4d2619 Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 8 Jan 2025 16:13:46 +0800 Subject: [PATCH 049/119] feat: make fixed-sized queue size, configurable. --- zilliqa/src/sync.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index cb428a38f..d2b445e71 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -48,7 +48,6 @@ use crate::{ // 4. If it does, we inject the entire queue into the pipeline. // 5. We are caught up. -const GAP_THRESHOLD: usize = 20; // Size of internal Proposal cache. const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks. #[derive(Debug)] @@ -97,6 +96,8 @@ impl Sync { }) .collect(); let peer_id = message_sender.our_peer_id; + let max_batch_size = config.block_request_batch_size.clamp(30, 180); // 30-180 sec of blocks at a time. 
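// Illustrative note, not from the patch: assuming roughly one block per second,
// as the in-line comments suggest, the clamp above bounds a batch to 30..=180
// blocks (a configured value of 10 becomes 30, and 1_000 becomes 180). The
// `max_blocks_in_flight` clamp on the next line is then forced into
// `max_batch_size..=1800`, so the pipeline holds at most about 30 minutes of
// blocks and never less than one full batch.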
+ let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. Ok(Self { db, @@ -104,14 +105,14 @@ impl Sync { peers, peer_id, request_timeout: config.consensus.consensus_timeout, - max_batch_size: config.block_request_batch_size.max(31), // between 30 seconds and 3 days of blocks. - max_blocks_in_flight: config.max_blocks_in_flight.min(3600), // cap to 1-hr worth of blocks + max_batch_size, + max_blocks_in_flight, in_flight: None, in_pipeline: usize::MIN, chain_metadata: BTreeMap::new(), chain_segments: Vec::new(), state: SyncState::Phase0, - recent_proposals: VecDeque::with_capacity(GAP_THRESHOLD), + recent_proposals: VecDeque::with_capacity(max_batch_size), }) } @@ -125,7 +126,7 @@ impl Sync { /// We do not perform checks on the Proposal here. This is done in the consensus layer. pub fn sync_proposal(&mut self, proposal: Proposal) -> Result<()> { // just stuff the latest proposal into the fixed-size queue. - while self.recent_proposals.len() >= GAP_THRESHOLD { + while self.recent_proposals.len() >= self.max_batch_size { self.recent_proposals.pop_front(); } self.recent_proposals.push_back(proposal); From 179cb4d48b11c6c211be1e5b63b2d11dc5023c91 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 9 Jan 2025 15:36:43 +0800 Subject: [PATCH 050/119] feat: v1 sync compatibility. --- zilliqa/src/node.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 2e712ad52..16c961681 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -291,6 +291,15 @@ impl Node { ExternalMessage::InjectedProposal(p) => { self.handle_injected_proposal(from, p)?; } + // Respond negatively to old block requests + ExternalMessage::BlockRequest(req) => { + let message = ExternalMessage::BlockResponse(BlockResponse { + availability: None, + proposals: vec![], + from_view: req.from_view, + }); + self.request_responses.send((response_channel, message))?; + } // Handle requests which contain a block proposal. Initially sent as a broadcast, it is re-routed into // a Request by the underlying layer, with a faux request-id. This is to mitigate issues when there are // too many transactions in the broadcast queue. From c93028364ad34e737f871e81ad5e644e3024887e Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 9 Jan 2025 17:58:29 +0800 Subject: [PATCH 051/119] feat: use ChainMetaData as the main state variable structure. --- zilliqa/src/message.rs | 4 +- zilliqa/src/node.rs | 3 + zilliqa/src/sync.rs | 132 ++++++++++++++++++++++++++++++----------- 3 files changed, 101 insertions(+), 38 deletions(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 4fbcbc6d5..2826190d5 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -245,10 +245,10 @@ pub struct InjectedProposal { /// Used to hold metadata about the chain #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChainMetaData { - // An encoded PeerId - pub block_hash: Hash, pub parent_hash: Hash, + pub block_hash: Hash, pub block_number: u64, + pub view_number: u64, } /// Used to convey proposal processing internally, to avoid blocking threads for too long. diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 16c961681..558cab097 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -340,6 +340,9 @@ impl Node { .consensus .sync .handle_metadata_response(from, response)?, + ExternalMessage::BlockResponse(response) => { + self.consensus.sync.handle_block_response(from, response)? 
+ } ExternalMessage::Acknowledgement => {} msg => { warn!(%msg, "unexpected message type"); diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index d2b445e71..7d27c237c 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -13,7 +13,10 @@ use crate::{ cfg::NodeConfig, crypto::Hash, db::Db, - message::{Block, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, RequestBlock}, + message::{ + Block, BlockResponse, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, + RequestBlock, + }, node::MessageSender, time::SystemTime, }; @@ -75,7 +78,7 @@ pub struct Sync { // complete chain metadata, in-memory chain_metadata: BTreeMap, // markers to segments in the chain, and the source peer for that segment. - chain_segments: Vec<(PeerId, Hash, u64)>, + chain_segments: Vec<(PeerId, ChainMetaData)>, // fixed-size queue of the most recent proposals recent_proposals: VecDeque, } @@ -90,6 +93,7 @@ impl Sync { let peers = peers .into_iter() .map(|peer_id| PeerInfo { + version: PeerVer::V2, // default to V2 peer score: 0, peer_id, last_used: Instant::now(), @@ -138,12 +142,20 @@ impl Sync { if self.db.get_block_by_hash(&parent_hash)?.is_none() { // No parent block, trigger sync tracing::warn!("sync::SyncProposal : syncing from {parent_hash}",); + let block_hash = self.recent_proposals.back().unwrap().hash(); let block_number = self.recent_proposals.back().unwrap().number(); - self.request_missing_metadata(Some((parent_hash, block_number)))?; + let view_number = self.recent_proposals.back().unwrap().view(); + let meta = ChainMetaData { + block_hash, + parent_hash, + block_number, + view_number, + }; + self.request_missing_metadata(Some(meta))?; } } // Continue phase 1, until we hit history/genesis. - SyncState::Phase1(_, _) if self.in_pipeline < self.max_batch_size => { + SyncState::Phase1(_) if self.in_pipeline < self.max_batch_size => { self.request_missing_metadata(None)?; } // Continue phase 2, until we have all segments. @@ -193,9 +205,10 @@ impl Sync { /// Convenience function to extract metadata from the block. fn block_to_metadata(&self, block: Block) -> ChainMetaData { ChainMetaData { - block_number: block.number(), - block_hash: block.hash(), parent_hash: block.parent_hash(), + block_hash: block.hash(), + block_number: block.number(), + view_number: block.view(), } } @@ -210,15 +223,18 @@ impl Sync { } // remove the last segment from the chain metadata - let (peer, hash, num) = self.chain_segments.pop().unwrap(); - let mut key = hash; + let (peer, meta) = self.chain_segments.pop().unwrap(); + let mut key = meta.parent_hash; while let Some(p) = self.chain_metadata.remove(&key) { key = p.parent_hash; } // allow retry from p1 - self.state = SyncState::Phase1(hash, num); - tracing::info!("sync::RetryPhase1 : retrying block {hash} from {peer}"); + tracing::info!( + "sync::RetryPhase1 : retrying block {} from {peer}", + meta.parent_hash + ); + self.state = SyncState::Phase1(meta); if DO_SPECULATIVE { self.request_missing_metadata(None)?; } @@ -260,7 +276,7 @@ impl Sync { ); // Spurious response - let Some((peer_id, hash, _)) = self.chain_segments.last() else { + let Some((peer_id, meta)) = self.chain_segments.last() else { anyhow::bail!("sync::MultiBlockResponse: no more chain_segments!"); }; @@ -272,8 +288,11 @@ impl Sync { // Segment history does not match, retry phase 1. 
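// Illustrative note, not from the patch: the segment marker pushed in phase 1
// is the cursor that was current when this segment's metadata was requested,
// so its `parent_hash` names the newest block the response should start with.
// If the first proposal returned is not that exact block, the stored metadata
// for the segment cannot be trusted (the peer may have pruned or reorganised),
// and the whole segment is redone from phase 1.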
let prop_hash = response.first().as_ref().unwrap().hash(); - if *hash != prop_hash { - tracing::error!("sync::MultiBlockResponse : mismatched landmark {hash} != {prop_hash}"); + if meta.parent_hash != prop_hash { + tracing::error!( + "sync::MultiBlockResponse : mismatched landmark {} != {prop_hash}", + meta.parent_hash + ); return self.retry_phase1(); } @@ -380,9 +399,9 @@ impl Sync { // will be re-inserted below if let Some(peer) = self.get_next_peer() { // If we have no chain_segments, we have nothing to do - if let Some((peer_id, hash, _)) = self.chain_segments.last() { + if let Some((peer_id, meta)) = self.chain_segments.last() { let mut request_hashes = Vec::with_capacity(self.max_batch_size); - let mut key = *hash; // start from this block + let mut key = meta.parent_hash; // start from this block while let Some(meta) = self.chain_metadata.remove(&key) { request_hashes.push(meta.block_hash); key = meta.parent_hash; @@ -411,6 +430,7 @@ impl Sync { )?; self.peers.push(peer); // reinsert peer, as we will be using a faux peer below self.in_flight = Some(PeerInfo { + version: PeerVer::V2, peer_id: *peer_id, last_used: std::time::Instant::now(), score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers @@ -427,6 +447,26 @@ impl Sync { Ok(()) } + /// Handle a V1 block response + /// + /// This will be called during both Phase 1 & Phase 2 block responses. + /// In phase 1, it will extract the metadata and feed it into handle_metadata_response. + /// In phase 2, it will extract the blocks and feed it into handle_multiblock_response. + pub fn handle_block_response(&mut self, from: PeerId, response: BlockResponse) -> Result<()> { + // ... + match self.state { + // Phase 1 + // Phase 2 + _ => { + tracing::debug!( + "sync::HandleBlockResponse : from={from} response={:?}", + response + ); + } + } + Ok(()) + } + /// Handle a response to a metadata request. /// /// This is the first step in the syncing algorithm, where we receive a set of metadata and use it to @@ -451,12 +491,12 @@ impl Sync { } // Check the linkage of the returned chain - let SyncState::Phase1(p1_hash, p1_num) = self.state else { + let SyncState::Phase1(meta) = &self.state else { anyhow::bail!("sync::MetadataResponse : invalid state"); }; - let mut block_hash = p1_hash; - let mut block_num = p1_num; + let mut block_hash = meta.parent_hash; + let mut block_num = meta.block_number; for meta in response.iter() { // check that the block hash and number is as expected. 
if meta.block_hash != Hash::ZERO @@ -482,13 +522,11 @@ impl Sync { let segment = response; // Record landmark, including peer that has this set of blocks - self.chain_segments.push((from, p1_hash, p1_num)); + self.chain_segments.push((from, meta.clone())); // Record the oldest block in the chain's parent - self.state = SyncState::Phase1( - segment.last().unwrap().parent_hash, - segment.last().unwrap().block_number, - ); + self.state = SyncState::Phase1(segment.last().cloned().unwrap()); + let last_block_hash = segment.last().as_ref().unwrap().block_hash; tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", @@ -498,7 +536,6 @@ impl Sync { ); // Record the actual chain metadata - let last_block_hash = segment.last().as_ref().unwrap().block_hash; for meta in segment { if self.chain_metadata.insert(meta.block_hash, meta).is_some() { anyhow::bail!("loop in chain!"); // there is a possible loop in the chain @@ -565,7 +602,7 @@ impl Sync { /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. /// Otherwise, it requests blocks from the given omega_block. - pub fn request_missing_metadata(&mut self, block: Option<(Hash, u64)>) -> Result<()> { + pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { if matches!(self.state, SyncState::Phase2(_)) || matches!(self.state, SyncState::Phase3) { anyhow::bail!("sync::RequestMissingMetadata : invalid state"); } @@ -591,17 +628,20 @@ impl Sync { if let Some(peer) = self.get_next_peer() { let message = match self.state { - SyncState::Phase1(hash, _) => ExternalMessage::MetaDataRequest(RequestBlock { - request_at: SystemTime::now(), - from_hash: hash, - batch_size: self.max_batch_size, - }), - SyncState::Phase0 if block.is_some() => { - let (hash, number) = block.unwrap(); - self.state = SyncState::Phase1(hash, number); + SyncState::Phase1(ChainMetaData { parent_hash, .. }) => { ExternalMessage::MetaDataRequest(RequestBlock { request_at: SystemTime::now(), - from_hash: hash, + from_hash: parent_hash, + batch_size: self.max_batch_size, + }) + } + SyncState::Phase0 if meta.is_some() => { + let meta = meta.unwrap(); + self.state = SyncState::Phase1(meta.clone()); + let ChainMetaData { parent_hash, .. } = meta; + ExternalMessage::MetaDataRequest(RequestBlock { + request_at: SystemTime::now(), + from_hash: parent_hash, batch_size: self.max_batch_size, }) } @@ -688,6 +728,16 @@ impl Sync { /// Downgrade a peer based on the response received. fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { + // TODO: Double-check version logic + peer.version = match downgrade { + // a V1 will not respond with anything to a V2 request. + DownGrade::Timeout if matches!(peer.version, PeerVer::V2) => PeerVer::V1, + // a V2 will respond with availability = None to a V1 request. + DownGrade::Unavailable if matches!(peer.version, PeerVer::V1) => PeerVer::V2, + // Otherwise, maintain + _ => peer.version, + }; + // Downgrade peer, if necessary peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better, to avoid a single source of truth. @@ -703,6 +753,7 @@ impl Sync { pub fn add_peer(&mut self, peer: PeerId) { // new peers should be tried last, which gives them time to sync first. 
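// Illustrative note, not from the patch: a lower score marks a more reliable
// peer, and `done_with_peer` adds the `DownGrade` value (with `Empty` costing
// the most, given the enum's declaration order) to whichever peer just
// answered. Seeding a newly added peer with the current maximum score, as done
// below, therefore places it behind every existing peer in the selection order
// without penalising it permanently.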
let new_peer = PeerInfo { + version: PeerVer::V2, // default V2 score: self.peers.iter().map(|p| p.score).max().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), @@ -727,11 +778,12 @@ impl Sync { } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Eq, PartialEq)] struct PeerInfo { score: u32, peer_id: PeerId, last_used: Instant, + version: PeerVer, } impl Ord for PeerInfo { @@ -753,6 +805,7 @@ impl PartialOrd for PeerInfo { #[derive(Debug)] enum DownGrade { None, + Unavailable, Partial, Timeout, Empty, @@ -762,7 +815,14 @@ enum DownGrade { #[derive(Debug)] enum SyncState { Phase0, - Phase1(Hash, u64), + Phase1(ChainMetaData), Phase2(Hash), Phase3, } + +/// Peer Version +#[derive(Debug, Clone, Eq, PartialEq)] +enum PeerVer { + V1, + V2, +} From b643541f6419ed9a1de5091562162965006d6f6c Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 9 Jan 2025 19:08:43 +0800 Subject: [PATCH 052/119] feat: make sync compatible with older nodes. --- zilliqa/src/node.rs | 4 +- zilliqa/src/sync.rs | 114 +++++++++++++++++++++++++++++++++----------- 2 files changed, 90 insertions(+), 28 deletions(-) diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 558cab097..410d29c21 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -275,6 +275,7 @@ impl Node { self.request_responses .send((response_channel, ExternalMessage::Acknowledgement))?; } + // RFC-161 sync algorithm, phase 2. ExternalMessage::MultiBlockRequest(request) => { let message = self .consensus @@ -282,6 +283,7 @@ impl Node { .handle_multiblock_request(from, request)?; self.request_responses.send((response_channel, message))?; } + // RFC-161 sync algorithm, phase 1. ExternalMessage::MetaDataRequest(request) => { let message = self.consensus.sync.handle_metadata_request(from, request)?; self.request_responses.send((response_channel, message))?; @@ -291,7 +293,7 @@ impl Node { ExternalMessage::InjectedProposal(p) => { self.handle_injected_proposal(from, p)?; } - // Respond negatively to old block requests + // Respond negatively to block request from old nodes ExternalMessage::BlockRequest(req) => { let message = ExternalMessage::BlockResponse(BlockResponse { availability: None, diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 7d27c237c..56333424d 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -14,8 +14,8 @@ use crate::{ crypto::Hash, db::Db, message::{ - Block, BlockResponse, ChainMetaData, ExternalMessage, InjectedProposal, Proposal, - RequestBlock, + Block, BlockRequest, BlockResponse, ChainMetaData, ExternalMessage, InjectedProposal, + Proposal, RequestBlock, }, node::MessageSender, time::SystemTime, @@ -78,7 +78,7 @@ pub struct Sync { // complete chain metadata, in-memory chain_metadata: BTreeMap, // markers to segments in the chain, and the source peer for that segment. 
- chain_segments: Vec<(PeerId, ChainMetaData)>, + chain_segments: Vec<(PeerInfo, ChainMetaData)>, // fixed-size queue of the most recent proposals recent_proposals: VecDeque, } @@ -223,7 +223,7 @@ impl Sync { } // remove the last segment from the chain metadata - let (peer, meta) = self.chain_segments.pop().unwrap(); + let (peer_info, meta) = self.chain_segments.pop().unwrap(); let mut key = meta.parent_hash; while let Some(p) = self.chain_metadata.remove(&key) { key = p.parent_hash; @@ -231,8 +231,9 @@ impl Sync { // allow retry from p1 tracing::info!( - "sync::RetryPhase1 : retrying block {} from {peer}", - meta.parent_hash + "sync::RetryPhase1 : retrying block {} from {}", + meta.parent_hash, + peer_info.peer_id, ); self.state = SyncState::Phase1(meta); if DO_SPECULATIVE { @@ -276,12 +277,12 @@ impl Sync { ); // Spurious response - let Some((peer_id, meta)) = self.chain_segments.last() else { + let Some((peer_info, meta)) = self.chain_segments.last() else { anyhow::bail!("sync::MultiBlockResponse: no more chain_segments!"); }; // If the response is not from the expected peer, retry phase 2. - if *peer_id != from { + if peer_info.peer_id != from { tracing::warn!("sync::MultiBlockResponse: unknown peer {from}, will retry"); return Ok(()); } @@ -399,12 +400,15 @@ impl Sync { // will be re-inserted below if let Some(peer) = self.get_next_peer() { // If we have no chain_segments, we have nothing to do - if let Some((peer_id, meta)) = self.chain_segments.last() { + if let Some((peer_info, meta)) = self.chain_segments.last() { + let to_view = meta.view_number.saturating_sub(1); + let mut from_view = meta.view_number; let mut request_hashes = Vec::with_capacity(self.max_batch_size); let mut key = meta.parent_hash; // start from this block while let Some(meta) = self.chain_metadata.remove(&key) { request_hashes.push(meta.block_hash); key = meta.parent_hash; + from_view = meta.view_number; self.chain_metadata.insert(meta.block_hash, meta); // reinsert, for retries } @@ -422,19 +426,33 @@ impl Sync { "sync::RequestMissingBlocks : requesting {} blocks of segment #{} from {}", request_hashes.len(), self.chain_segments.len(), - peer_id, + peer_info.peer_id, ); - self.message_sender.send_external_message( - *peer_id, - ExternalMessage::MultiBlockRequest(request_hashes), - )?; + self.peers.push(peer); // reinsert peer, as we will be using a faux peer below - self.in_flight = Some(PeerInfo { - version: PeerVer::V2, - peer_id: *peer_id, - last_used: std::time::Instant::now(), - score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers - }); + + let message = match peer_info.version { + PeerVer::V2 => { + self.in_flight = Some(PeerInfo { + version: PeerVer::V2, + peer_id: peer_info.peer_id, + last_used: std::time::Instant::now(), + score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers + }); + ExternalMessage::MultiBlockRequest(request_hashes) + } + PeerVer::V1 => { + self.in_flight = Some(PeerInfo { + version: PeerVer::V1, + peer_id: peer_info.peer_id, + last_used: std::time::Instant::now(), + score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers + }); + ExternalMessage::BlockRequest(BlockRequest { to_view, from_view }) + } + }; + self.message_sender + .send_external_message(peer_info.peer_id, message)?; } else { // No more chain_segments, we're done self.peers.push(peer); @@ -456,9 +474,32 @@ impl Sync { // ... 
match self.state { // Phase 1 + SyncState::Phase1(_) => { + // TODO: Should be buffer the proposals? Probably not! + let metadata = response + .proposals + .into_iter() + .sorted_by(|a, b| b.view().cmp(&a.view())) + .map(|p| ChainMetaData { + block_hash: p.hash(), + parent_hash: p.header.qc.block_hash, + block_number: p.number(), + view_number: p.view(), + }) + .collect_vec(); + self.handle_metadata_response(from, metadata)?; + } // Phase 2 + SyncState::Phase2(_) => { + let multi_blocks = response + .proposals + .into_iter() + .sorted_by(|a, b| b.view().cmp(&a.view())) + .collect_vec(); + self.handle_multiblock_response(from, multi_blocks)?; + } _ => { - tracing::debug!( + tracing::error!( "sync::HandleBlockResponse : from={from} response={:?}", response ); @@ -476,6 +517,7 @@ impl Sync { from: PeerId, response: Vec, ) -> Result<()> { + let segment_peer = self.in_flight.as_ref().unwrap().clone(); // Process whatever we have received. if response.is_empty() { // Empty response, downgrade peer and retry with a new peer. @@ -522,7 +564,7 @@ impl Sync { let segment = response; // Record landmark, including peer that has this set of blocks - self.chain_segments.push((from, meta.clone())); + self.chain_segments.push((segment_peer, meta.clone())); // Record the oldest block in the chain's parent self.state = SyncState::Phase1(segment.last().cloned().unwrap()); @@ -628,26 +670,44 @@ impl Sync { if let Some(peer) = self.get_next_peer() { let message = match self.state { - SyncState::Phase1(ChainMetaData { parent_hash, .. }) => { + SyncState::Phase1(ChainMetaData { parent_hash, .. }) + if matches!(peer.version, PeerVer::V2) => + { ExternalMessage::MetaDataRequest(RequestBlock { request_at: SystemTime::now(), from_hash: parent_hash, batch_size: self.max_batch_size, }) } - SyncState::Phase0 if meta.is_some() => { + SyncState::Phase1(ChainMetaData { view_number, .. }) + if matches!(peer.version, PeerVer::V1) => + { + ExternalMessage::BlockRequest(BlockRequest { + to_view: view_number.saturating_sub(1), // we want the parent i.e. earlier view + from_view: view_number.saturating_sub(self.max_batch_size as u64), + }) + } + SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V2) => { let meta = meta.unwrap(); - self.state = SyncState::Phase1(meta.clone()); - let ChainMetaData { parent_hash, .. } = meta; + let parent_hash = meta.parent_hash; + self.state = SyncState::Phase1(meta); ExternalMessage::MetaDataRequest(RequestBlock { request_at: SystemTime::now(), from_hash: parent_hash, batch_size: self.max_batch_size, }) } + SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V1) => { + let meta = meta.unwrap(); + let view_number = meta.view_number; + self.state = SyncState::Phase1(meta); + ExternalMessage::BlockRequest(BlockRequest { + to_view: view_number.saturating_sub(1), // we want the parent i.e. earlier view + from_view: view_number.saturating_sub(self.max_batch_size as u64), + }) + } _ => anyhow::bail!("sync::MissingMetadata : invalid state"), }; - tracing::info!( ?message, "sync::RequestMissingMetadata : requesting missing chain from {}", From feecd20a3ad56d2f0363f1691246bade88a86324 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 9 Jan 2025 20:21:19 +0800 Subject: [PATCH 053/119] feat: default to V1 peer; upgrade to V2 peer upon getting invalid response. 
--- zilliqa/src/message.rs | 4 +- zilliqa/src/node.rs | 5 +- zilliqa/src/sync.rs | 104 ++++++++++++++++++++++------------------- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 2826190d5..c9a758a31 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -228,7 +228,7 @@ impl fmt::Debug for BlockResponse { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RequestBlock { +pub struct BlockRequestV2 { pub request_at: SystemTime, pub from_hash: Hash, pub batch_size: usize, @@ -286,7 +286,7 @@ pub enum ExternalMessage { AddPeer, RemovePeer, InjectedProposal(InjectedProposal), - MetaDataRequest(RequestBlock), + MetaDataRequest(BlockRequestV2), MetaDataResponse(Vec), MultiBlockRequest(Vec), MultiBlockResponse(Vec), diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 410d29c21..589d7065c 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -294,11 +294,12 @@ impl Node { self.handle_injected_proposal(from, p)?; } // Respond negatively to block request from old nodes - ExternalMessage::BlockRequest(req) => { + ExternalMessage::BlockRequest(_) => { + // respond with an invalid response let message = ExternalMessage::BlockResponse(BlockResponse { availability: None, proposals: vec![], - from_view: req.from_view, + from_view: u64::MAX, }); self.request_responses.send((response_channel, message))?; } diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 56333424d..5a688ef72 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -14,8 +14,8 @@ use crate::{ crypto::Hash, db::Db, message::{ - Block, BlockRequest, BlockResponse, ChainMetaData, ExternalMessage, InjectedProposal, - Proposal, RequestBlock, + Block, BlockRequest, BlockRequestV2, BlockResponse, ChainMetaData, ExternalMessage, + InjectedProposal, Proposal, }, node::MessageSender, time::SystemTime, @@ -51,7 +51,10 @@ use crate::{ // 4. If it does, we inject the entire queue into the pipeline. // 5. We are caught up. -const DO_SPECULATIVE: bool = false; // Speeds up syncing by speculatively fetching blocks. +#[cfg(debug_assertions)] +const DO_SPECULATIVE: bool = false; +#[cfg(not(debug_assertions))] +const DO_SPECULATIVE: bool = true; // Speeds up syncing by speculatively fetching blocks. #[derive(Debug)] pub struct Sync { @@ -81,6 +84,8 @@ pub struct Sync { chain_segments: Vec<(PeerInfo, ChainMetaData)>, // fixed-size queue of the most recent proposals recent_proposals: VecDeque, + // for statistics only + inject_at: Option<(std::time::Instant, usize)>, } impl Sync { @@ -93,7 +98,7 @@ impl Sync { let peers = peers .into_iter() .map(|peer_id| PeerInfo { - version: PeerVer::V2, // default to V2 peer + version: PeerVer::V1, // default to V1 peer, until otherwise proven. score: 0, peer_id, last_used: Instant::now(), @@ -117,6 +122,7 @@ impl Sync { chain_segments: Vec::new(), state: SyncState::Phase0, recent_proposals: VecDeque::with_capacity(max_batch_size), + inject_at: None, }) } @@ -215,10 +221,11 @@ impl Sync { /// Retry phase 1 /// /// If something went wrong, phase 1 may need to be retried for the most recent segment. - /// Pop the segment from the landmark, and continue phase 1. + /// Pop the segment from the segment marker, and continue phase 1. 
fn retry_phase1(&mut self) -> Result<()> { if self.chain_segments.is_empty() { tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain_segments!"); + self.state = SyncState::Phase0; return Ok(()); } @@ -277,26 +284,16 @@ impl Sync { ); // Spurious response - let Some((peer_info, meta)) = self.chain_segments.last() else { + let Some((peer_info, _)) = self.chain_segments.last() else { anyhow::bail!("sync::MultiBlockResponse: no more chain_segments!"); }; - // If the response is not from the expected peer, retry phase 2. + // If the response is not from the expected peer e.g. delayed response, retry phase 2. if peer_info.peer_id != from { tracing::warn!("sync::MultiBlockResponse: unknown peer {from}, will retry"); return Ok(()); } - // Segment history does not match, retry phase 1. - let prop_hash = response.first().as_ref().unwrap().hash(); - if meta.parent_hash != prop_hash { - tracing::error!( - "sync::MultiBlockResponse : mismatched landmark {} != {prop_hash}", - meta.parent_hash - ); - return self.retry_phase1(); - } - // If the checksum does not match, retry phase 1. Maybe the node has pruned the segment. let checksum = response .iter() @@ -373,7 +370,7 @@ impl Sync { /// /// It constructs a set of hashes, which constitute the series of blocks that are missing. /// These hashes are then sent to a Peer for retrieval. - /// This is Part 2 of the syncing algorithm. + /// This is phase 2 of the syncing algorithm. fn request_missing_blocks(&mut self) -> Result<()> { if !matches!(self.state, SyncState::Phase2(_)) { anyhow::bail!("sync::RequestMissingBlocks : invalid state"); @@ -399,6 +396,9 @@ impl Sync { // will be re-inserted below if let Some(peer) = self.get_next_peer() { + // reinsert peer, as we will use a faux peer below, to force the request to go to the original responder + self.peers.push(peer); + // If we have no chain_segments, we have nothing to do if let Some((peer_info, meta)) = self.chain_segments.last() { let to_view = meta.view_number.saturating_sub(1); @@ -429,8 +429,6 @@ impl Sync { peer_info.peer_id, ); - self.peers.push(peer); // reinsert peer, as we will be using a faux peer below - let message = match peer_info.version { PeerVer::V2 => { self.in_flight = Some(PeerInfo { @@ -453,9 +451,6 @@ impl Sync { }; self.message_sender .send_external_message(peer_info.peer_id, message)?; - } else { - // No more chain_segments, we're done - self.peers.push(peer); } } else { tracing::warn!( @@ -468,14 +463,26 @@ impl Sync { /// Handle a V1 block response /// /// This will be called during both Phase 1 & Phase 2 block responses. + /// If the response if from a V2 peer, it will upgrade that peer to V2. /// In phase 1, it will extract the metadata and feed it into handle_metadata_response. /// In phase 2, it will extract the blocks and feed it into handle_multiblock_response. pub fn handle_block_response(&mut self, from: PeerId, response: BlockResponse) -> Result<()> { - // ... + // Upgrade to V2 peer. + if response.availability.is_none() + && response.proposals.is_empty() + && response.from_view == u64::MAX + { + tracing::info!("sync::HandleBlockResponse : upgrading {from} to V2",); + self.in_flight.as_mut().unwrap().version = PeerVer::V2; + self.done_with_peer(DownGrade::None); + return Ok(()); + } + + // Convert the V1 response into a V2 response. match self.state { // Phase 1 SyncState::Phase1(_) => { - // TODO: Should be buffer the proposals? Probably not! + // We do not buffer the proposals, as it takes 250MB/day! 
let metadata = response .proposals .into_iter() @@ -491,11 +498,12 @@ impl Sync { } // Phase 2 SyncState::Phase2(_) => { - let multi_blocks = response + let mut multi_blocks = response .proposals .into_iter() .sorted_by(|a, b| b.view().cmp(&a.view())) .collect_vec(); + multi_blocks.retain(|p| self.chain_metadata.contains_key(&p.hash())); self.handle_multiblock_response(from, multi_blocks)?; } _ => { @@ -511,7 +519,8 @@ impl Sync { /// Handle a response to a metadata request. /// /// This is the first step in the syncing algorithm, where we receive a set of metadata and use it to - /// construct a chain history. + /// construct a chain history. We check that the metadata does indeed constitute a chain. If it does, + /// we record its segment marker and store the entire chain in-memory. pub fn handle_metadata_response( &mut self, from: PeerId, @@ -603,7 +612,7 @@ impl Sync { pub fn handle_metadata_request( &mut self, from: PeerId, - request: RequestBlock, + request: BlockRequestV2, ) -> Result { tracing::debug!( "sync::MetadataRequest : received a metadata request from {}", @@ -643,7 +652,7 @@ impl Sync { /// /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. - /// Otherwise, it requests blocks from the given omega_block. + /// Otherwise, it requests blocks from the given starting metadata. pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { if matches!(self.state, SyncState::Phase2(_)) || matches!(self.state, SyncState::Phase3) { anyhow::bail!("sync::RequestMissingMetadata : invalid state"); @@ -660,7 +669,7 @@ impl Sync { return Ok(()); } } else if self.in_pipeline > self.max_batch_size { - // anything more than this and we cannot check whether the segment hits history + // anything more than this and we cannot be sure whether the segment hits history tracing::warn!( "sync::RequestMissingMetadata : syncing {} blocks in pipeline", self.in_pipeline @@ -673,7 +682,7 @@ impl Sync { SyncState::Phase1(ChainMetaData { parent_hash, .. }) if matches!(peer.version, PeerVer::V2) => { - ExternalMessage::MetaDataRequest(RequestBlock { + ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, batch_size: self.max_batch_size, @@ -691,7 +700,7 @@ impl Sync { let meta = meta.unwrap(); let parent_hash = meta.parent_hash; self.state = SyncState::Phase1(meta); - ExternalMessage::MetaDataRequest(RequestBlock { + ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, batch_size: self.max_batch_size, @@ -727,14 +736,22 @@ impl Sync { /// Inject the proposals into the chain. /// - /// Besides pumping the set of Proposals into the processing pipeline, it also records the - /// last known Proposal in the pipeline. This is used for speculative fetches, and also for - /// knowing where to continue fetching from. + /// It adds the list of proposals into the pipeline for execution. + /// It also outputs some syncing statistics. 
fn inject_proposals(&mut self, proposals: Vec) -> Result<()> { if proposals.is_empty() { return Ok(()); } + // Output some stats + if let Some((when, injected)) = self.inject_at { + tracing::debug!( + "sync::InjectProposals : synced {}/{:?}", + injected - self.in_pipeline, + when.elapsed() + ); + } + // Increment proposals injected self.in_pipeline = self.in_pipeline.saturating_add(proposals.len()); let len = proposals.len(); @@ -742,7 +759,7 @@ impl Sync { // Just pump the Proposals back to ourselves. for p in proposals { tracing::trace!( - "Injecting proposal number: {} hash: {}", + "sync::InjectProposals : injecting number: {} hash: {}", p.number(), p.hash(), ); @@ -756,6 +773,8 @@ impl Sync { )?; } + self.inject_at = Some((std::time::Instant::now(), self.in_pipeline)); + tracing::debug!( "sync::InjectProposals : injected {}/{} proposals", len, @@ -788,16 +807,6 @@ impl Sync { /// Downgrade a peer based on the response received. fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { - // TODO: Double-check version logic - peer.version = match downgrade { - // a V1 will not respond with anything to a V2 request. - DownGrade::Timeout if matches!(peer.version, PeerVer::V2) => PeerVer::V1, - // a V2 will respond with availability = None to a V1 request. - DownGrade::Unavailable if matches!(peer.version, PeerVer::V1) => PeerVer::V2, - // Otherwise, maintain - _ => peer.version, - }; - // Downgrade peer, if necessary peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better, to avoid a single source of truth. @@ -813,7 +822,7 @@ impl Sync { pub fn add_peer(&mut self, peer: PeerId) { // new peers should be tried last, which gives them time to sync first. let new_peer = PeerInfo { - version: PeerVer::V2, // default V2 + version: PeerVer::V1, // default V2 score: self.peers.iter().map(|p| p.score).max().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), @@ -865,7 +874,6 @@ impl PartialOrd for PeerInfo { #[derive(Debug)] enum DownGrade { None, - Unavailable, Partial, Timeout, Empty, From ad4cc3bae3734163cec2c62efdf023c7c0f7614a Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 10 Jan 2025 10:05:35 +0800 Subject: [PATCH 054/119] feat: filter V1 responses for gaps and forks. --- zilliqa/src/sync.rs | 53 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 5a688ef72..e64794b12 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -401,7 +401,7 @@ impl Sync { // If we have no chain_segments, we have nothing to do if let Some((peer_info, meta)) = self.chain_segments.last() { - let to_view = meta.view_number.saturating_sub(1); + let to_view = meta.view_number.saturating_add(Self::VIEW_DRIFT); let mut from_view = meta.view_number; let mut request_hashes = Vec::with_capacity(self.max_batch_size); let mut key = meta.parent_hash; // start from this block @@ -460,6 +460,11 @@ impl Sync { Ok(()) } + // we request a little more than we need, due to drift + // 10 ~ 1min + // 20 ~ 1hr + const VIEW_DRIFT: u64 = 10; + /// Handle a V1 block response /// /// This will be called during both Phase 1 & Phase 2 block responses. @@ -478,15 +483,33 @@ impl Sync { return Ok(()); } + if response.proposals.is_empty() { + tracing::info!("sync::HandleBlockResponse : empty V1 from {from}"); + self.done_with_peer(DownGrade::Empty); + return Ok(()); + } + // Convert the V1 response into a V2 response. 
match self.state { - // Phase 1 - SyncState::Phase1(_) => { + // Phase 1 - extract metadata from the set of proposals + SyncState::Phase1(ChainMetaData { + block_number, + mut parent_hash, + .. + }) => { // We do not buffer the proposals, as it takes 250MB/day! let metadata = response .proposals .into_iter() - .sorted_by(|a, b| b.view().cmp(&a.view())) + .filter(|p| p.number() < block_number) // filter extras + .sorted_by(|a, b| b.number().cmp(&a.number())) + .filter(|p| { + if parent_hash != p.hash() { + return false; + } + parent_hash = p.header.qc.block_hash; + true + }) // filter forks .map(|p| ChainMetaData { block_hash: p.hash(), parent_hash: p.header.qc.block_hash, @@ -494,16 +517,17 @@ impl Sync { view_number: p.view(), }) .collect_vec(); + self.handle_metadata_response(from, metadata)?; } - // Phase 2 + // Phase 2 - extract the requested blocks only SyncState::Phase2(_) => { - let mut multi_blocks = response + let multi_blocks = response .proposals .into_iter() - .sorted_by(|a, b| b.view().cmp(&a.view())) + .filter(|p| self.chain_metadata.contains_key(&p.hash())) // filter extras + .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); - multi_blocks.retain(|p| self.chain_metadata.contains_key(&p.hash())); self.handle_multiblock_response(from, multi_blocks)?; } _ => { @@ -560,7 +584,9 @@ impl Sync { // TODO: possibly, discard and rebuild entire chain // if something does not match, do nothing and retry the request with the next peer. tracing::error!( - "sync::MetadataResponse : retry metadata history for {block_hash}/{block_num}" + "sync::MetadataResponse : retry metadata expected hash={block_hash} != {} num={block_num} != {}", + meta.block_hash, + meta.block_number, ); return Ok(()); } @@ -692,7 +718,7 @@ impl Sync { if matches!(peer.version, PeerVer::V1) => { ExternalMessage::BlockRequest(BlockRequest { - to_view: view_number.saturating_sub(1), // we want the parent i.e. earlier view + to_view: view_number.saturating_add(Self::VIEW_DRIFT), from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } @@ -711,7 +737,7 @@ impl Sync { let view_number = meta.view_number; self.state = SyncState::Phase1(meta); ExternalMessage::BlockRequest(BlockRequest { - to_view: view_number.saturating_sub(1), // we want the parent i.e. earlier view + to_view: view_number.saturating_add(Self::VIEW_DRIFT), from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } @@ -870,13 +896,14 @@ impl PartialOrd for PeerInfo { } } -/// Peer downgrade states/values, for downgrading an internal peer from selection. +/// For downgrading a peer from being selected in get_next_peer(). +/// Ordered by degree of offence i.e. None is good, Timeout is worst #[derive(Debug)] enum DownGrade { None, Partial, - Timeout, Empty, + Timeout, } /// Sync state From ee3e322682ad67240e45089d919d0fc942a4f27d Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 10 Jan 2025 15:18:52 +0800 Subject: [PATCH 055/119] feat: working phase 1 with protomainnet. 
--- zilliqa/src/sync.rs | 91 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index e64794b12..c0af4d5b3 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -3,8 +3,10 @@ use std::{ collections::{BTreeMap, BinaryHeap, VecDeque}, sync::Arc, time::{Duration, Instant}, + u64, }; +use alloy::primitives::BlockNumber; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; @@ -86,6 +88,8 @@ pub struct Sync { recent_proposals: VecDeque, // for statistics only inject_at: Option<(std::time::Instant, usize)>, + // record starting number, for eth_syncing() RPC call. + started_at_block_number: u64, } impl Sync { @@ -123,6 +127,7 @@ impl Sync { state: SyncState::Phase0, recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, + started_at_block_number: u64::MIN, }) } @@ -158,6 +163,16 @@ impl Sync { view_number, }; self.request_missing_metadata(Some(meta))?; + + let highest_block = self + .db + .get_canonical_block_by_number( + self.db + .get_highest_canonical_block_number()? + .expect("no highest block"), + )? + .expect("missing highest block"); + self.started_at_block_number = highest_block.number(); } } // Continue phase 1, until we hit history/genesis. @@ -258,6 +273,19 @@ impl Sync { from: PeerId, response: Vec, ) -> Result<()> { + if let Some(peer) = self.in_flight.as_ref() { + if peer.peer_id != from { + tracing::warn!( + "sync::MultiBlockResponse : unexpected peer={} != {from}", + peer.peer_id + ); + return Ok(()); + } + } else { + tracing::warn!("sync::MultiBlockResponse : spurious response {from}"); + return Ok(()); + } + // Process only a full response if response.is_empty() { // Empty response, downgrade peer and retry phase 1. @@ -265,9 +293,9 @@ impl Sync { self.done_with_peer(DownGrade::Empty); return self.retry_phase1(); } else if response.len() < self.max_batch_size { - // Partial response, downgrade peer but process the block. + // Partial response, process blocks. tracing::warn!("sync::MultiBlockResponse : partial blocks {from}",); - self.done_with_peer(DownGrade::Partial); + self.done_with_peer(DownGrade::None); } else { self.done_with_peer(DownGrade::None); } @@ -550,6 +578,19 @@ impl Sync { from: PeerId, response: Vec, ) -> Result<()> { + if let Some(peer) = self.in_flight.as_ref() { + if peer.peer_id != from { + tracing::warn!( + "sync::MetadataResponse : unexpected peer={} != {from}", + peer.peer_id + ); + return Ok(()); + } + } else { + tracing::warn!("sync::MetadataResponse : spurious response {from}"); + return Ok(()); + } + let segment_peer = self.in_flight.as_ref().unwrap().clone(); // Process whatever we have received. if response.is_empty() { @@ -558,9 +599,9 @@ impl Sync { self.done_with_peer(DownGrade::Empty); return Ok(()); } else if response.len() < self.max_batch_size { - // Partial response, downgrade peer but process the response. + // Partial response, process the response. tracing::warn!("sync::MetadataResponse : partial blocks {from}",); - self.done_with_peer(DownGrade::Partial); + self.done_with_peer(DownGrade::None); } else { self.done_with_peer(DownGrade::None); } @@ -704,6 +745,7 @@ impl Sync { } if let Some(peer) = self.get_next_peer() { + let peer_id = peer.peer_id; let message = match self.state { SyncState::Phase1(ChainMetaData { parent_hash, .. 
}) if matches!(peer.version, PeerVer::V2) => @@ -746,12 +788,11 @@ impl Sync { tracing::info!( ?message, "sync::RequestMissingMetadata : requesting missing chain from {}", - peer.peer_id + peer_id ); - self.message_sender - .send_external_message(peer.peer_id, message)?; - self.in_flight = Some(peer); + self.message_sender + .send_external_message(peer_id, message)?; } else { tracing::warn!( "sync::RequestMissingMetadata : insufficient peers to request missing blocks" @@ -871,6 +912,39 @@ impl Sync { peer.last_used = std::time::Instant::now(); // used to determine stale in-flight requests. Some(peer) } + + /// Returns (am_syncing, current_highest_block) + pub fn am_syncing(&self) -> Result<(bool, Block)> { + let highest_block = self + .db + .get_canonical_block_by_number( + self.db + .get_highest_canonical_block_number()? + .expect("no highest block"), + )? + .expect("missing highest block"); + Ok(( + self.in_pipeline > 0 || !matches!(self.state, SyncState::Phase0), + highest_block, + )) + } + + // Returns (starting_block, current_block, highest_block) if we're syncing, + // None if we're not. + pub fn get_sync_data(&self) -> Result> { + let (flag, highest_block) = self.am_syncing()?; + if !flag { + Ok(None) + } else { + let highest_saved_block_number = highest_block.number(); + let highest_block_number_seen = self.recent_proposals.back().unwrap().number(); + Ok(Some(( + self.started_at_block_number, + highest_saved_block_number, + highest_block_number_seen, + ))) + } + } } #[derive(Debug, Clone, Eq, PartialEq)] @@ -901,7 +975,6 @@ impl PartialOrd for PeerInfo { #[derive(Debug)] enum DownGrade { None, - Partial, Empty, Timeout, } From 2e779e90d35d8e33d0ac4b102365c459df652215 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 10 Jan 2025 15:19:50 +0800 Subject: [PATCH 056/119] feat: removed sending BlockRequest from block_store.rs --- zilliqa/src/block_store.rs | 209 +------------------------------------ zilliqa/src/sync.rs | 5 +- 2 files changed, 5 insertions(+), 209 deletions(-) diff --git a/zilliqa/src/block_store.rs b/zilliqa/src/block_store.rs index 32bf71a71..e17b60e96 100644 --- a/zilliqa/src/block_store.rs +++ b/zilliqa/src/block_store.rs @@ -388,8 +388,6 @@ pub struct BlockStore { peers: BTreeMap, /// The maximum number of blocks to send requests for at a time. max_blocks_in_flight: u64, - /// When a request to a peer fails, do not send another request to this peer for this amount of time. - failed_request_sleep_duration: Duration, /// Our block strategies. strategies: Vec, /// The block views we have available. This is read once from the DB at start-up and incrementally updated whenever @@ -402,9 +400,6 @@ pub struct BlockStore { unserviceable_requests: Option, message_sender: MessageSender, - /// Clock pointer - see request_blocks() - clock: usize, - /// Where we last started syncing, so we can report it in get_sync_data() started_syncing_at: BlockNumber, /// Previous sync flag, so we can tell when it changes. @@ -426,8 +421,6 @@ struct PeerInfo { availability: BlockAvailability, /// When did we last update availability? availability_updated_at: Option, - /// Last availability query - don't send them too often. - availability_requested_at: Option, /// Requests we've sent to the peer. pending_requests: HashMap, /// If `Some`, the time of the most recently failed request. 
@@ -439,50 +432,10 @@ impl PeerInfo { Self { availability: BlockAvailability::new(), availability_updated_at: None, - availability_requested_at: None, pending_requests: HashMap::new(), last_request_failed_at: None, } } - - /// Do we have availability, or should we get it again? - fn have_availability(&self) -> bool { - self.availability_updated_at.is_some() - } - - /// Converts a set of block strategies into a rangemap - fn get_ranges(&self, max_view: Option) -> RangeMap { - let mut result = RangeMap::new(); - if let Some(strat) = &self.availability.strategies { - let mut max_end: Option = None; - let mut last_n: Option = None; - for s in strat { - match s { - BlockStrategy::CachedViewRange(views, until_view) => { - if until_view.map_or(true, |x| self.availability.highest_known_view <= x) { - result.with_range(views); - max_end = Some( - max_end.map_or(views.end - 1, |v| std::cmp::max(v, views.end - 1)), - ); - } - } - BlockStrategy::Latest(n) => { - last_n = Some(last_n.map_or(*n, |x| std::cmp::max(x, *n))); - } - } - } - if let Some(the_n) = last_n { - if let Some(max_view_nr) = max_view { - let start = max_view_nr.saturating_sub(the_n); - result.with_range(&Range { - start, - end: max_view_nr, - }); - } - } - } - result - } } /// Data about a peer @@ -568,13 +521,11 @@ impl BlockStore { highest_confirmed_view: 0, peers: BTreeMap::new(), max_blocks_in_flight: config.max_blocks_in_flight as u64, - failed_request_sleep_duration: config.failed_request_sleep_duration, strategies: vec![BlockStrategy::Latest(constants::RETAINS_LAST_N_BLOCKS)], available_blocks, buffered: BlockCache::new(config.max_blocks_in_flight as u64), unserviceable_requests: None, message_sender, - clock: 0, started_syncing_at: 0, last_sync_flag: false, }) @@ -604,13 +555,11 @@ impl BlockStore { highest_confirmed_view: 0, peers: BTreeMap::new(), max_blocks_in_flight: 0, - failed_request_sleep_duration: Duration::ZERO, strategies: self.strategies.clone(), available_blocks: RangeMap::new(), buffered: BlockCache::new(0), unserviceable_requests: None, message_sender: self.message_sender.clone(), - clock: 0, started_syncing_at: self.started_syncing_at, last_sync_flag: self.last_sync_flag, }) @@ -809,162 +758,8 @@ impl BlockStore { /// Make a request for the blocks associated with a range of views. Returns `true` if a request was made and `false` if the request had to be /// buffered because no peers were available. /// Public so we can trigger it from the debug API - pub fn request_blocks(&mut self, req: &RangeMap) -> Result { - let mut remain = req.clone(); - let to = req.max(); - - // Prune the pending requests - self.prune_pending_requests()?; - - // If it's in our input queue, don't expect it again. - let expected = self.buffered.expectant_block_ranges(); - (_, remain) = remain.diff_inter(&expected); - - // If it's already buffered, don't request it again - wait for us to reject it and - // then we can re-request. - let extant = self.buffered.extant_block_ranges(); - - (_, remain) = remain.diff_inter(&extant); - (_, remain) = remain.diff_inter(&self.buffered.empty_view_ranges); - - // If it's in flight, don't request it again. 
- let mut in_flight = RangeMap::new(); - for peer in self.peers.values() { - for (_, start, end) in peer.pending_requests.values() { - in_flight.with_range(&Range { - start: *start, - end: end + 1, - }); - } - } - (_, remain) = remain.diff_inter(&in_flight); - - let now = SystemTime::now(); - let failed_request_sleep_duration = self.failed_request_sleep_duration; - - // If everything we have is in flight, we'll skip trying to request them (or update availability) - if remain.is_empty() { - trace!("block_store::request_blocks() : .. no non in_flight requests. Returning early"); - return Ok(true); - } - - for chance in 0..2 { - trace!( - "block_store::request_blocks() : chance = {chance} clock = {} peers = {}", - self.clock, - self.peers.len() - ); - // There may be no peers ... - self.clock = (self.clock + 1) % std::cmp::max(1, self.peers.len()); - // Slightly horrid - generate a list of peers which is the BTreeMap's list, shifted by clock. - let peers = self - .peers - .keys() - .skip(self.clock) - .chain(self.peers.keys().take(self.clock)) - .cloned() - .collect::>(); - - for peer in &peers { - debug!("block_store::request_blocks() : considering peer = {peer}"); - // If the last request failed < 10s or so ago, skip this peer, unless we're second-chance in - // which case, hey, why not? - let (requests, rem, query_availability) = { - let peer_info = self.peer_info(*peer); - if chance == 0 - && !peer_info - .last_request_failed_at - .and_then(|at| at.elapsed().ok()) - .map(|time_since| time_since > failed_request_sleep_duration) - .unwrap_or(true) - { - trace!("block_store::request_blocks() : .. Last request failed; skipping this peer"); - continue; - } - - if peer_info.pending_requests.len() - >= constants::MAX_PENDING_BLOCK_REQUESTS_PER_PEER - { - trace!( - "block_store::request_blocks() : .. Skipping peer {peer} - too many pending requests {0}", - peer_info.pending_requests.len() - ); - continue; - } - // Split .. - let left = constants::MAX_PENDING_BLOCK_REQUESTS_PER_PEER - - peer_info.pending_requests.len(); - let ranges = peer_info.get_ranges(to); - let (req, rem) = remain.diff_inter_limited(&ranges, Some(left)); - // If we are not about to make a request, and we do not have recent availability then - // make a synthetic request to get that availability. - let query_availability = req.is_empty() - && peer_info.pending_requests.is_empty() - && (!peer_info.have_availability() - || peer_info.availability_requested_at.map_or(true, |x| { - x.elapsed() - .map(|v| { - v > constants::REQUEST_PEER_VIEW_AVAILABILITY_NOT_BEFORE - }) - .unwrap_or(true) - })); - (req, rem, query_availability) - }; - - let mut request_sent = false; - // Send all requests now .. - for request in requests.ranges.iter() { - if !request.is_empty() { - trace!( - "block_store::request_blocks() : peer = {:?} request = {:?}: sending block request", - peer, - request, - ); - // Yay! - let message = ExternalMessage::BlockRequest(BlockRequest { - from_view: request.start, - to_view: request.end, - }); - let request_id = - self.message_sender.send_external_message(*peer, message)?; - self.peer_info(*peer) - .pending_requests - .insert(request_id, (now, request.start, request.end)); - request_sent = true; - } - } - // If we haven't got recent availability, and we haven't already asked for it, ask .. 
- if !request_sent && chance == 0 && query_availability { - trace!("block_store::request_blocks() : Querying availability"); - // Executive decision: Don't ask for any blocks here, because we are about to do so in duplicate - // later and we don't want to duplicate work - you could viably go for a slightly faster - // sync by just asking for all the blocks and letting the peer send what it has. - let message = ExternalMessage::BlockRequest(BlockRequest { - from_view: 0, - to_view: 0, - }); - let peer_info = self.peer_info(*peer); - peer_info.availability_requested_at = Some(now); - let _ = self.message_sender.send_external_message(*peer, message); - } - - // We only need to request stuff from peers if we haven't already done so. - remain = rem; - } - } - trace!("block_store::request_blocks() : all done"); - if !remain.is_empty() { - warn!( - "block_store::request_blocks() : Could not find peers for views {:?}", - remain - ); - if let Some(us) = &mut self.unserviceable_requests { - us.with_range_map(&remain); - } else { - self.unserviceable_requests = Some(remain); - } - } - Ok(true) + pub fn request_blocks(&mut self, _req: &RangeMap) -> Result { + Ok(false) // FIXME: Stub } pub fn get_block(&self, hash: Hash) -> Result> { diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index c0af4d5b3..cfc6140bc 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -3,7 +3,6 @@ use std::{ collections::{BTreeMap, BinaryHeap, VecDeque}, sync::Arc, time::{Duration, Instant}, - u64, }; use alloy::primitives::BlockNumber; @@ -924,7 +923,9 @@ impl Sync { )? .expect("missing highest block"); Ok(( - self.in_pipeline > 0 || !matches!(self.state, SyncState::Phase0), + !self.chain_metadata.is_empty() + || !self.chain_segments.is_empty() + || !self.recent_proposals.is_empty(), highest_block, )) } From d213e263e004a072c1e9595afd847c1537d1021b Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 10 Jan 2025 16:32:15 +0800 Subject: [PATCH 057/119] chore: comments, cleanup. --- zilliqa/src/sync.rs | 145 +++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 64 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index cfc6140bc..c300780ef 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -29,33 +29,29 @@ use crate::{ // // PHASE 1: Request missing chain metadata. // The entire chain metadata is stored in-memory, and is used to construct a chain of metadata. +// Each metadata basically contains the block_hash, block_number, parent_hash, and view_number. // 1. We start with the latest Proposal and request the chain of metadata from a peer. // 2. We construct the chain of metadata, based on the response received. -// 3. If the last block does not exist in our canonical history, we request for additional metadata. -// 4. If the last block exists, we have hit our canonical history. -// 5. Move to Phase 2. +// 3. If the last block does not exist in our history, we request for additional metadata. +// 4. If the last block exists, we have hit our history, we move to Phase 2. // // PHASE 2: Request missing blocks. -// Once the chain metadata is constructed, we request the missing blocks to replay the history. +// Once the chain metadata is constructed, we fill in the missing blocks to replay the history. +// We do not make any judgements (other than sanity) on the block and leave that up to consensus. // 1. We construct a set of hashes, from the in-memory chain metadata. -// 2. We send these block hashes to the same Peer (that sent the metadata) for retrieval. -// 3. 
We inject the Proposals into the pipeline, when the response is received. -// 4. If there are still missing blocks, we ask for more, from 1. -// 5. If there are no more missing blocks, we have filled up all blocks from the chain metadata. -// 6. Ready for Phase 3. +// 2. We request these blocks from the same Peer that sent the metadata. +// 3. We inject the received Proposals into the pipeline. +// 4. If there are still missing blocks, we ask for more. +// 5. If there are no more missing blocks, we move to Phase 3. // // PHASE 3: Zip it up. -// Phase 1&2 may run several times that brings up 99% of the chain. This closes the final gap. -// 1. We queue all newly received Proposals, while Phase 1 & 2 were in progress. -// 2. We check the head of the queue if its parent exists in our canonical history. -// 3. If it does not, we trigger Phase 1&2. +// Phase 1&2 may run several times and bring up 99% of the chain, but it will never catch up. +// This closes the final gap. +// 1. We queue all recently received Proposals, while Phase 1 & 2 were in progress. +// 2. We check the head of the queue, if its parent exists in our history. +// 3. If it does not, our history is too far away, we run Phase 1 again. // 4. If it does, we inject the entire queue into the pipeline. -// 5. We are caught up. - -#[cfg(debug_assertions)] -const DO_SPECULATIVE: bool = false; -#[cfg(not(debug_assertions))] -const DO_SPECULATIVE: bool = true; // Speeds up syncing by speculatively fetching blocks. +// 5. We are fully synced. #[derive(Debug)] pub struct Sync { @@ -92,6 +88,20 @@ pub struct Sync { } impl Sync { + // Speed up syncing by speculatively fetching blocks in Phase 1 & 2. + #[cfg(not(debug_assertions))] + const DO_SPECULATIVE: bool = true; + #[cfg(debug_assertions)] + const DO_SPECULATIVE: bool = false; + + // For V1 BlockRequest, we request a little more than we need, due to drift + // Since the view number is an 'internal' clock, it is possible for the same block number + // to have different view numbers. + // 10 ~ 1-min + // 20 ~ 1-hr + // 30 ~ 2-days + const VIEW_DRIFT: u64 = 10; + pub fn new( config: &NodeConfig, db: Arc, @@ -108,7 +118,9 @@ impl Sync { }) .collect(); let peer_id = message_sender.our_peer_id; - let max_batch_size = config.block_request_batch_size.clamp(30, 180); // 30-180 sec of blocks at a time. + let max_batch_size = config + .block_request_batch_size + .clamp(Self::VIEW_DRIFT as usize * 2, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. Ok(Self { @@ -130,12 +142,12 @@ impl Sync { }) } - /// Sync a block proposal. + /// Phase 0: Sync a block proposal. /// /// This is the main entry point for syncing a block proposal. /// We start by enqueuing all proposals, and then check if the parent block exists in history. - /// If the parent block exists, we do nothing. Ttherwise, we check the oldest one in the queue. - /// If we find its parent in history, we inject the entire queue. + /// If the parent block exists, we do nothing. Otherwise, we check the least recent one. + /// If we find its parent in history, we inject the entire queue. Otherwise, we start syncing. /// /// We do not perform checks on the Proposal here. This is done in the consensus layer. 
pub fn sync_proposal(&mut self, proposal: Proposal) -> Result<()> { @@ -232,10 +244,13 @@ impl Sync { } } - /// Retry phase 1 + /// Phase 2: Retry Phase 1 /// - /// If something went wrong, phase 1 may need to be retried for the most recent segment. - /// Pop the segment from the segment marker, and continue phase 1. + /// If something went wrong in Phase 2, Phase 1 may need to be retried for the recently used segment. + /// Things that could go wrong e.g. the peer went offline, the peer pruned history, etc. + /// + /// Pop the most recently used segment from the segment marker, and retry phase 1. + /// This will rebuild history from the previous marker, with another peer. fn retry_phase1(&mut self) -> Result<()> { if self.chain_segments.is_empty() { tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain_segments!"); @@ -250,22 +265,22 @@ impl Sync { key = p.parent_hash; } - // allow retry from p1 + // retry from Phase 1 tracing::info!( "sync::RetryPhase1 : retrying block {} from {}", meta.parent_hash, peer_info.peer_id, ); self.state = SyncState::Phase1(meta); - if DO_SPECULATIVE { + if Self::DO_SPECULATIVE { self.request_missing_metadata(None)?; } Ok(()) } - /// Handle a multi-block response. + /// Phase 2: Handle a multi-block response. /// - /// This is phase 2 in the syncing algorithm, where we receive a set of blocks and inject them into the pipeline. + /// This is Phase 2 in the syncing algorithm, where we receive a set of blocks and inject them into the pipeline. /// We also remove the blocks from the chain metadata, because they are now in the pipeline. pub fn handle_multiblock_response( &mut self, @@ -354,7 +369,7 @@ impl Sync { // Done with phase 2 if self.chain_segments.is_empty() { self.state = SyncState::Phase3; - } else if DO_SPECULATIVE { + } else if Self::DO_SPECULATIVE { // Speculatively request more blocks self.request_missing_blocks()?; } @@ -393,7 +408,7 @@ impl Sync { Ok(message) } - /// Request missing blocks from the chain. + /// Phase 2: Request missing blocks from the chain. /// /// It constructs a set of hashes, which constitute the series of blocks that are missing. /// These hashes are then sent to a Peer for retrieval. @@ -487,14 +502,8 @@ impl Sync { Ok(()) } - // we request a little more than we need, due to drift - // 10 ~ 1min - // 20 ~ 1hr - const VIEW_DRIFT: u64 = 10; - - /// Handle a V1 block response + /// Phase 1 / 2: Handle a V1 block response /// - /// This will be called during both Phase 1 & Phase 2 block responses. /// If the response if from a V2 peer, it will upgrade that peer to V2. /// In phase 1, it will extract the metadata and feed it into handle_metadata_response. /// In phase 2, it will extract the blocks and feed it into handle_multiblock_response. @@ -510,6 +519,7 @@ impl Sync { return Ok(()); } + // Downgrade empty responses if response.proposals.is_empty() { tracing::info!("sync::HandleBlockResponse : empty V1 from {from}"); self.done_with_peer(DownGrade::Empty); @@ -518,25 +528,28 @@ impl Sync { // Convert the V1 response into a V2 response. match self.state { - // Phase 1 - extract metadata from the set of proposals + // Phase 1 - construct the metadata chain from the set of received proposals SyncState::Phase1(ChainMetaData { block_number, mut parent_hash, .. }) => { // We do not buffer the proposals, as it takes 250MB/day! + // Instead, we will re-request the proposals again, in Phase 2. 
let metadata = response .proposals .into_iter() - .filter(|p| p.number() < block_number) // filter extras + // filter extras due to drift + .filter(|p| p.number() < block_number) .sorted_by(|a, b| b.number().cmp(&a.number())) + // filter any forks .filter(|p| { if parent_hash != p.hash() { return false; } parent_hash = p.header.qc.block_hash; true - }) // filter forks + }) .map(|p| ChainMetaData { block_hash: p.hash(), parent_hash: p.header.qc.block_hash, @@ -547,14 +560,17 @@ impl Sync { self.handle_metadata_response(from, metadata)?; } - // Phase 2 - extract the requested blocks only + + // Phase 2 - extract the requested proposals only. SyncState::Phase2(_) => { let multi_blocks = response .proposals .into_iter() - .filter(|p| self.chain_metadata.contains_key(&p.hash())) // filter extras + // filter any blocks that are not needed + .filter(|p| self.chain_metadata.contains_key(&p.hash())) .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); + self.handle_multiblock_response(from, multi_blocks)?; } _ => { @@ -567,17 +583,18 @@ impl Sync { Ok(()) } - /// Handle a response to a metadata request. + /// Phase 1: Handle a response to a metadata request. /// /// This is the first step in the syncing algorithm, where we receive a set of metadata and use it to - /// construct a chain history. We check that the metadata does indeed constitute a chain. If it does, - /// we record its segment marker and store the entire chain in-memory. + /// construct a chain history. We check that the metadata does indeed constitute a segment of a chain. + /// If it does, we record its segment marker and store the entire chain in-memory. pub fn handle_metadata_response( &mut self, from: PeerId, response: Vec, ) -> Result<()> { - if let Some(peer) = self.in_flight.as_ref() { + // Check for expected response + let segment_peer = if let Some(peer) = self.in_flight.as_ref() { if peer.peer_id != from { tracing::warn!( "sync::MetadataResponse : unexpected peer={} != {from}", @@ -585,12 +602,13 @@ impl Sync { ); return Ok(()); } + peer.clone() } else { + // We ignore any responses that arrived late, since the original request has already 'timed-out'. tracing::warn!("sync::MetadataResponse : spurious response {from}"); return Ok(()); - } + }; - let segment_peer = self.in_flight.as_ref().unwrap().clone(); // Process whatever we have received. if response.is_empty() { // Empty response, downgrade peer and retry with a new peer. @@ -624,7 +642,7 @@ impl Sync { // TODO: possibly, discard and rebuild entire chain // if something does not match, do nothing and retry the request with the next peer. tracing::error!( - "sync::MetadataResponse : retry metadata expected hash={block_hash} != {} num={block_num} != {}", + "sync::MetadataResponse : unexpected metadata hash={block_hash} != {}, num={block_num} != {}", meta.block_hash, meta.block_number, ); @@ -652,18 +670,17 @@ impl Sync { from ); - // Record the actual chain metadata + // Record the constructed chain metadata, check for loops for meta in segment { if self.chain_metadata.insert(meta.block_hash, meta).is_some() { - anyhow::bail!("loop in chain!"); // there is a possible loop in the chain + anyhow::bail!("sync::MetadataResponse : loop in chain!"); // there is a possible loop in the chain } } - // If the segment does not link to our canonical history, fire the next request + // If the segment hits our history, start Phase 2. if self.db.get_block_by_hash(&last_block_hash)?.is_some() { - // Hit our internal history. Next, phase 2. 
self.state = SyncState::Phase2(Hash::ZERO); - } else if DO_SPECULATIVE { + } else if Self::DO_SPECULATIVE { self.request_missing_metadata(None)?; } @@ -714,10 +731,10 @@ impl Sync { Ok(message) } - /// Request missing chain from a peer. + /// Phase 1: Request chain metadata from a peer. /// /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. - /// If phase 1 is in progress, it continues requesting blocks from the last known phase 1 block. + /// If Phase 1 is in progress, it continues requesting blocks from the last known Phase 1 block. /// Otherwise, it requests blocks from the given starting metadata. pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { if matches!(self.state, SyncState::Phase2(_)) || matches!(self.state, SyncState::Phase3) { @@ -800,7 +817,7 @@ impl Sync { Ok(()) } - /// Inject the proposals into the chain. + /// Phase 2 / 3: Inject the proposals into the chain. /// /// It adds the list of proposals into the pipeline for execution. /// It also outputs some syncing statistics. @@ -852,7 +869,7 @@ impl Sync { /// Mark a received proposal /// - /// Mark a proposal as received, and remove it from the cache. + /// Mark a proposal as received, and remove it from the chain. pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { if prop.from != self.peer_id { tracing::error!( @@ -873,7 +890,6 @@ impl Sync { /// Downgrade a peer based on the response received. fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { - // Downgrade peer, if necessary peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better, to avoid a single source of truth. peer.score = peer.score.max(self.peers.peek().unwrap().score); @@ -886,10 +902,10 @@ impl Sync { /// Add a peer to the list of peers. pub fn add_peer(&mut self, peer: PeerId) { - // new peers should be tried last, which gives them time to sync first. + // new peers should be tried later, which gives them time to sync first. let new_peer = PeerInfo { version: PeerVer::V1, // default V2 - score: self.peers.iter().map(|p| p.score).max().unwrap_or_default(), + score: self.peers.iter().map(|p| p.score).min().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), }; @@ -901,6 +917,7 @@ impl Sync { self.peers.retain(|p| p.peer_id != peer); } + /// Get the next best peer to use fn get_next_peer(&mut self) -> Option { // Minimum of 2 peers to avoid single source of truth. if self.peers.len() < 2 { @@ -908,7 +925,7 @@ impl Sync { } let mut peer = self.peers.pop()?; - peer.last_used = std::time::Instant::now(); // used to determine stale in-flight requests. + peer.last_used = std::time::Instant::now(); // used to determine stale requests. Some(peer) } From 3f44c0a3b951de448043c0d3b299478a104f5247 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 10 Jan 2025 17:15:40 +0800 Subject: [PATCH 058/119] fix: correct Phase 2 range, the stored value is accurate. --- zilliqa/src/sync.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index c300780ef..86f66c8c5 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -306,10 +306,6 @@ impl Sync { tracing::warn!("sync::MultiBlockResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return self.retry_phase1(); - } else if response.len() < self.max_batch_size { - // Partial response, process blocks. 
- tracing::warn!("sync::MultiBlockResponse : partial blocks {from}",); - self.done_with_peer(DownGrade::None); } else { self.done_with_peer(DownGrade::None); } @@ -443,14 +439,12 @@ impl Sync { // If we have no chain_segments, we have nothing to do if let Some((peer_info, meta)) = self.chain_segments.last() { - let to_view = meta.view_number.saturating_add(Self::VIEW_DRIFT); - let mut from_view = meta.view_number; + // let mut from_view = meta.view_number; let mut request_hashes = Vec::with_capacity(self.max_batch_size); let mut key = meta.parent_hash; // start from this block while let Some(meta) = self.chain_metadata.remove(&key) { request_hashes.push(meta.block_hash); key = meta.parent_hash; - from_view = meta.view_number; self.chain_metadata.insert(meta.block_hash, meta); // reinsert, for retries } @@ -488,7 +482,11 @@ impl Sync { last_used: std::time::Instant::now(), score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers }); - ExternalMessage::BlockRequest(BlockRequest { to_view, from_view }) + // do not add VIEW_DRIFT - the stored marker is accurate! + ExternalMessage::BlockRequest(BlockRequest { + to_view: meta.view_number.saturating_sub(1), + from_view: meta.view_number.saturating_sub(self.max_batch_size as u64), + }) } }; self.message_sender @@ -615,10 +613,6 @@ impl Sync { tracing::warn!("sync::MetadataResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); return Ok(()); - } else if response.len() < self.max_batch_size { - // Partial response, process the response. - tracing::warn!("sync::MetadataResponse : partial blocks {from}",); - self.done_with_peer(DownGrade::None); } else { self.done_with_peer(DownGrade::None); } From 866794c23bfea5796c2503915891396bbed64bf6 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 11 Jan 2025 13:11:32 +0800 Subject: [PATCH 059/119] feat: ensure unique peers. --- zilliqa/src/sync.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 86f66c8c5..f876234e4 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -882,10 +882,13 @@ impl Sync { } /// Downgrade a peer based on the response received. + /// + /// This algorithm favours good peers that respond quickly (i.e. no timeout). + /// In most cases, it eventually degenerates into 2 sources - avoid a single source of truth. fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { peer.score = peer.score.saturating_add(downgrade as u32); - // Ensure that the next peer is equal or better, to avoid a single source of truth. + // Ensure that the next peer is equal or better peer.score = peer.score.max(self.peers.peek().unwrap().score); // Reinsert peers that are good if peer.score < u32::MAX { @@ -896,7 +899,10 @@ impl Sync { /// Add a peer to the list of peers. pub fn add_peer(&mut self, peer: PeerId) { - // new peers should be tried later, which gives them time to sync first. + // ensure that it is unique - avoids single source of truth + self.remove_peer(peer); + // if the new peer is not synced, it will get downgraded to the back of heap. + // but by placing them at the back of the 'best' pack, we get to try them out soon. let new_peer = PeerInfo { version: PeerVer::V1, // default V2 score: self.peers.iter().map(|p| p.score).min().unwrap_or_default(), @@ -908,7 +914,7 @@ impl Sync { /// Remove a peer from the list of peers. 
pub fn remove_peer(&mut self, peer: PeerId) { - self.peers.retain(|p| p.peer_id != peer); + self.peers.retain(|p: &PeerInfo| p.peer_id != peer); } /// Get the next best peer to use From b05f0987dacf5eeec802fa338a63a6b7de04b79c Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 11 Jan 2025 13:16:18 +0800 Subject: [PATCH 060/119] feat: output rate stats in block/s --- zilliqa/src/sync.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index f876234e4..a860e765d 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -822,11 +822,9 @@ impl Sync { // Output some stats if let Some((when, injected)) = self.inject_at { - tracing::debug!( - "sync::InjectProposals : synced {}/{:?}", - injected - self.in_pipeline, - when.elapsed() - ); + let diff = injected - self.in_pipeline; + let rate = diff as f32 / when.elapsed().as_secs_f32(); + tracing::debug!("sync::InjectProposals : synced {} block/s", rate); } // Increment proposals injected From 84900bf1eab451147a2b239de92c8fa2796a5be0 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 11 Jan 2025 23:12:36 +0800 Subject: [PATCH 061/119] feat: minor reorg, logging. --- zilliqa/src/sync.rs | 122 +++++++++++++++++++++----------------------- 1 file changed, 59 insertions(+), 63 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index a860e765d..d853964c1 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -102,6 +102,9 @@ impl Sync { // 30 ~ 2-days const VIEW_DRIFT: u64 = 10; + // Minimum of 2 peers to avoid single source of truth. + const MIN_PEERS: usize = 2; + pub fn new( config: &NodeConfig, db: Arc, @@ -199,8 +202,10 @@ impl Sync { let ancestor_hash = self.recent_proposals.front().unwrap().header.qc.block_hash; if self.db.get_block_by_hash(&ancestor_hash)?.is_some() { tracing::info!( - "sync::SyncProposal : finishing up {} blocks for segment #0 from {ancestor_hash}", - self.recent_proposals.len() + "sync::SyncProposal : finishing {} blocks for segment #{} from {}", + self.recent_proposals.len(), + self.chain_segments.len(), + self.peer_id, ); // inject the proposals let proposals = self.recent_proposals.drain(..).collect_vec(); @@ -253,24 +258,24 @@ impl Sync { /// This will rebuild history from the previous marker, with another peer. 
fn retry_phase1(&mut self) -> Result<()> { if self.chain_segments.is_empty() { - tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain_segments!"); + tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain segments!"); self.state = SyncState::Phase0; return Ok(()); } + tracing::debug!( + "sync::RetryPhase1 : retrying segment #{}", + self.chain_segments.len(), + ); + // remove the last segment from the chain metadata - let (peer_info, meta) = self.chain_segments.pop().unwrap(); + let (_, meta) = self.chain_segments.pop().unwrap(); let mut key = meta.parent_hash; while let Some(p) = self.chain_metadata.remove(&key) { key = p.parent_hash; } // retry from Phase 1 - tracing::info!( - "sync::RetryPhase1 : retrying block {} from {}", - meta.parent_hash, - peer_info.peer_id, - ); self.state = SyncState::Phase1(meta); if Self::DO_SPECULATIVE { self.request_missing_metadata(None)?; @@ -310,10 +315,6 @@ impl Sync { self.done_with_peer(DownGrade::None); } - let SyncState::Phase2(p2_hash) = self.state else { - anyhow::bail!("sync::MultiBlockResponse : invalid state"); - }; - tracing::info!( "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", response.len(), @@ -321,18 +322,11 @@ impl Sync { from ); - // Spurious response - let Some((peer_info, _)) = self.chain_segments.last() else { - anyhow::bail!("sync::MultiBlockResponse: no more chain_segments!"); + // If the checksum does not match, retry phase 1. Maybe the node has pruned the segment. + let SyncState::Phase2(check_sum) = self.state else { + anyhow::bail!("sync::MultiBlockResponse : invalid state"); }; - // If the response is not from the expected peer e.g. delayed response, retry phase 2. - if peer_info.peer_id != from { - tracing::warn!("sync::MultiBlockResponse: unknown peer {from}, will retry"); - return Ok(()); - } - - // If the checksum does not match, retry phase 1. Maybe the node has pruned the segment. 
let checksum = response .iter() .fold(Hash::builder().with(Hash::ZERO.as_bytes()), |sum, p| { @@ -340,8 +334,10 @@ impl Sync { }) .finalize(); - if p2_hash != checksum { - tracing::error!("sync::MultiBlockResponse : mismatch history {checksum}"); + if check_sum != checksum { + tracing::error!( + "sync::MultiBlockResponse : unexpected checksum={check_sum} != {checksum}" + ); return self.retry_phase1(); } @@ -464,7 +460,6 @@ impl Sync { self.chain_segments.len(), peer_info.peer_id, ); - let message = match peer_info.version { PeerVer::V2 => { self.in_flight = Some(PeerInfo { @@ -493,9 +488,7 @@ impl Sync { .send_external_message(peer_info.peer_id, message)?; } } else { - tracing::warn!( - "sync::RequestMissingBlocks : insufficient peers to request missing blocks" - ); + tracing::warn!("sync::RequestMissingBlocks : insufficient peers to handle request"); } Ok(()) } @@ -511,7 +504,7 @@ impl Sync { && response.proposals.is_empty() && response.from_view == u64::MAX { - tracing::info!("sync::HandleBlockResponse : upgrading {from} to V2",); + tracing::info!("sync::HandleBlockResponse : upgrading {from}",); self.in_flight.as_mut().unwrap().version = PeerVer::V2; self.done_with_peer(DownGrade::None); return Ok(()); @@ -519,11 +512,16 @@ impl Sync { // Downgrade empty responses if response.proposals.is_empty() { - tracing::info!("sync::HandleBlockResponse : empty V1 from {from}"); + tracing::info!("sync::HandleBlockResponse : empty response {from}"); self.done_with_peer(DownGrade::Empty); return Ok(()); } + tracing::trace!( + "sync::HandleBlockResponse : received {} blocks from {from}", + response.proposals.len() + ); + // Convert the V1 response into a V2 response. match self.state { // Phase 1 - construct the metadata chain from the set of received proposals @@ -564,7 +562,7 @@ impl Sync { let multi_blocks = response .proposals .into_iter() - // filter any blocks that are not needed + // filter any blocks that are not in the chain e.g. forks .filter(|p| self.chain_metadata.contains_key(&p.hash())) .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); @@ -731,7 +729,7 @@ impl Sync { /// If Phase 1 is in progress, it continues requesting blocks from the last known Phase 1 block. /// Otherwise, it requests blocks from the given starting metadata. pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { - if matches!(self.state, SyncState::Phase2(_)) || matches!(self.state, SyncState::Phase3) { + if !matches!(self.state, SyncState::Phase1(_)) && !matches!(self.state, SyncState::Phase0) { anyhow::bail!("sync::RequestMissingMetadata : invalid state"); } // Early exit if there's a request in-flight; and if it has not expired. @@ -755,7 +753,12 @@ impl Sync { } if let Some(peer) = self.get_next_peer() { - let peer_id = peer.peer_id; + tracing::info!( + "sync::RequestMissingMetadata : requesting {} metadata of segment #{} from {}", + self.max_batch_size, + self.chain_segments.len() + 1, + peer.peer_id + ); let message = match self.state { SyncState::Phase1(ChainMetaData { parent_hash, .. 
}) if matches!(peer.version, PeerVer::V2) => @@ -795,18 +798,11 @@ impl Sync { } _ => anyhow::bail!("sync::MissingMetadata : invalid state"), }; - tracing::info!( - ?message, - "sync::RequestMissingMetadata : requesting missing chain from {}", - peer_id - ); - self.in_flight = Some(peer); self.message_sender - .send_external_message(peer_id, message)?; + .send_external_message(peer.peer_id, message)?; + self.in_flight = Some(peer); } else { - tracing::warn!( - "sync::RequestMissingMetadata : insufficient peers to request missing blocks" - ); + tracing::warn!("sync::RequestMissingMetadata : insufficient peers to handle request"); } Ok(()) } @@ -829,7 +825,11 @@ impl Sync { // Increment proposals injected self.in_pipeline = self.in_pipeline.saturating_add(proposals.len()); - let len = proposals.len(); + tracing::debug!( + "sync::InjectProposals : injecting {}/{} proposals", + proposals.len(), + self.in_pipeline + ); // Just pump the Proposals back to ourselves. for p in proposals { @@ -849,12 +849,6 @@ impl Sync { } self.inject_at = Some((std::time::Instant::now(), self.in_pipeline)); - - tracing::debug!( - "sync::InjectProposals : injected {}/{} proposals", - len, - self.in_pipeline - ); // return last proposal Ok(()) } @@ -885,6 +879,7 @@ impl Sync { /// In most cases, it eventually degenerates into 2 sources - avoid a single source of truth. fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { + tracing::trace!("sync::DoneWithPeer {} {:?}", peer.peer_id, downgrade); peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better peer.score = peer.score.max(self.peers.peek().unwrap().score); @@ -897,8 +892,6 @@ impl Sync { /// Add a peer to the list of peers. pub fn add_peer(&mut self, peer: PeerId) { - // ensure that it is unique - avoids single source of truth - self.remove_peer(peer); // if the new peer is not synced, it will get downgraded to the back of heap. // but by placing them at the back of the 'best' pack, we get to try them out soon. let new_peer = PeerInfo { @@ -907,24 +900,27 @@ impl Sync { peer_id: peer, last_used: Instant::now(), }; + tracing::trace!("sync::AddPeer {peer}"); + // ensure that it is unique - avoids single source of truth + self.peers.retain(|p: &PeerInfo| p.peer_id != peer); self.peers.push(new_peer); } /// Remove a peer from the list of peers. pub fn remove_peer(&mut self, peer: PeerId) { + tracing::trace!("sync::RemovePeer {peer}"); self.peers.retain(|p: &PeerInfo| p.peer_id != peer); } /// Get the next best peer to use fn get_next_peer(&mut self) -> Option { - // Minimum of 2 peers to avoid single source of truth. - if self.peers.len() < 2 { - return None; + if self.peers.len() >= Self::MIN_PEERS { + let mut peer = self.peers.pop()?; + peer.last_used = std::time::Instant::now(); // used to determine stale requests. + tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); + return Some(peer); } - - let mut peer = self.peers.pop()?; - peer.last_used = std::time::Instant::now(); // used to determine stale requests. - Some(peer) + None } /// Returns (am_syncing, current_highest_block) @@ -938,9 +934,9 @@ impl Sync { )? 
.expect("missing highest block"); Ok(( - !self.chain_metadata.is_empty() - || !self.chain_segments.is_empty() - || !self.recent_proposals.is_empty(), + !self.recent_proposals.is_empty() + || !self.chain_metadata.is_empty() + || !self.chain_segments.is_empty(), highest_block, )) } From 805313eb272286d89d18ed1cd7b0ba5bbf280461 Mon Sep 17 00:00:00 2001 From: Shawn Date: Sun, 12 Jan 2025 21:23:15 +0800 Subject: [PATCH 062/119] feat: added saving of metadata/segments to DB - allows continuation. --- zilliqa/src/sync.rs | 143 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 138 insertions(+), 5 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index d853964c1..242209c71 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -9,6 +9,7 @@ use alloy::primitives::BlockNumber; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; +use rusqlite::named_params; use crate::{ cfg::NodeConfig, @@ -126,6 +127,75 @@ impl Sync { .clamp(Self::VIEW_DRIFT as usize * 2, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. + // FIXME: Move these to db.rs later + db.with_sqlite_tx(|c| { + c.execute_batch( + "CREATE TABLE IF NOT EXISTS sync_data ( + block_hash BLOB NOT NULL UNIQUE, + parent_hash BLOB NOT NULL, + block_number INTEGER NOT NULL PRIMARY KEY, + view_number INTEGER NOT NULL, + peer BLOB DEFAULT NULL + );", + )?; + Ok(()) + })?; + + // Restore metadata/segments + let mut metadata: BTreeMap = BTreeMap::new(); + let mut segments: Vec<(PeerInfo, ChainMetaData)> = Vec::new(); + + db.with_sqlite_tx(|c| { + let _ = c.prepare( + "SELECT parent_hash, block_hash, block_number, view_number, peer FROM sync_data ORDER BY rowid DESC", + )? + .query_map([], |row| { + let m = ChainMetaData{ + parent_hash: row.get_unwrap(0), + block_hash: row.get_unwrap(1), + block_number: row.get_unwrap(2), + view_number: row.get_unwrap(3), + }; + metadata.insert(m.block_hash, m.clone()); + + if let Ok(p) = row.get::<_, Vec>(4) { + if let Ok(peer_id) = PeerId::from_bytes(&p) { + segments.push(( + PeerInfo { + version: PeerVer::V1, + score: 0, + peer_id, + last_used: Instant::now(), + }, + m.clone(), + )); + } + } + + Ok(m) + })?.collect_vec(); + Ok(()) + })?; + + // remove last segment + if let Some((_, meta)) = segments.pop() { + let mut key = meta.parent_hash; + while let Some(p) = metadata.remove(&key) { + key = p.parent_hash; + } + } + + let state = if segments.is_empty() { + SyncState::Phase0 + } else { + tracing::info!( + "sync::New : continue from segment #{} with {} metadata", + segments.len(), + metadata.len() + ); + SyncState::Phase1(segments.last().as_ref().unwrap().1.clone()) + }; + Ok(Self { db, message_sender, @@ -136,15 +206,64 @@ impl Sync { max_blocks_in_flight, in_flight: None, in_pipeline: usize::MIN, - chain_metadata: BTreeMap::new(), - chain_segments: Vec::new(), - state: SyncState::Phase0, + chain_metadata: metadata, + chain_segments: segments, + state, recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, started_at_block_number: u64::MIN, }) } + fn pop_segment(&self, meta: ChainMetaData) -> Result<()> { + self.db.with_sqlite_tx(|c| { + c.execute( + "UPDATE sync_data SET peer = NULL WHERE block_hash = :block_hash", + named_params! 
{ + ":block_hash": meta.block_hash, + }, + )?; + Ok(()) + }) + } + + fn push_segment(&self, peer: PeerInfo, meta: ChainMetaData) -> Result<()> { + self.db.with_sqlite_tx(|c| { + c.execute( + "UPDATE sync_data SET peer = :peer WHERE block_hash = :block_hash", + named_params! { + ":peer": peer.peer_id.to_bytes(), + ":block_hash": meta.block_hash, + }, + )?; + Ok(()) + }) + } + + // TODO: Move into db.rs, optimise + fn insert_metadata(&mut self, meta: ChainMetaData) -> Result<()> { + self.db.with_sqlite_tx(|c| { + c.execute( + "INSERT INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)", + named_params! { + ":parent_hash": meta.parent_hash, + ":block_hash": meta.block_hash, + ":block_number": meta.block_number, + ":view_number": meta.view_number, + }, + )?; + Ok(()) + }) + } + + // TODO: Move into db.rs, optimise + fn remove_metadata(&self, hash: Hash) -> Result<()> { + self.db.with_sqlite_tx(|c| { + c.execute("DELETE FROM sync_data WHERE block_hash = ?1", [hash])?; + Ok(()) + }) + } + /// Phase 0: Sync a block proposal. /// /// This is the main entry point for syncing a block proposal. @@ -270,9 +389,12 @@ impl Sync { // remove the last segment from the chain metadata let (_, meta) = self.chain_segments.pop().unwrap(); + self.pop_segment(meta.clone())?; + let mut key = meta.parent_hash; while let Some(p) = self.chain_metadata.remove(&key) { key = p.parent_hash; + self.remove_metadata(p.block_hash)?; } // retry from Phase 1 @@ -351,11 +473,15 @@ impl Sync { for p in &proposals { if self.chain_metadata.remove(&p.hash()).is_none() { anyhow::bail!("missing chain data for proposal"); // this should never happen! + } else { + self.remove_metadata(p.hash())?; } } // Done with this segment - self.chain_segments.pop(); + if let Some((_, meta)) = self.chain_segments.pop() { + self.pop_segment(meta)?; + } self.inject_proposals(proposals)?; // Done with phase 2 @@ -649,6 +775,7 @@ impl Sync { let segment = response; // Record landmark, including peer that has this set of blocks + self.push_segment(segment_peer.clone(), meta.clone())?; self.chain_segments.push((segment_peer, meta.clone())); // Record the oldest block in the chain's parent @@ -664,8 +791,14 @@ impl Sync { // Record the constructed chain metadata, check for loops for meta in segment { - if self.chain_metadata.insert(meta.block_hash, meta).is_some() { + if self + .chain_metadata + .insert(meta.block_hash, meta.clone()) + .is_some() + { anyhow::bail!("sync::MetadataResponse : loop in chain!"); // there is a possible loop in the chain + } else { + self.insert_metadata(meta)?; } } From f23e71b19df8494a7009cc60176cd38e1bf8010a Mon Sep 17 00:00:00 2001 From: Shawn Date: Sun, 12 Jan 2025 21:49:30 +0800 Subject: [PATCH 063/119] feat: added stateful sync algorithm feature - can continue after restart. 
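To make the restart behaviour concrete: the bookkeeping introduced in the previous commit lives in the sync_data table, and on startup the syncer only needs to see whether any rows survived to decide between resuming and falling back to Phase 0. The following is a minimal, self-contained sketch of that round-trip, not the node's actual wiring: it assumes an in-memory rusqlite connection in place of Db::with_sqlite_tx, 32-byte arrays in place of the crate's Hash type, and made-up block/view numbers.

    // Sketch only: standalone round-trip of the sync_data table.
    use rusqlite::{named_params, Connection};

    fn main() -> rusqlite::Result<()> {
        let c = Connection::open_in_memory()?;
        c.execute_batch(
            "CREATE TABLE IF NOT EXISTS sync_data (
                block_hash   BLOB NOT NULL UNIQUE,
                parent_hash  BLOB NOT NULL,
                block_number INTEGER NOT NULL PRIMARY KEY,
                view_number  INTEGER NOT NULL,
                peer         BLOB DEFAULT NULL
            );",
        )?;

        // Persist one piece of chain metadata (hypothetical values).
        let (parent, block) = ([0u8; 32], [1u8; 32]);
        c.execute(
            "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number)
             VALUES (:parent_hash, :block_hash, :block_number, :view_number)",
            named_params! {
                ":parent_hash": parent.as_slice(),
                ":block_hash": block.as_slice(),
                ":block_number": 42i64,
                ":view_number": 43i64,
            },
        )?;

        // After a restart, any surviving rows mean "resume syncing" rather
        // than starting over from Phase 0.
        let rows: i64 = c
            .prepare("SELECT COUNT(*) FROM sync_data")?
            .query_row([], |row| row.get(0))?;
        assert_eq!(rows, 1);
        Ok(())
    }
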
--- zilliqa/src/sync.rs | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 242209c71..c43e4e33b 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -142,8 +142,8 @@ impl Sync { })?; // Restore metadata/segments - let mut metadata: BTreeMap = BTreeMap::new(); - let mut segments: Vec<(PeerInfo, ChainMetaData)> = Vec::new(); + let mut metadata = BTreeMap::new(); + let mut segments = Vec::new(); db.with_sqlite_tx(|c| { let _ = c.prepare( @@ -182,13 +182,20 @@ impl Sync { let mut key = meta.parent_hash; while let Some(p) = metadata.remove(&key) { key = p.parent_hash; + db.with_sqlite_tx(|c| { + c.execute( + "DELETE FROM sync_data WHERE block_hash = ?1", + [p.block_hash], + )?; + Ok(()) + })?; } } let state = if segments.is_empty() { SyncState::Phase0 } else { - tracing::info!( + tracing::debug!( "sync::New : continue from segment #{} with {} metadata", segments.len(), metadata.len() @@ -196,6 +203,12 @@ impl Sync { SyncState::Phase1(segments.last().as_ref().unwrap().1.clone()) }; + let start_at = if let SyncState::Phase1(m) = &state { + m.block_number + } else { + u64::MIN + }; + Ok(Self { db, message_sender, @@ -211,7 +224,7 @@ impl Sync { state, recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, - started_at_block_number: u64::MIN, + started_at_block_number: start_at, }) } @@ -244,7 +257,7 @@ impl Sync { fn insert_metadata(&mut self, meta: ChainMetaData) -> Result<()> { self.db.with_sqlite_tx(|c| { c.execute( - "INSERT INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)", + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)", named_params! { ":parent_hash": meta.parent_hash, ":block_hash": meta.block_hash, @@ -561,13 +574,11 @@ impl Sync { // If we have no chain_segments, we have nothing to do if let Some((peer_info, meta)) = self.chain_segments.last() { - // let mut from_view = meta.view_number; let mut request_hashes = Vec::with_capacity(self.max_batch_size); let mut key = meta.parent_hash; // start from this block - while let Some(meta) = self.chain_metadata.remove(&key) { + while let Some(meta) = self.chain_metadata.get(&key) { request_hashes.push(meta.block_hash); key = meta.parent_hash; - self.chain_metadata.insert(meta.block_hash, meta); // reinsert, for retries } // Checksum of the request hashes From f2f57992a3e3ed38ae501a46dcc8839e7740acb0 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 13 Jan 2025 16:56:20 +0800 Subject: [PATCH 064/119] feat: rebuilt the algorithm to use DB for state, instead of in-memory. 
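This rework keeps the whole sync state in SQLite: rows whose peer column is non-NULL act as segment markers (hence the partial index), and segments are walked by following parent_hash links with small per-row queries. As a possible later optimisation, and purely for reference, that walk could be collapsed into a single recursive CTE. The helper below is a hypothetical sketch of that idea against the same sync_data schema; it is not part of this commit, it represents hashes as plain byte vectors, and it assumes the metadata is acyclic (the loop check elsewhere in sync.rs guards that).

    // Hypothetical alternative to the row-by-row walk in get_segment():
    // one recursive CTE that follows parent_hash links from a starting hash.
    fn get_segment_cte(c: &rusqlite::Connection, start: &[u8]) -> rusqlite::Result<Vec<Vec<u8>>> {
        let mut stmt = c.prepare_cached(
            "WITH RECURSIVE seg(block_hash, parent_hash) AS (
                 SELECT block_hash, parent_hash FROM sync_data WHERE block_hash = :start
                 UNION ALL
                 SELECT s.block_hash, s.parent_hash
                 FROM sync_data s JOIN seg ON s.block_hash = seg.parent_hash
             )
             SELECT block_hash FROM seg",
        )?;
        let rows = stmt.query_map(rusqlite::named_params! { ":start": start }, |row| row.get(0))?;
        rows.collect()
    }
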
--- zilliqa/src/sync.rs | 339 ++++++++++++++++++++++++-------------------- 1 file changed, 185 insertions(+), 154 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index c43e4e33b..149f44e57 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -1,6 +1,6 @@ use std::{ cmp::Ordering, - collections::{BTreeMap, BinaryHeap, VecDeque}, + collections::{BinaryHeap, VecDeque}, sync::Arc, time::{Duration, Instant}, }; @@ -9,7 +9,7 @@ use alloy::primitives::BlockNumber; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; -use rusqlite::named_params; +use rusqlite::{named_params, OptionalExtension}; use crate::{ cfg::NodeConfig, @@ -76,10 +76,6 @@ pub struct Sync { peer_id: PeerId, // internal sync state state: SyncState, - // complete chain metadata, in-memory - chain_metadata: BTreeMap, - // markers to segments in the chain, and the source peer for that segment. - chain_segments: Vec<(PeerInfo, ChainMetaData)>, // fixed-size queue of the most recent proposals recent_proposals: VecDeque, // for statistics only @@ -127,7 +123,8 @@ impl Sync { .clamp(Self::VIEW_DRIFT as usize * 2, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. - // FIXME: Move these to db.rs later + // This DB could be left in-here as it is only used in this module + // TODO: Make this in-memory by exploiting SQLite TEMP tables i.e. CREATE TEMP TABLE db.with_sqlite_tx(|c| { c.execute_batch( "CREATE TABLE IF NOT EXISTS sync_data ( @@ -136,77 +133,26 @@ impl Sync { block_number INTEGER NOT NULL PRIMARY KEY, view_number INTEGER NOT NULL, peer BLOB DEFAULT NULL - );", + ); + CREATE INDEX IF NOT EXISTS idx_sync_data ON sync_data(block_number) WHERE peer IS NOT NULL;", )?; Ok(()) })?; // Restore metadata/segments - let mut metadata = BTreeMap::new(); - let mut segments = Vec::new(); - + let mut segments = 0; db.with_sqlite_tx(|c| { - let _ = c.prepare( - "SELECT parent_hash, block_hash, block_number, view_number, peer FROM sync_data ORDER BY rowid DESC", - )? - .query_map([], |row| { - let m = ChainMetaData{ - parent_hash: row.get_unwrap(0), - block_hash: row.get_unwrap(1), - block_number: row.get_unwrap(2), - view_number: row.get_unwrap(3), - }; - metadata.insert(m.block_hash, m.clone()); - - if let Ok(p) = row.get::<_, Vec>(4) { - if let Ok(peer_id) = PeerId::from_bytes(&p) { - segments.push(( - PeerInfo { - version: PeerVer::V1, - score: 0, - peer_id, - last_used: Instant::now(), - }, - m.clone(), - )); - } - } - - Ok(m) - })?.collect_vec(); + segments = c + .prepare_cached("SELECT COUNT(block_number) FROM sync_data WHERE peer IS NOT NULL")? + .query_row([], |row| row.get::<_, usize>(0)) + .optional()? 
+ .unwrap_or_default(); Ok(()) })?; - - // remove last segment - if let Some((_, meta)) = segments.pop() { - let mut key = meta.parent_hash; - while let Some(p) = metadata.remove(&key) { - key = p.parent_hash; - db.with_sqlite_tx(|c| { - c.execute( - "DELETE FROM sync_data WHERE block_hash = ?1", - [p.block_hash], - )?; - Ok(()) - })?; - } - } - - let state = if segments.is_empty() { + let state = if segments == 0 { SyncState::Phase0 } else { - tracing::debug!( - "sync::New : continue from segment #{} with {} metadata", - segments.len(), - metadata.len() - ); - SyncState::Phase1(segments.last().as_ref().unwrap().1.clone()) - }; - - let start_at = if let SyncState::Phase1(m) = &state { - m.block_number - } else { - u64::MIN + SyncState::Retry1 }; Ok(Self { @@ -219,45 +165,147 @@ impl Sync { max_blocks_in_flight, in_flight: None, in_pipeline: usize::MIN, - chain_metadata: metadata, - chain_segments: segments, state, recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, - started_at_block_number: start_at, + started_at_block_number: 0, }) } - fn pop_segment(&self, meta: ChainMetaData) -> Result<()> { + /// Returns the number of stored segments + fn count_segments(&self) -> Result { + let mut segments = 0; self.db.with_sqlite_tx(|c| { - c.execute( - "UPDATE sync_data SET peer = NULL WHERE block_hash = :block_hash", - named_params! { - ":block_hash": meta.block_hash, + segments = c + .prepare_cached("SELECT COUNT(block_number) FROM sync_data WHERE peer IS NOT NULL")? + .query_row([], |row| row.get(0)) + .optional()? + .unwrap_or_default(); + Ok(()) + })?; + Ok(segments) + } + + /// Checks if the stored metadata exists + fn contains_metadata(&self, hash: &Hash) -> Result { + let mut result = false; + self.db.with_sqlite_tx(|c| { + result = c + .prepare_cached("SELECT block_number FROM sync_data WHERE block_hash = ?1")? + .query_row([hash], |row| row.get::<_, u64>(0)) + .optional()? + .is_some(); + Ok(()) + })?; + Ok(result) + } + + /// Retrieves bulk metadata information from the given block_hash (inclusive) + fn get_segment(&self, hash: Hash) -> Result> { + let mut hashes = Vec::with_capacity(self.max_batch_size); + let mut block_hash = hash; + self.db.with_sqlite_tx(|c| { + while let Some(parent_hash) = c + .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get::<_, Hash>(0)) + .optional()? + { + hashes.push(block_hash); + block_hash = parent_hash; + } + Ok(()) + })?; + Ok(hashes) + } + + /// Peeks into the top of the segment stack. + fn last_segment(&self) -> Result> { + let mut result = None; + self.db.with_sqlite_tx(|c| { + result = c + .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, peer FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + .query_row([], |row| Ok(( + ChainMetaData{ + parent_hash: row.get(0)?, + block_hash: row.get(1)?, + block_number: row.get(2)?, + view_number: row.get(3)?, }, - )?; + PeerInfo { + last_used: Instant::now(), + score:u32::MAX, + version: PeerVer::V1, + peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), + }, + ))) + .optional()?; + Ok(()) + })?; + Ok(result) + } + + /// Pops a segment from the stack; and bulk removes all metadata associated with it. + fn pop_segment(&self) -> Result<()> { + self.db.with_sqlite_tx(|c| { + if let Some(block_hash) = c.prepare_cached("SELECT block_hash FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? 
+ .query_row([], |row| row.get::<_,Hash>(0)).optional()? { + if let Some(parent_hash) = c.prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get(0)).optional()? { + + // update marker + c.prepare_cached( + "UPDATE sync_data SET peer = NULL WHERE block_hash = ?1")? + .execute( + [block_hash] + )?; + + // remove segment + let mut hashes = Vec::with_capacity(self.max_batch_size); + let mut block_hash = parent_hash; + while let Some(parent_hash) = c + .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get::<_, Hash>(0)) + .optional()? + { + hashes.push(block_hash); + block_hash = parent_hash; + } + + for hash in hashes { + c.prepare_cached("DELETE FROM sync_data WHERE block_hash = ?1")? + .execute([hash])?; + } + } + } Ok(()) }) } + /// Pushes a particular segment into the stack. fn push_segment(&self, peer: PeerInfo, meta: ChainMetaData) -> Result<()> { self.db.with_sqlite_tx(|c| { - c.execute( - "UPDATE sync_data SET peer = :peer WHERE block_hash = :block_hash", + c.prepare_cached( + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :peer)")? + .execute( named_params! { - ":peer": peer.peer_id.to_bytes(), + ":parent_hash": meta.parent_hash, ":block_hash": meta.block_hash, + ":block_number": meta.block_number, + ":view_number": meta.view_number, + ":peer": peer.peer_id.to_bytes(), }, )?; Ok(()) }) } - // TODO: Move into db.rs, optimise - fn insert_metadata(&mut self, meta: ChainMetaData) -> Result<()> { + /// Bulk inserts a bunch of metadata. + fn insert_metadata(&self, metas: Vec) -> Result<()> { self.db.with_sqlite_tx(|c| { - c.execute( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)", + for meta in metas { + c.prepare_cached( + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)")? + .execute( named_params! { ":parent_hash": meta.parent_hash, ":block_hash": meta.block_hash, @@ -265,14 +313,15 @@ impl Sync { ":view_number": meta.view_number, }, )?; + } Ok(()) }) } - // TODO: Move into db.rs, optimise - fn remove_metadata(&self, hash: Hash) -> Result<()> { + /// Empty the metadata table. + fn empty_metadata(&self) -> Result<()> { self.db.with_sqlite_tx(|c| { - c.execute("DELETE FROM sync_data WHERE block_hash = ?1", [hash])?; + c.execute("DELETE FROM sync_data", [])?; Ok(()) }) } @@ -336,15 +385,31 @@ impl Sync { tracing::info!( "sync::SyncProposal : finishing {} blocks for segment #{} from {}", self.recent_proposals.len(), - self.chain_segments.len(), + self.count_segments()?, self.peer_id, ); // inject the proposals let proposals = self.recent_proposals.drain(..).collect_vec(); self.inject_proposals(proposals)?; } + self.empty_metadata()?; self.state = SyncState::Phase0; } + // Retry to fix sync issues e.g. peers that are now offline + SyncState::Retry1 if self.in_pipeline == 0 => { + self.retry_phase1()?; + if self.started_at_block_number == 0 { + let highest_block = self + .db + .get_canonical_block_by_number( + self.db + .get_highest_canonical_block_number()? + .expect("no highest block"), + )? 
+ .expect("missing highest block"); + self.started_at_block_number = highest_block.number(); + } + } _ => { tracing::debug!( "sync::SyncProposal : syncing {} blocks in pipeline", @@ -388,8 +453,9 @@ impl Sync { /// /// Pop the most recently used segment from the segment marker, and retry phase 1. /// This will rebuild history from the previous marker, with another peer. + /// If this function is called many times, it will eventually restart from Phase 0. fn retry_phase1(&mut self) -> Result<()> { - if self.chain_segments.is_empty() { + if self.count_segments()? == 0 { tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain segments!"); self.state = SyncState::Phase0; return Ok(()); @@ -397,24 +463,14 @@ impl Sync { tracing::debug!( "sync::RetryPhase1 : retrying segment #{}", - self.chain_segments.len(), + self.count_segments()?, ); // remove the last segment from the chain metadata - let (_, meta) = self.chain_segments.pop().unwrap(); - self.pop_segment(meta.clone())?; - - let mut key = meta.parent_hash; - while let Some(p) = self.chain_metadata.remove(&key) { - key = p.parent_hash; - self.remove_metadata(p.block_hash)?; - } - - // retry from Phase 1 + let (meta, _) = self.last_segment()?.unwrap(); + self.pop_segment()?; self.state = SyncState::Phase1(meta); - if Self::DO_SPECULATIVE { - self.request_missing_metadata(None)?; - } + Ok(()) } @@ -445,7 +501,8 @@ impl Sync { // Empty response, downgrade peer and retry phase 1. tracing::warn!("sync::MultiBlockResponse : empty blocks {from}",); self.done_with_peer(DownGrade::Empty); - return self.retry_phase1(); + self.state = SyncState::Retry1; + return Ok(()); } else { self.done_with_peer(DownGrade::None); } @@ -453,7 +510,7 @@ impl Sync { tracing::info!( "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", response.len(), - self.chain_segments.len(), + self.count_segments()?, from ); @@ -473,7 +530,8 @@ impl Sync { tracing::error!( "sync::MultiBlockResponse : unexpected checksum={check_sum} != {checksum}" ); - return self.retry_phase1(); + self.state = SyncState::Retry1; + return Ok(()); } // Response seems sane. @@ -482,23 +540,11 @@ impl Sync { .sorted_by_key(|p| p.number()) .collect_vec(); - // Remove the blocks from the chain metadata - for p in &proposals { - if self.chain_metadata.remove(&p.hash()).is_none() { - anyhow::bail!("missing chain data for proposal"); // this should never happen! - } else { - self.remove_metadata(p.hash())?; - } - } - - // Done with this segment - if let Some((_, meta)) = self.chain_segments.pop() { - self.pop_segment(meta)?; - } + self.pop_segment()?; self.inject_proposals(proposals)?; // Done with phase 2 - if self.chain_segments.is_empty() { + if self.count_segments()? == 0 { self.state = SyncState::Phase3; } else if Self::DO_SPECULATIVE { // Speculatively request more blocks @@ -573,13 +619,8 @@ impl Sync { self.peers.push(peer); // If we have no chain_segments, we have nothing to do - if let Some((peer_info, meta)) = self.chain_segments.last() { - let mut request_hashes = Vec::with_capacity(self.max_batch_size); - let mut key = meta.parent_hash; // start from this block - while let Some(meta) = self.chain_metadata.get(&key) { - request_hashes.push(meta.block_hash); - key = meta.parent_hash; - } + if let Some((meta, peer_info)) = self.last_segment()? 
{ + let request_hashes = self.get_segment(meta.parent_hash)?; // Checksum of the request hashes let checksum = request_hashes @@ -594,7 +635,7 @@ impl Sync { tracing::info!( "sync::RequestMissingBlocks : requesting {} blocks of segment #{} from {}", request_hashes.len(), - self.chain_segments.len(), + self.count_segments()?, peer_info.peer_id, ); let message = match peer_info.version { @@ -700,7 +741,7 @@ impl Sync { .proposals .into_iter() // filter any blocks that are not in the chain e.g. forks - .filter(|p| self.chain_metadata.contains_key(&p.hash())) + .filter(|p| self.contains_metadata(&p.hash()).unwrap_or_default()) .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); @@ -786,8 +827,7 @@ impl Sync { let segment = response; // Record landmark, including peer that has this set of blocks - self.push_segment(segment_peer.clone(), meta.clone())?; - self.chain_segments.push((segment_peer, meta.clone())); + self.push_segment(segment_peer, meta.clone())?; // Record the oldest block in the chain's parent self.state = SyncState::Phase1(segment.last().cloned().unwrap()); @@ -796,22 +836,12 @@ impl Sync { tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", segment.len(), - self.chain_segments.len(), + self.count_segments()?, from ); - // Record the constructed chain metadata, check for loops - for meta in segment { - if self - .chain_metadata - .insert(meta.block_hash, meta.clone()) - .is_some() - { - anyhow::bail!("sync::MetadataResponse : loop in chain!"); // there is a possible loop in the chain - } else { - self.insert_metadata(meta)?; - } - } + // Record the constructed chain metadata + self.insert_metadata(segment)?; // If the segment hits our history, start Phase 2. if self.db.get_block_by_hash(&last_block_hash)?.is_some() { @@ -900,7 +930,7 @@ impl Sync { tracing::info!( "sync::RequestMissingMetadata : requesting {} metadata of segment #{} from {}", self.max_batch_size, - self.chain_segments.len() + 1, + self.count_segments()? + 1, peer.peer_id ); let message = match self.state { @@ -1007,12 +1037,12 @@ impl Sync { prop.from ); } - if let Some(p) = self.chain_metadata.remove(&prop.block.hash()) { - tracing::warn!( - "sync::MarkReceivedProposal : removing stale metadata {}", - p.block_hash - ); - } + // if let Some(p) = self.chain_metadata.remove(&prop.block.hash()) { + // tracing::warn!( + // "sync::MarkReceivedProposal : removing stale metadata {}", + // p.block_hash + // ); + // } self.in_pipeline = self.in_pipeline.saturating_sub(1); Ok(()) } @@ -1078,9 +1108,9 @@ impl Sync { )? .expect("missing highest block"); Ok(( - !self.recent_proposals.is_empty() - || !self.chain_metadata.is_empty() - || !self.chain_segments.is_empty(), + self.in_pipeline != 0 + || !self.recent_proposals.is_empty() + || self.count_segments()? != 0, highest_block, )) } @@ -1142,6 +1172,7 @@ enum SyncState { Phase1(ChainMetaData), Phase2(Hash), Phase3, + Retry1, } /// Peer Version From d95dd984ea67d459523170f1ca40e3e72dd2ac26 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 13 Jan 2025 18:27:07 +0800 Subject: [PATCH 065/119] feat: added PeerVer info to DB. 
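The version column stores the peer's wire version as a plain INTEGER, with FromSql/ToSql doing the enum mapping. A standalone round-trip of that mapping looks like the sketch below; the table, the values and the OutOfRange error choice are illustrative stand-ins, not the code added by this commit (which keeps the mapping inside sync.rs).

    // Sketch: storing a small enum as an INTEGER column and reading it back.
    use rusqlite::{
        params,
        types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef},
        Connection,
    };

    #[derive(Debug, Clone, PartialEq)]
    enum PeerVer {
        V1 = 1,
        V2 = 2,
    }

    impl FromSql for PeerVer {
        fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
            match u32::column_result(value)? {
                1 => Ok(PeerVer::V1),
                2 => Ok(PeerVer::V2),
                other => Err(FromSqlError::OutOfRange(other as i64)),
            }
        }
    }

    impl ToSql for PeerVer {
        fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
            Ok((self.clone() as u32).into())
        }
    }

    fn main() -> rusqlite::Result<()> {
        let c = Connection::open_in_memory()?;
        c.execute_batch("CREATE TABLE t (version INTEGER)")?;
        c.execute("INSERT INTO t (version) VALUES (?1)", params![PeerVer::V2])?;
        let v: PeerVer = c.query_row("SELECT version FROM t", [], |row| row.get(0))?;
        assert_eq!(v, PeerVer::V2);
        Ok(())
    }
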
--- zilliqa/src/sync.rs | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 149f44e57..37d1ac287 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -9,7 +9,11 @@ use alloy::primitives::BlockNumber; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; -use rusqlite::{named_params, OptionalExtension}; +use rusqlite::{ + named_params, + types::{FromSql, FromSqlResult, ToSql, ToSqlOutput, ValueRef}, + OptionalExtension, +}; use crate::{ cfg::NodeConfig, @@ -132,7 +136,8 @@ impl Sync { parent_hash BLOB NOT NULL, block_number INTEGER NOT NULL PRIMARY KEY, view_number INTEGER NOT NULL, - peer BLOB DEFAULT NULL + peer BLOB DEFAULT NULL, + version INTEGER DEFAULT 0 ); CREATE INDEX IF NOT EXISTS idx_sync_data ON sync_data(block_number) WHERE peer IS NOT NULL;", )?; @@ -223,7 +228,7 @@ impl Sync { let mut result = None; self.db.with_sqlite_tx(|c| { result = c - .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, peer FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, peer, version FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? .query_row([], |row| Ok(( ChainMetaData{ parent_hash: row.get(0)?, @@ -234,7 +239,7 @@ impl Sync { PeerInfo { last_used: Instant::now(), score:u32::MAX, - version: PeerVer::V1, + version: row.get(5)?, peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), }, ))) @@ -285,7 +290,7 @@ impl Sync { fn push_segment(&self, peer: PeerInfo, meta: ChainMetaData) -> Result<()> { self.db.with_sqlite_tx(|c| { c.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :peer)")? + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, peer, version) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :peer, :version)")? .execute( named_params! { ":parent_hash": meta.parent_hash, @@ -293,6 +298,7 @@ impl Sync { ":block_number": meta.block_number, ":view_number": meta.view_number, ":peer": peer.peer_id.to_bytes(), + ":version": peer.version, }, )?; Ok(()) @@ -1178,6 +1184,22 @@ enum SyncState { /// Peer Version #[derive(Debug, Clone, Eq, PartialEq)] enum PeerVer { - V1, - V2, + V1 = 1, + V2 = 2, +} + +impl FromSql for PeerVer { + fn column_result(value: ValueRef) -> FromSqlResult { + u32::column_result(value).map(|i| match i { + 1 => PeerVer::V1, + 2 => PeerVer::V2, + _ => todo!("invalid version"), + }) + } +} + +impl ToSql for PeerVer { + fn to_sql(&self) -> Result { + Ok((self.clone() as u32).into()) + } } From f6b40958de54e4b02e5ea22b3ccddcc30d3ca8ed Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 14 Jan 2025 16:44:58 +0800 Subject: [PATCH 066/119] chore: post-rebase. 
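The commit after this one deletes block_store.rs outright. One detail worth a note for posterity is the set-associative key its BlockCache used: the top bits of the last byte of the sender's PeerId pick a "way" in the low bits of the key, while the view number occupies the high bits, so a single flooding peer cannot evict every cached copy of a view. The sketch below is a simplified, self-contained illustration of that packing; LOG2_WAYS is an assumed value (the real constant is constants::BLOCK_CACHE_LOG2_WAYS) and a u8 stands in for the PeerId byte.

    // Simplified illustration of the set-associative key from the soon-to-be
    // removed BlockCache: peer bits pick the way, the view fills the high bits.
    const LOG2_WAYS: usize = 2;          // assumed; real value lives in constants.rs
    const SHIFT: usize = 8 - LOG2_WAYS;  // mirrors `shift` in the deleted BlockCache

    fn key_from_view(peer_tag: u8, view: u64) -> u128 {
        u128::from(peer_tag >> SHIFT) | (u128::from(view) << SHIFT)
    }

    fn view_from_key(key: u128) -> u64 {
        (key >> SHIFT) as u64
    }

    fn main() {
        let key = key_from_view(0b1100_0001, 7);
        assert_eq!(view_from_key(key), 7);
        // Two peers whose tags differ in the top bits map the same view to
        // different ways, so one peer cannot evict every copy of that view.
        assert_ne!(key_from_view(0b0000_0000, 7), key_from_view(0b0100_0000, 7));
    }
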
--- zilliqa/src/block_store.rs | 2 +- zilliqa/src/message.rs | 2 +- zilliqa/src/sync.rs | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/zilliqa/src/block_store.rs b/zilliqa/src/block_store.rs index e17b60e96..0cf063564 100644 --- a/zilliqa/src/block_store.rs +++ b/zilliqa/src/block_store.rs @@ -19,7 +19,7 @@ use crate::{ constants, crypto::Hash, db::Db, - message::{Block, BlockRequest, BlockStrategy, ExternalMessage, Proposal}, + message::{Block, BlockStrategy, Proposal}, node::{MessageSender, OutgoingMessageFailure, RequestId}, range_map::RangeMap, time::SystemTime, diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index c9a758a31..690ad67c6 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -209,7 +209,7 @@ pub struct BlockRequest { pub to_view: u64, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize)] pub struct BlockResponse { pub proposals: Vec, pub from_view: u64, diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 37d1ac287..6b0348dfa 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -19,10 +19,7 @@ use crate::{ cfg::NodeConfig, crypto::Hash, db::Db, - message::{ - Block, BlockRequest, BlockRequestV2, BlockResponse, ChainMetaData, ExternalMessage, - InjectedProposal, Proposal, - }, + message::{Block, BlockRequest, BlockRequestV2, BlockResponse, ChainMetaData, ExternalMessage, InjectedProposal, Proposal}, node::MessageSender, time::SystemTime, }; From 8aae098b1239cad3abc36113188c6116e1899484 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 16 Jan 2025 10:57:05 +0800 Subject: [PATCH 067/119] feat: removed block_store.rs --- z2/src/converter.rs | 22 +- zilliqa/src/api/zilliqa.rs | 14 +- zilliqa/src/block_store.rs | 1023 ------------------------------------ zilliqa/src/consensus.rs | 201 ++----- zilliqa/src/db.rs | 2 +- zilliqa/src/exec.rs | 9 +- zilliqa/src/lib.rs | 1 - zilliqa/src/node.rs | 20 - zilliqa/src/pool.rs | 18 +- zilliqa/src/state.rs | 49 +- zilliqa/src/sync.rs | 42 +- 11 files changed, 99 insertions(+), 1302 deletions(-) delete mode 100644 zilliqa/src/block_store.rs diff --git a/z2/src/converter.rs b/z2/src/converter.rs index 251a0a324..dbb1445d1 100644 --- a/z2/src/converter.rs +++ b/z2/src/converter.rs @@ -14,18 +14,14 @@ use bitvec::{bitarr, order::Msb0}; use eth_trie::{EthTrie, MemoryDB, Trie}; use indicatif::{ProgressBar, ProgressFinish, ProgressIterator, ProgressStyle}; use itertools::Itertools; -use libp2p::PeerId; use sha2::{Digest, Sha256}; -use tokio::sync::mpsc; use tracing::{debug, trace, warn}; use zilliqa::{ - block_store::BlockStore, cfg::{scilla_ext_libs_path_default, Amount, Config, NodeConfig}, crypto::{Hash, SecretKey}, db::Db, exec::store_external_libraries, message::{Block, QuorumCertificate, Vote, MAX_COMMITTEE_SIZE}, - node::{MessageSender, RequestId}, schnorr, scilla::{storage_key, CheckOutput, ParamValue, Transition}, state::{Account, Code, ContractInit, State}, @@ -346,27 +342,15 @@ pub async fn convert_persistence( "{msg} {wide_bar} [{per_sec}] {human_pos}/~{human_len} ({elapsed}/~{duration})", )?; - let (outbound_message_sender, _a) = mpsc::unbounded_channel(); - let (local_message_sender, _b) = mpsc::unbounded_channel(); - let message_sender = MessageSender { - our_shard: 0, - our_peer_id: PeerId::random(), - outbound_channel: outbound_message_sender, - local_channel: local_message_sender, - request_id: RequestId::default(), - }; + // let (outbound_message_sender, _a) = mpsc::unbounded_channel(); + // let (local_message_sender, _b) = 
mpsc::unbounded_channel(); let zq2_db = Arc::new(zq2_db); let node_config = &zq2_config.nodes[0]; - let block_store = Arc::new(BlockStore::new( - node_config, - zq2_db.clone(), - message_sender.clone(), - )?); let mut state = State::new_with_genesis( zq2_db.clone().state_trie()?, node_config.clone(), - block_store, + zq2_db.clone(), )?; let mut scilla_docker = run_scilla_docker()?; diff --git a/zilliqa/src/api/zilliqa.rs b/zilliqa/src/api/zilliqa.rs index e587597cf..5d0cc44b4 100644 --- a/zilliqa/src/api/zilliqa.rs +++ b/zilliqa/src/api/zilliqa.rs @@ -508,7 +508,7 @@ fn get_blockchain_info(_: Params, node: &Arc>) -> Result>) -> Result>) -> Result block.transactions.len(), @@ -1247,11 +1245,7 @@ fn get_recent_transactions( let mut txns = Vec::new(); let mut blocks_searched = 0; while block_number > 0 && txns.len() < 100 && blocks_searched < 100 { - let block = match node - .consensus - .block_store - .get_canonical_block_by_number(block_number)? - { + let block = match node.consensus.get_canonical_block_by_number(block_number)? { Some(block) => block, None => continue, }; @@ -1274,7 +1268,7 @@ fn get_recent_transactions( // GetNumTransactions fn get_num_transactions(_params: Params, node: &Arc>) -> Result { let node = node.lock().unwrap(); - let num_transactions = node.consensus.block_store.get_num_transactions()?; + let num_transactions = node.consensus.get_num_transactions()?; Ok(num_transactions.to_string()) } @@ -1283,7 +1277,6 @@ fn get_num_txns_tx_epoch(_params: Params, node: &Arc>) -> Result block.transactions.len(), @@ -1302,7 +1295,6 @@ fn get_num_txns_ds_epoch(_params: Params, node: &Arc>) -> Result Self { - Self { - parent_hash, - from, - proposal, - } - } -} - -/// A block cache. -/// We need to be careful to conserve block space in the presence of block flooding attacks, and we need to -/// make sure we don't lose blocks that form part of the main chain repeatedly, else we will never be able -/// to construct it. -/// -/// Similarly, we should ensure that we always buffer proposals close to the head of the tree, else we will -/// lose sync frequently and have to request, which will slow down block production. -/// -/// An easy way to do this is to put a hash of the node address (actually, we just use the low bits) in the -/// bottom (log2(N_WAYS)) bits of the view number. We then evict the largest tag le (max_view - buffer). -/// -/// I don't think it actually matters whether we use the view or the block number here, since we're not using -/// fixed-size arrays. -/// -#[derive(Debug, Serialize, Deserialize)] -pub struct BlockCache { - /// Caches proposals that are not yet blocks, and are before the head_cache. - pub cache: BTreeMap, - /// Caches proposals close to the head. - /// This buys us out of the situation where we are, say, 2 blocks behind the head. - /// We request those blocks, but by the time we get them, a new block is proposed. - /// So we're now a block behind. We request it, and then, by the time we get it ... - /// and so on. The head_cache caches broadcast proposals at the head of the chain - /// so we only need to get to (head_of_chain - head_cache_entries) and we can - /// then catch up using the head cache. 
- pub head_cache: BTreeMap, - /// Caches ranges where we think there is no block at all (just an empty view) - pub empty_view_ranges: RangeMap, - /// The head cache - this caches - /// An index into the cache by parent hash - pub by_parent_hash: HashMap>, - /// Set associative shift - pub shift: usize, - /// This is used to count the number of times we've looked for a fork. - /// The counter is zeroed when we receive (or pop) a new block, and counts 1 every - /// time we looked. - pub fork_counter: usize, - /// Copied from the parent to minimise the number of additional parameters we need. - pub max_blocks_in_flight: u64, - /// These are views which we have removed from the cache to process later. Remember not to re-request them, or - /// we will end up asking peers for views which we are about to process. - /// We need to remember to clear these flags once we have the proposal, because it might be a lie and we may need - /// to rerequest in order to get the right view (there will only ever be one legitimate view with a given number, - /// but peers may lie to us about what it is) - pub views_expecting_proposals: BTreeSet, -} - -impl BlockCache { - pub fn new(max_blocks_in_flight: u64) -> Self { - Self { - cache: BTreeMap::new(), - head_cache: BTreeMap::new(), - empty_view_ranges: RangeMap::new(), - by_parent_hash: HashMap::new(), - shift: 8 - constants::BLOCK_CACHE_LOG2_WAYS, - fork_counter: 0, - max_blocks_in_flight, - views_expecting_proposals: BTreeSet::new(), - } - } - - pub fn key_from_view(&self, peer: &PeerId, view_num: u64) -> u128 { - let ways = peer.to_bytes().pop().unwrap_or(0x00); - u128::from(ways >> self.shift) | (u128::from(view_num) << self.shift) - } - - pub fn view_from_key(&self, key: u128) -> u64 { - u64::try_from(key >> self.shift).unwrap() - } - - pub fn min_key_for_view(&self, view: u64) -> u128 { - u128::from(view) << self.shift - } - - pub fn expect_process_proposal(&mut self, view: u64) { - self.views_expecting_proposals.insert(view); - } - - pub fn received_process_proposal(&mut self, view: u64) { - self.views_expecting_proposals.remove(&view); - } - - /// returns the minimum key (view << shift) that we are prepared to store in the head cache. - /// keys smaller than this are stored in the main cache. - /// We compute this by subtracting a constant from (highest_known_view +1)<< shift - which is - /// the highest key we think could currently exist (highest view we've ever seen +1 shifted up). - /// (the constant is preshifted for efficiency) - /// This aims to keep the head cache at roughly BLOCK_CACHE_HEAD_BUFFER_ENTRIES entries - /// (note that this will be BLOCK_CACHE_HEAD_BUFFER_ENTRIES >> shift cached views, since the - /// head cache is set associative) - pub fn min_head_cache_key(&self, highest_known_view: u64) -> u128 { - let delta = u128::try_from(constants::BLOCK_CACHE_HEAD_BUFFER_ENTRIES).unwrap(); - let highest_key = u128::from(highest_known_view + 1) << self.shift; - highest_key // prevent underflowing for low views - .saturating_sub(delta) - } - - pub fn destructive_proposals_from_parent_hashes( - &mut self, - hashes: &[Hash], - ) -> Vec<(PeerId, Proposal)> { - // For each hash, find the list of blocks that have it as the parent. 
- let cache_keys = hashes - .iter() - .filter_map(|x| self.by_parent_hash.remove(x)) - .flatten() - .collect::>(); - let maybe = cache_keys - .iter() - .filter_map(|key| { - self.cache - .remove(key) - .or_else(|| self.head_cache.remove(key)) - .map(|entry| (entry.from, entry.proposal)) - }) - .collect::>(); - if !cache_keys.is_empty() { - let max_view = - self.view_from_key(cache_keys.iter().fold(0, |v1, v2| std::cmp::max(v1, *v2))); - // Ignore any gaps up to this point, because they may be lies. - (_, self.empty_view_ranges) = - self.empty_view_ranges - .diff_inter(&RangeMap::from_range(&Range { - start: 0, - end: max_view + 1, - })); - // We got a real block! Reset the fork counter. - self.fork_counter = 0; - } - maybe - } - - /// Delete all blocks in the cache up to and including block_number - pub fn delete_blocks_up_to(&mut self, block_number: u64) { - // note that this code embodies the assumption that increasing block number implies - // increasing view number. - self.trim_with_fn(|_, v| -> bool { v.proposal.number() <= block_number }); - } - - pub fn trim(&mut self, highest_confirmed_view: u64) { - let lowest_ignored_key = self.min_key_for_view(highest_confirmed_view); - debug!("trim: lowest_ignored_key = {0}", lowest_ignored_key); - self.trim_with_fn(|k, _| -> bool { *k < lowest_ignored_key }); - // We don't care about anything lower than what we're about to flush - self.views_expecting_proposals = self - .views_expecting_proposals - .split_off(&highest_confirmed_view); - } - - /// DANGER WILL ROBINSON! This function only searches from the minimum key to the maximum, so - /// any selector function which is not monotonic in key will not work properly. - fn trim_with_fn bool>(&mut self, selector: F) { - // We've deleted or replaced this key with this parent hash; remove it from the index. - fn unlink_parent_hash(cache: &mut HashMap>, key: &u128, hash: &Hash) { - let mut do_remove = false; - if let Some(val) = cache.get_mut(hash) { - val.remove(key); - if val.is_empty() { - do_remove = true - } - } - if do_remove { - cache.remove(hash); - } - } - - let cache_entries = self.max_blocks_in_flight << constants::BLOCK_CACHE_LOG2_WAYS; - // debug!("trim: cache had: {0}", self.extant_block_ranges()?); - // Should really be an option, but given that there is a convenient sentinel.. - let mut lowest_view_in_cache: Option = None; - let shift = self.shift; - - for cache_ptr in [&mut self.cache, &mut self.head_cache] { - while let Some((k, v)) = cache_ptr.first_key_value() { - if selector(k, v) { - // Kill it! - if let Some((k, v)) = cache_ptr.pop_first() { - unlink_parent_hash(&mut self.by_parent_hash, &k, &v.parent_hash); - }; - } else { - let view_number = u64::try_from(*k >> shift).unwrap(); - lowest_view_in_cache = Some( - lowest_view_in_cache.map_or(view_number, |x| std::cmp::min(x, view_number)), - ); - break; - } - } - } - - // Empty view ranges below the thing we last trimmed might not exist - zap them. - if let Some(v) = lowest_view_in_cache { - (_, self.empty_view_ranges) = - self.empty_view_ranges - .diff_inter(&RangeMap::from_range(&Range { - start: 0, - end: v + 1, - })); - } - // And trim. 
- let cache_size = usize::try_from(cache_entries).unwrap(); - self.empty_view_ranges.truncate(cache_size); - - while self.head_cache.len() > constants::BLOCK_CACHE_HEAD_BUFFER_ENTRIES { - if let Some((k, v)) = self.head_cache.pop_first() { - // Push this into the main cache, otherwise we will get into the state where - // blocks are removed from the head cache and lost and we are constantly - // requesting blocks to replace them. - self.cache.insert(k, v); - } - } - while self.cache.len() > cache_size { - if let Some((k, v)) = self.cache.pop_last() { - unlink_parent_hash(&mut self.by_parent_hash, &k, &v.parent_hash); - } - } - // Both caches are now at most the "right" number of entries long. - } - - pub fn no_blocks_at(&mut self, no_blocks_in: &Range) { - self.empty_view_ranges.with_range(no_blocks_in); - } - - pub fn delete_empty_view_range_cache(&mut self) { - self.empty_view_ranges = RangeMap::new(); - } - - /// Insert this proposal into the cache. - pub fn insert( - &mut self, - from: &PeerId, - parent_hash: &Hash, - proposal: Proposal, - highest_confirmed_view: u64, - highest_known_view: u64, - ) -> Result<()> { - fn insert_with_replacement( - into: &mut BTreeMap, - by_parent_hash: &mut HashMap>, - from: &PeerId, - parent_hash: &Hash, - key: u128, - value: Proposal, - ) { - into.insert(key, BlockCacheEntry::new(*parent_hash, *from, value)) - .map(|entry| { - by_parent_hash - .get_mut(&entry.parent_hash) - .map(|x| x.remove(&key)) - }); - if let Some(v) = by_parent_hash.get_mut(parent_hash) { - v.insert(key); - } else { - let mut new_set = HashSet::new(); - new_set.insert(key); - by_parent_hash.insert(*parent_hash, new_set); - } - } - - if proposal.header.view <= highest_confirmed_view { - // nothing to do. - return Ok(()); - } - // First, insert us. - let key = self.key_from_view(from, proposal.header.view); - if key > self.min_head_cache_key(highest_known_view) { - insert_with_replacement( - &mut self.head_cache, - &mut self.by_parent_hash, - from, - parent_hash, - key, - proposal, - ); - } else { - insert_with_replacement( - &mut self.cache, - &mut self.by_parent_hash, - from, - parent_hash, - key, - proposal, - ); - } - // Zero the fork counter. - self.fork_counter = 0; - // Now evict the worst entry - self.trim(highest_confirmed_view); - Ok(()) - } - - pub fn inc_fork_counter(&mut self) -> usize { - self.fork_counter += 1; - self.fork_counter - } - - pub fn reset_fork_counter(&mut self) { - self.fork_counter = 0; - } - - // For debugging - what view number ranges are in the cache? - pub fn extant_block_ranges(&self) -> RangeMap { - let mut result = RangeMap::new(); - let shift = 8 - constants::BLOCK_CACHE_LOG2_WAYS; - for key in self.cache.keys() { - let _ = u128::try_into(key >> shift).map(|x| result.with_elem(x)); - } - for key in self.head_cache.keys() { - let _ = u128::try_into(key >> shift).map(|x| result.with_elem(x)); - } - result - } - - pub fn expectant_block_ranges(&self) -> RangeMap { - let mut result = RangeMap::new(); - self.views_expecting_proposals.iter().for_each(|v| { - result.with_elem(*v); - }); - result - } -} - -/// Stores and manages the node's list of blocks. Also responsible for making requests for new blocks. -/// -/// # Syncing Algorithm -/// -/// We rely on [crate::consensus::Consensus] informing us of newly received block proposals via: -/// * [BlockStore::process_block] for blocks that can be part of our chain, because we already have their parent. -/// * [BlockStore::buffer_proposal] for blocks that can't (yet) be part of our chain. 
-/// -/// Both these code paths also call [BlockStore::request_missing_blocks]. This finds the greatest view of any proposal -/// we've seen (whether its part of our chain or not). -/// -/// -/// TODO(#1096): Retries for blocks we request but never receive. -#[derive(Debug)] -pub struct BlockStore { - db: Arc, - block_cache: Arc>>, - /// The maximum view of any proposal we have received, even if it is not part of our chain yet. - highest_known_view: u64, - /// Highest confirmed view - blocks we know to be correct. - highest_confirmed_view: u64, - /// Information we keep about our peers' state. - peers: BTreeMap, - /// The maximum number of blocks to send requests for at a time. - max_blocks_in_flight: u64, - /// Our block strategies. - strategies: Vec, - /// The block views we have available. This is read once from the DB at start-up and incrementally updated whenever - /// we receive a new block. We do this because obtaining the data from the DB is expensive. - available_blocks: RangeMap, - - /// Buffered block proposals. - buffered: BlockCache, - /// Requests we would like to send, but haven't been able to (e.g. because we have no peers). - unserviceable_requests: Option, - message_sender: MessageSender, - - /// Where we last started syncing, so we can report it in get_sync_data() - started_syncing_at: BlockNumber, - /// Previous sync flag, so we can tell when it changes. - last_sync_flag: bool, -} - -/// Data about block availability sent between peers -#[derive(Clone, Debug, Serialize, Deserialize)] -struct BlockAvailability { - /// None means no information, Some([]) means the other node shouldn't be relied upon for any blocks at all. - strategies: Option>, - /// The largest view we've seen from a block that this peer sent us. - highest_known_view: u64, -} - -#[derive(Clone, Debug)] -struct PeerInfo { - /// Availability from this peer - availability: BlockAvailability, - /// When did we last update availability? - availability_updated_at: Option, - /// Requests we've sent to the peer. - pending_requests: HashMap, - /// If `Some`, the time of the most recently failed request. - last_request_failed_at: Option, -} - -impl PeerInfo { - fn new() -> Self { - Self { - availability: BlockAvailability::new(), - availability_updated_at: None, - pending_requests: HashMap::new(), - last_request_failed_at: None, - } - } -} - -/// Data about a peer -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct PeerInfoStatus { - availability: BlockAvailability, - availability_updated_at: Option, - pending_requests: Vec<(String, SystemTime, u64, u64)>, - last_request_failed_at: Option, -} - -/// Data about the block store, used for debugging. -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct BlockStoreStatus { - highest_known_view: u64, - views_held: Vec>, - peers: Vec<(String, PeerInfoStatus)>, - availability: Option>, -} - -impl BlockStoreStatus { - pub fn new(block_store: &mut BlockStore) -> Result { - let peers = block_store - .peers - .iter() - .map(|(k, v)| (format!("{:?}", k), PeerInfoStatus::new(v))) - .collect::>(); - Ok(Self { - highest_known_view: block_store.highest_known_view, - views_held: block_store.db.get_view_ranges()?, - peers, - availability: block_store.availability()?, - }) - } -} - -impl PeerInfoStatus { - // Annoyingly, this can't (easily) be allowed to fail without making generating debug info hard. 
- fn new(info: &PeerInfo) -> Self { - fn s_from_time(q: Option) -> Option { - q.map(|z| { - z.duration_since(SystemTime::UNIX_EPOCH) - .unwrap_or(Duration::ZERO) - .as_secs() - }) - } - let pending_requests = info - .pending_requests - .iter() - .map(|(k, v)| (format!("{:?}", k), v.0, v.1, v.2)) - .collect::>(); - Self { - availability: info.availability.clone(), - availability_updated_at: s_from_time(info.availability_updated_at), - pending_requests, - last_request_failed_at: s_from_time(info.last_request_failed_at), - } - } -} - -impl BlockAvailability { - pub fn new() -> Self { - Self { - strategies: None, - highest_known_view: 0, - } - } -} - -impl BlockStore { - pub fn new(config: &NodeConfig, db: Arc, message_sender: MessageSender) -> Result { - let available_blocks = - db.get_view_ranges()? - .iter() - .fold(RangeMap::new(), |mut range_map, range| { - range_map.with_range(range); - range_map - }); - Ok(BlockStore { - db, - block_cache: Arc::new(RwLock::new(LruCache::new(NonZeroUsize::new(5).unwrap()))), - highest_known_view: 0, - highest_confirmed_view: 0, - peers: BTreeMap::new(), - max_blocks_in_flight: config.max_blocks_in_flight as u64, - strategies: vec![BlockStrategy::Latest(constants::RETAINS_LAST_N_BLOCKS)], - available_blocks, - buffered: BlockCache::new(config.max_blocks_in_flight as u64), - unserviceable_requests: None, - message_sender, - started_syncing_at: 0, - last_sync_flag: false, - }) - } - - /// The data set here is held in memory. It can be useful to update manually - /// For example after a restart to remind block_store of its peers and height - pub fn set_peers_and_view( - &mut self, - highest_known_view: u64, - peer_ids: &Vec, - ) -> Result<()> { - for peer_id in peer_ids { - self.peer_info(*peer_id); - } - self.highest_known_view = highest_known_view; - Ok(()) - } - - /// Create a read-only clone of this [BlockStore]. The read-only property must be upheld by the caller - Calling - /// any `&mut self` methods on the returned [BlockStore] will lead to problems. This clone is cheap. - pub fn clone_read_only(&self) -> Arc { - Arc::new(BlockStore { - db: self.db.clone(), - block_cache: self.block_cache.clone(), - highest_known_view: 0, - highest_confirmed_view: 0, - peers: BTreeMap::new(), - max_blocks_in_flight: 0, - strategies: self.strategies.clone(), - available_blocks: RangeMap::new(), - buffered: BlockCache::new(0), - unserviceable_requests: None, - message_sender: self.message_sender.clone(), - started_syncing_at: self.started_syncing_at, - last_sync_flag: self.last_sync_flag, - }) - } - - /// Update someone else's availability - pub fn update_availability( - &mut self, - from: PeerId, - avail: &Option>, - ) -> Result<()> { - let the_peer = self.peer_info(from); - the_peer.availability.strategies.clone_from(avail); - the_peer.availability_updated_at = Some(SystemTime::now()); - Ok(()) - } - - /// Retrieve our availability. - /// We need to do this by view range, which means that we need to account for views where there was no block. - /// So, the underlying db function finds the view lower and upper bounds of our contiguous block ranges and we - /// advertise those. - pub fn availability(&self) -> Result>> { - let mut to_return = self.strategies.clone(); - to_return.extend( - self.available_blocks - .ranges - .iter() - .map(|range| BlockStrategy::CachedViewRange(range.clone(), None)), - ); - Ok(Some(to_return)) - } - - /// Buffer a block proposal whose parent we don't yet know about. 
- pub fn buffer_proposal(&mut self, from: PeerId, proposal: Proposal) -> Result<()> { - let view = proposal.view(); - - // If this is the highest block we've seen, remember its view. - if view > self.highest_known_view { - trace!(view, "block_store:: new highest known view"); - self.highest_known_view = view; - } - - trace!( - "block_store:: buffer_proposal {view}, hc {0}", - self.highest_confirmed_view - ); - self.buffered.insert( - &from, - &proposal.header.qc.block_hash.clone(), - proposal, - self.highest_confirmed_view, - self.highest_known_view, - )?; - - let peer = self.peer_info(from); - if view > peer.availability.highest_known_view { - trace!(%from, view, "block_store:: new highest known view for peer"); - peer.availability.highest_known_view = view; - } - - Ok(()) - } - - /// This function: - /// - /// * Looks through the blocks we have - /// * Finds the next blocks it thinks we need - /// * Iterates through our known peers. - /// - /// If we don't have availability for a peer, we will request it by - /// sending an empty block request. - /// - /// If we do, we will try to request whatever blocks it has that we want. - /// - /// We limit the number of outstanding requests per peer, in order to - /// avoid bufferbloat at the peer's input message queue. - /// - /// We don't ask for blocks that we think are in flight (ie. we've - /// requested them but they have not yet arrived), those we don't think a - /// peer has, or those we think are gaps (remember that requests are made - /// by view, so you can't guarantee that every view has a block). - /// - /// We time out outstanding requests on a flat-timeout basis (our model - /// being that if you haven't replied by now, the whole message has - /// probably been lost). - /// Returns whether this function thinks we are syncing or not. - pub fn request_missing_blocks(&mut self) -> Result { - // Get the highest view we currently have committed to our chain. - // This is a bit horrid - it can go down as well as up, because we can roll back blocks - // when we discover that they are ahead of what we think the rest of the chain - // has committed to - if we don't roll back here, we won't then fetch the canonical - // versions of those blocks (thinking we already have them). - let (syncing, current_block) = self.am_syncing()?; - self.highest_confirmed_view = current_block.view(); - let current_view = current_block.view(); - trace!( - "block_store::request_missing_blocks() : set highest_confirmed_view {0} (current = {1})", - self.highest_confirmed_view, - current_view, - ); - - // First off, let's load up the unserviceable requests. - let mut to_request = if let Some(us_requests) = self.unserviceable_requests.take() { - us_requests - } else { - RangeMap::new() - }; - - // If we think the network might be ahead of where we currently are, attempt to download the missing blocks. - // This is complicated, because we mustn't request more blocks than will fit in our cache, or we might - // end up evicting the critical part of the chain.. - // @todo I can't think of a more elegant way than this, but it's horrid - we want to exclude views which - // we might still be voting on. - if syncing { - trace!( - current_view, - self.highest_known_view, - self.max_blocks_in_flight, - "block_store::request_missing_blocks() : missing some blocks" - ); - { - // We need to request from current_view, because these blocks might never be returned by our peers - // deduplication of requests is done one level lower - in request_blocks(). 
- let from = current_view + 1; - // Never request more than current_view + max_blocks_in_flight, or the cache won't be able to hold - // the responses and we'll end up being unable to reconstruct the chain. Not strictly true, because - // the network will hold some blocks for us, but true enough that I think we ought to treat it as - // such. - let to = cmp::min( - current_view + self.max_blocks_in_flight, - self.highest_known_view, - ); - trace!("block_store::request_missing_blocks() : requesting blocks {from} to {to}"); - to_request.with_range(&Range { - start: from, - end: to + 1, - }); - } - if !to_request.is_empty() { - self.request_blocks(&to_request)?; - } - } else { - // We're synced - no need to try and guess forks. - trace!( - "block_store::request_missing_blocks() : synced with highest_known_view {0}, current_view {1}", - self.highest_known_view, - current_view - ); - self.buffered.reset_fork_counter(); - } - - if syncing && !self.last_sync_flag { - // We didn't used to be syncing; remember when we started. - self.started_syncing_at = current_block.number(); - } - self.last_sync_flag = syncing; - - Ok(syncing) - } - - pub fn prune_pending_requests(&mut self) -> Result<()> { - // In the good old days, we could've done this by linear interpolation on the timestamp. - let current_time = SystemTime::now(); - for peer in self.peers.keys().cloned().collect::>() { - let the_peer = self.peer_info(peer); - the_peer.pending_requests = the_peer - .pending_requests - .iter() - .filter_map(|(k, (v1, v2, v3))| { - // How long since this request was sent? - match current_time.duration_since(*v1) { - Ok(since) => { - if since > constants::BLOCK_REQUEST_RESPONSE_TIMEOUT { - // Time out everything. - trace!("block_store::prune_pending_requests: timing out pending request {k:?} {v1:?} {v2} {v3}"); - None - } else { - Some((*k, (*v1, *v2, *v3))) - } - } - _ => None, - } - }) - .collect(); - } - Ok(()) - } - - pub fn retry_us_requests(&mut self) -> Result<()> { - if let Some(us_requests) = self.unserviceable_requests.take() { - self.request_blocks(&us_requests)?; - } - Ok(()) - } - - /// Make a request for the blocks associated with a range of views. Returns `true` if a request was made and `false` if the request had to be - /// buffered because no peers were available. - /// Public so we can trigger it from the debug API - pub fn request_blocks(&mut self, _req: &RangeMap) -> Result { - Ok(false) // FIXME: Stub - } - - pub fn get_block(&self, hash: Hash) -> Result> { - let mut block_cache = self - .block_cache - .write() - .map_err(|e| anyhow!("Failed to get write access to block cache: {e}"))?; - if let Some(block) = block_cache.get(&hash) { - return Ok(Some(block.clone())); - } - let Some(block) = self.db.get_block_by_hash(&hash)? else { - return Ok(None); - }; - block_cache.put(hash, block.clone()); - Ok(Some(block)) - } - - pub fn get_block_by_view(&self, view: u64) -> Result> { - let Some(hash) = self.db.get_block_hash_by_view(view)? else { - return Ok(None); - }; - self.get_block(hash) - } - - pub fn get_highest_canonical_block_number(&self) -> Result> { - self.db.get_highest_canonical_block_number() - } - - pub fn get_canonical_block_by_number(&self, number: u64) -> Result> { - self.db.get_canonical_block_by_number(number) - } - - /// Called to process a block which can be added to the chain. 
- /// - insert the block into any necessary databases - /// - update the highest known and confirmed views, if necessary, - /// - Return a list of proposals that can now be made part of the chain, removing - /// them from the cache to free up space as we do so. - /// - /// The caller should arrange to process the returned list asynchronously to avoid - /// blocking message processing for too long. - pub fn process_block( - &mut self, - from: Option, - block: Block, - ) -> Result> { - trace!(?from, number = block.number(), hash = ?block.hash(), "block_store::process_block() : starting"); - self.db.insert_block(&block)?; - self.available_blocks.with_elem(block.view()); - - if let Some(from) = from { - let peer = self.peer_info(from); - if block.view() > peer.availability.highest_known_view { - trace!(%from, view = block.view(), "new highest known view for peer"); - peer.availability.highest_known_view = block.view(); - } - } - - // There are two sets - let result = self - .buffered - .destructive_proposals_from_parent_hashes(&[block.hash()]); - - // Update highest_confirmed_view, but don't trim the cache if - // we're not changing anything. - if block.header.view > self.highest_confirmed_view { - self.highest_confirmed_view = block.header.view; - self.buffered.trim(self.highest_confirmed_view); - } - - Ok(result) - } - - pub fn report_outgoing_message_failure( - &mut self, - failure: OutgoingMessageFailure, - ) -> Result<()> { - let peer_info = self.peer_info(failure.peer); - let Some((_, from, to)) = peer_info.pending_requests.remove(&failure.request_id) else { - // A request we didn't know about failed. It must have been sent by someone else. - return Ok(()); - }; - peer_info.last_request_failed_at = Some(SystemTime::now()); - - trace!("block_store : outgoing_message_failure: re-requesting {from} - {to}"); - self.request_blocks(&RangeMap::from_closed_interval(from, to))?; - - Ok(()) - } - - fn peer_info(&mut self, peer: PeerId) -> &mut PeerInfo { - self.peers.entry(peer).or_insert_with(PeerInfo::new) - } - - pub fn forget_block_range(&mut self, blocks: Range) -> Result<()> { - self.db.forget_block_range(blocks) - } - - pub fn contains_block(&mut self, block_hash: &Hash) -> Result { - self.db.contains_block(block_hash) - } - - // Retrieve the plausible next blocks for the block with this hash - // Because of forks there might be many of these. - pub fn obtain_child_block_candidates_for( - &mut self, - hashes: &[Hash], - ) -> Result> { - // The easy case is that there's something in the buffer with us as its parent hash. - let with_parent_hashes = self - .buffered - .destructive_proposals_from_parent_hashes(hashes); - if with_parent_hashes.is_empty() { - // There isn't. There are three cases: - // - // 1. We simply haven't received the next block yet. Give up and wait for it. - // 2. We have received a lie for the next block. Delete it and try again. - // 3. There was a fork and so the true next block is a bit further on in the - // chain than we've looked so far. - // - // There would be a few easy optimisations if we could eg. assume that forks were max length - // 1. As it is, I can't think of a clever way to do this, so... - - // In any case, deleting any cached block that calls itself the next block is - // the right thing to do - if it really was the next block, we would not be - // executing this branch. - if let Some(highest_block_number) = self.db.get_highest_canonical_block_number()? 
{ - self.buffered.delete_blocks_up_to(highest_block_number + 1); - trace!( - "block_store::obtain_child_block_candidates : deleted cached blocks up to and including {0}", - highest_block_number + 1 - ); - } - - let fork_elems = - self.buffered.inc_fork_counter() * (1 + constants::EXAMINE_BLOCKS_PER_FORK_COUNT); - - // Limit the number of forks to process otherwise the db query can take too long - const MAX_FORK_BLOCKS_TO_QUERY: usize = 512; - let fork_elems = cmp::min(fork_elems, MAX_FORK_BLOCKS_TO_QUERY); - - let parent_hashes = self.db.get_highest_block_hashes(fork_elems)?; - let revised = self - .buffered - .destructive_proposals_from_parent_hashes(&parent_hashes); - if !revised.is_empty() { - // Found some! - self.buffered.reset_fork_counter(); - } - Ok(revised) - } else { - Ok(with_parent_hashes) - } - } - - pub fn next_proposals_if_likely(&mut self) -> Result> { - // This is a bit sneaky, but the db overhead is just stepping through its B-Tree and this - // lets us cut out a lot of forks with 0 retries. - self.obtain_child_block_candidates_for( - &self - .db - .get_highest_block_hashes(constants::EXAMINE_BLOCKS_PER_FORK_COUNT)?, - ) - } - - pub fn delete_empty_view_range_cache(&mut self) { - self.buffered.delete_empty_view_range_cache(); - } - - /// Suppose that there is a view with no associated block. - /// Because we request views, not blocks, we will ask for blocks for those views. - /// Because there are no valid blocks in those views, we won't get them. - /// We will therefore ask again, and continue doing so forever, potentially exhausting our capacity for outstanding - /// view requests and blocking us from requesting blocks from views in which they might be extant. - /// We avoid this by finding the gaps between the view numbers of proposals we receive and caching - /// this list in the block_cache. We then arrange not to rerequest blocks in views for which we know there are no - /// valid blocks - remembering to clear this periodically in case a malicious node has lied to us about it. - /// - /// this function takes a list of proposals in a block response, works out the gaps between them and caches - /// the result. Gaps at the beginning of the sequence are recorded in the space between from_view and the view of the - /// first proposal; gaps at the end are ignored (and will be returned when we ask for the next view up from where - /// this block proposal left off). - pub fn buffer_lack_of_proposals( - &mut self, - from_view: u64, - proposals: &Vec, - ) -> Result<()> { - // OK. Find the gaps and register them as areas not to ask about again, because - // we now "know" that there is no block in this range. - // If this turns out to be a lie, we will pop the first block in the gap and check to see - // if it our next block. This will have the side-effect of forgetting about any gaps before - // that point, which we will then re-query, realise our mistake and carry on. - // @todo this is horribly slow - speed it up! - let mut gap_start = from_view; - let mut gap_end; - for p in proposals { - gap_end = p.header.view; - if gap_end > gap_start { - self.buffered.no_blocks_at(&Range { - start: gap_start, - end: gap_end, - }); - } - gap_start = gap_end + 1; - } - // There's never a gap at the end, because we don't know at which view we stopped. 
- Ok(()) - } - - pub fn get_num_transactions(&self) -> Result { - let count = self.db.get_total_transaction_count()?; - Ok(count) - } - - pub fn summarise_buffered(&self) -> RangeMap { - self.buffered.extant_block_ranges() - } - - pub fn expect_process_proposal(&mut self, view: u64) { - self.buffered.expect_process_proposal(view); - } - - pub fn received_process_proposal(&mut self, view: u64) { - self.buffered.received_process_proposal(view); - } - - /// Returns (am_syncing, current_highest_block) - pub fn am_syncing(&self) -> Result<(bool, Block)> { - let current_block = self - .db - .get_canonical_block_by_number( - self.db - .get_highest_canonical_block_number()? - .ok_or_else(|| anyhow!("no highest block"))?, - )? - .ok_or_else(|| anyhow!("missing highest block"))?; - Ok(( - (self.highest_known_view + 2) > current_block.view(), - current_block, - )) - } - - // Returns (starting_block, current_block, highest_block) if we're syncing, - // None if we're not. - pub fn get_sync_data(&self) -> Result> { - let (flag, highest_block) = self.am_syncing()?; - if !flag { - Ok(None) - } else { - // Compute the highest block. We're going to do this by taking the difference between - - // get an estimated block number if no more views were skipped. - let skipped_views = highest_block.view() - highest_block.number(); - let expected_highest_block_number = self.highest_known_view - skipped_views; - Ok(Some(( - self.started_syncing_at, - highest_block.number(), - expected_highest_block_number, - ))) - } - } -} diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index cca2c722e..674b20d29 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -20,7 +20,6 @@ use tokio::sync::{broadcast, mpsc::UnboundedSender}; use tracing::*; use crate::{ - block_store::BlockStore, blockhooks, cfg::{ConsensusConfig, NodeConfig}, constants::TIME_TO_ALLOW_PROPOSAL_BROADCAST, @@ -31,12 +30,11 @@ use crate::{ inspector::{self, ScillaInspector, TouchedAddressInspector}, message::{ AggregateQc, BitArray, BitSlice, Block, BlockHeader, BlockRef, BlockStrategy, - ExternalMessage, InternalMessage, NewView, ProcessProposal, Proposal, QuorumCertificate, - Vote, MAX_COMMITTEE_SIZE, + ExternalMessage, InternalMessage, NewView, Proposal, QuorumCertificate, Vote, + MAX_COMMITTEE_SIZE, }, node::{MessageSender, NetworkMessage, OutgoingMessageFailure}, pool::{TransactionPool, TxAddResult, TxPoolContent}, - range_map::RangeMap, state::State, sync::Sync, time::SystemTime, @@ -153,7 +151,6 @@ pub struct Consensus { message_sender: MessageSender, reset_timeout: UnboundedSender, pub sync: Sync, - pub block_store: BlockStore, latest_leader_cache: RefCell>, votes: BTreeMap, /// Votes for a block we don't have stored. They are retained in case we receive the block later. @@ -210,18 +207,16 @@ impl Consensus { let sync = Sync::new(&config, db.clone(), message_sender.clone(), Vec::new())?; - // It is important to create the `BlockStore` after the checkpoint has been loaded into the DB. The - // `BlockStore` pre-loads and caches information about the currently stored blocks. - let block_store = BlockStore::new(&config, db.clone(), message_sender.clone())?; - let latest_block = db .get_finalized_view()? - .map(|view| { - block_store - .get_block_by_view(view)? 
- .ok_or_else(|| anyhow!("no header found at view {view}")) + .and_then(|view| { + db.get_block_hash_by_view(view) + .expect("no header found at view {view}") }) - .transpose()?; + .and_then(|hash| { + db.get_block_by_hash(&hash) + .expect("no block found for hash {hash}") + }); let mut state = if let Some(latest_block) = &latest_block { trace!("Loading state from latest block"); @@ -229,15 +224,11 @@ impl Consensus { db.state_trie()?, latest_block.state_root_hash().into(), config.clone(), - block_store.clone_read_only(), + db.clone(), ) } else { trace!("Constructing new state from genesis"); - State::new_with_genesis( - db.state_trie()?, - config.clone(), - block_store.clone_read_only(), - )? + State::new_with_genesis(db.state_trie()?, config.clone(), db.clone())? }; let (latest_block, latest_block_view) = match latest_block { @@ -251,10 +242,9 @@ impl Consensus { let (start_view, finalized_view, high_qc) = { match db.get_high_qc()? { Some(qc) => { - let high_block = block_store - .get_block(qc.block_hash)? + let high_block = db + .get_block_by_hash(&qc.block_hash)? .ok_or_else(|| anyhow!("missing block that high QC points to!"))?; - let finalized_number = db .get_finalized_view()? .ok_or_else(|| anyhow!("missing latest finalized view!"))?; @@ -291,8 +281,7 @@ impl Consensus { let highest_block_number = db .get_highest_canonical_block_number()? .ok_or_else(|| anyhow!("can't find highest block num in database!"))?; - - let head_block = block_store + let head_block = db .get_canonical_block_by_number(highest_block_number)? .ok_or_else(|| anyhow!("missing head block!"))?; trace!( @@ -329,7 +318,6 @@ impl Consensus { secret_key, config, sync, - block_store, latest_leader_cache: RefCell::new(None), message_sender, reset_timeout, @@ -401,8 +389,8 @@ impl Consensus { // Remind block_store of our peers and request any potentially missing blocks let high_block = consensus - .block_store - .get_block(high_qc.block_hash)? + .db + .get_block_by_hash(&high_qc.block_hash)? .ok_or_else(|| anyhow!("missing block that high QC points to!"))?; let executed_block = BlockHeader { @@ -413,24 +401,15 @@ impl Consensus { // Grab last seen committee's peerIds in case others also went offline let committee = state_at.get_stakers(executed_block)?; - let recent_peer_ids: Vec<_> = committee + let recent_peer_ids = committee .iter() .filter(|&&peer_public_key| peer_public_key != consensus.public_key()) .filter_map(|&peer_public_key| { state_at.get_peer_id(peer_public_key).unwrap_or(None) }) - .collect(); + .collect_vec(); - consensus - .block_store - .set_peers_and_view(high_block.view(), &recent_peer_ids)?; - // It is likley that we missed the most recent proposal. Request it now - consensus - .block_store - .request_blocks(&RangeMap::from_closed_interval( - high_block.view(), - high_block.view() + 1, - ))?; + consensus.sync.add_peers(recent_peer_ids); } Ok(consensus) @@ -463,11 +442,11 @@ impl Consensus { pub fn head_block(&self) -> Block { let highest_block_number = self - .block_store + .db .get_highest_canonical_block_number() .unwrap() .unwrap(); - self.block_store + self.db .get_canonical_block_by_number(highest_block_number) .unwrap() .unwrap() @@ -652,7 +631,7 @@ impl Consensus { // FIXME: Cleanup - if self.block_store.contains_block(&block.hash())? { + if self.db.contains_block(&block.hash())? 
{ trace!("ignoring block proposal, block store contains this block already"); return Ok(None); } @@ -678,29 +657,11 @@ impl Consensus { return Ok(None); } - match self.check_block(&block, during_sync) { - Ok(()) => {} - Err((e, temporary)) => { - // If this block could become valid in the future, buffer it. - if temporary { - self.block_store.buffer_proposal( - from, - Proposal::from_parts_with_hashes( - block, - transactions - .into_iter() - .map(|tx| { - let hash = tx.calculate_hash(); - (tx, hash) - }) - .collect(), - ), - )?; - } else { - warn!(?e, "invalid block proposal received!"); - } - return Ok(None); + if let Err((e, temporary)) = self.check_block(&block, during_sync) { + if !temporary { + warn!(?e, "invalid block proposal received!"); } + return Ok(None); } self.update_high_qc_and_view(block.agg.is_some(), block.header.qc)?; @@ -723,19 +684,6 @@ impl Consensus { block.view(), view ); - self.block_store.buffer_proposal( - from, - Proposal::from_parts_with_hashes( - block, - transactions - .into_iter() - .map(|tx| { - let hash = tx.calculate_hash(); - (tx, hash) - }) - .collect(), - ), - )?; return Ok(None); } @@ -1990,7 +1938,7 @@ impl Consensus { new_high_qc: QuorumCertificate, ) -> Result<()> { let view = self.get_view()?; - let Some(new_high_qc_block) = self.block_store.get_block(new_high_qc.block_hash)? else { + let Some(new_high_qc_block) = self.db.get_block_by_hash(&new_high_qc.block_hash)? else { // We don't set high_qc to a qc if we don't have its block. warn!("Recieved potential high QC but didn't have the corresponding block"); return Ok(()); @@ -2441,11 +2389,10 @@ impl Consensus { pub fn receive_block_availability( &mut self, from: PeerId, - availability: &Option>, + _availability: &Option>, ) -> Result<()> { trace!("Received block availability from {:?}", from); - self.block_store.update_availability(from, availability)?; - Ok(()) + Ok(()) // FIXME: Stub } // Checks for the validity of a block and adds it to our block store if valid. @@ -2460,8 +2407,6 @@ impl Consensus { proposal.number(), proposal.view() ); - self.block_store - .received_process_proposal(proposal.header.view); let result = self.proposal(from, proposal, true)?; // Processing the received block can either result in: // * A `Proposal`, if we have buffered votes for this block which form a supermajority, meaning we can @@ -2477,25 +2422,7 @@ impl Consensus { let hash = block.hash(); debug!(?from, ?hash, ?block.header.view, ?block.header.number, "added block"); let _ = self.new_blocks.send(block.header); - // We may have child blocks; process them too. - self.block_store - .process_block(from, block)? - .into_iter() - .try_for_each(|(from_id, child_proposal)| -> Result<()> { - // The only reason this can fail is permanent failure of the messaging mechanism, so - // propagate it back here. 
- // Mark this block in the cache as "we're about to process this one" - let view = child_proposal.header.view; - self.message_sender.send_external_message( - self.peer_id(), - ExternalMessage::ProcessProposal(ProcessProposal { - from: from_id.to_bytes(), - block: child_proposal, - }), - )?; - self.block_store.expect_process_proposal(view); - Ok(()) - })?; + self.db.insert_block(&block)?; Ok(()) } @@ -2536,15 +2463,15 @@ impl Consensus { } pub fn get_block(&self, key: &Hash) -> Result> { - self.block_store.get_block(*key) + self.db.get_block_by_hash(key) } pub fn get_block_by_view(&self, view: u64) -> Result> { - self.block_store.get_block_by_view(view) + self.db.get_block_by_view(view) } pub fn get_canonical_block_by_number(&self, number: u64) -> Result> { - self.block_store.get_canonical_block_by_number(number) + self.db.get_canonical_block_by_number(number) } fn set_finalized_view(&mut self, view: u64) -> Result<()> { @@ -2617,7 +2544,7 @@ impl Consensus { pub fn state_at(&self, number: u64) -> Result> { Ok(self - .block_store + .db .get_canonical_block_by_number(number)? .map(|block| self.state.at_root(block.state_root_hash().into()))) } @@ -3215,71 +3142,33 @@ impl Consensus { } } + pub fn get_num_transactions(&self) -> Result { + let count = self.db.get_total_transaction_count()?; + Ok(count) + } + pub fn report_outgoing_message_failure( &mut self, - failure: OutgoingMessageFailure, + _failure: OutgoingMessageFailure, ) -> Result<()> { - self.block_store.report_outgoing_message_failure(failure) + Ok(()) // FIXME: Stub } pub fn tick(&mut self) -> Result<()> { trace!("consensus::tick()"); trace!("request_missing_blocks from timer"); - // Drives the block fetching state machine - see docs/fetching_blocks.md - if self.block_store.request_missing_blocks()? { - // We're syncing.. - // Is it likely that the next thing in the buffer could be the next block? - let likely_blocks = self.block_store.next_proposals_if_likely()?; - if likely_blocks.is_empty() { - trace!("no blocks buffered"); - // If there are no next blocks buffered, someone may well have lied to us about - // where the gaps in the view range are. This should be a rare occurrence, so in - // lieu of timing it out, just zap the view range gap and we'll take the hit on - // any rerequests. - self.block_store.delete_empty_view_range_cache(); - } else { - likely_blocks.into_iter().for_each(|(from, block)| { - trace!( - "buffer may contain the next block - {0:?} v={1} n={2}", - block.hash(), - block.view(), - block.number() - ); - // Ignore errors here - just carry on and wait for re-request to clean up. - let view = block.view(); - let _ = self.message_sender.send_external_message( - self.peer_id(), - ExternalMessage::ProcessProposal(ProcessProposal { - from: from.to_bytes(), - block, - }), - ); - self.block_store.expect_process_proposal(view); - }); - } + // Drives syncing from timeouts, not just new Proposals + if self.sync.am_syncing()? 
{ + // TODO: Sync from Timeouts } else { trace!("not syncing ..."); } Ok(()) } - pub fn buffer_proposal(&mut self, from: PeerId, proposal: Proposal) -> Result<()> { - self.block_store.buffer_proposal(from, proposal)?; - Ok(()) - } - - pub fn buffer_lack_of_proposals( - &mut self, - from_view: u64, - proposals: &Vec, - ) -> Result<()> { - self.block_store - .buffer_lack_of_proposals(from_view, proposals) - } - pub fn get_sync_data(&self) -> Result> { - self.block_store.get_sync_data() + self.sync.get_sync_data() } } diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs index aba4e0968..31c104b90 100644 --- a/zilliqa/src/db.rs +++ b/zilliqa/src/db.rs @@ -1191,7 +1191,7 @@ fn decompress_file + Debug>(input_file_path: P, output_file_path: /// An implementor of [eth_trie::DB] which uses a [Connection] to persist data. #[derive(Debug, Clone)] pub struct TrieStorage { - db: Arc>, + pub db: Arc>, cache: Arc, Vec>>>, } diff --git a/zilliqa/src/exec.rs b/zilliqa/src/exec.rs index 49faece60..78eaafad5 100644 --- a/zilliqa/src/exec.rs +++ b/zilliqa/src/exec.rs @@ -415,7 +415,6 @@ impl DatabaseRef for &State { fn block_hash_ref(&self, number: u64) -> Result { Ok(self - .block_store .get_canonical_block_by_number(number)? .map(|block| B256::new(block.hash().0)) .unwrap_or_default()) @@ -1203,15 +1202,11 @@ impl PendingState { } pub fn get_canonical_block_by_number(&self, block_number: u64) -> Result> { - self.pre_state - .block_store - .get_canonical_block_by_number(block_number) + self.pre_state.get_canonical_block_by_number(block_number) } pub fn get_highest_canonical_block_number(&self) -> Result> { - self.pre_state - .block_store - .get_highest_canonical_block_number() + self.pre_state.get_highest_canonical_block_number() } pub fn load_account(&mut self, address: Address) -> Result<&mut PendingAccount> { diff --git a/zilliqa/src/lib.rs b/zilliqa/src/lib.rs index bbb360644..9b783a862 100644 --- a/zilliqa/src/lib.rs +++ b/zilliqa/src/lib.rs @@ -1,5 +1,4 @@ pub mod api; -pub mod block_store; mod blockhooks; pub mod cfg; pub mod consensus; diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 589d7065c..58744ce98 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -911,26 +911,6 @@ impl Node { Ok(()) } - fn _handle_block_response(&mut self, from: PeerId, response: BlockResponse) -> Result<()> { - trace!( - "block_store::handle_block_response - received blocks response of length {}", - response.proposals.len() - ); - self.consensus - .receive_block_availability(from, &response.availability)?; - - self.consensus - .buffer_lack_of_proposals(response.from_view, &response.proposals)?; - - for block in response.proposals { - // Buffer the block so that we know we have it - in fact, add it to the cache so - // that we can include it in the chain if necessary. 
- self.consensus.buffer_proposal(from, block)?; - } - trace!("block_store::handle_block_response: finished handling response"); - Ok(()) - } - fn handle_injected_proposal(&mut self, from: PeerId, req: InjectedProposal) -> Result<()> { if from != self.consensus.peer_id() { warn!("Someone ({from}) sent me a InjectedProposal; illegal- ignoring"); diff --git a/zilliqa/src/pool.rs b/zilliqa/src/pool.rs index 32b71d908..9239e4fc7 100644 --- a/zilliqa/src/pool.rs +++ b/zilliqa/src/pool.rs @@ -403,16 +403,13 @@ mod tests { primitives::{Address, Bytes, PrimitiveSignature, TxKind, U256}, }; use anyhow::Result; - use libp2p::PeerId; use rand::{seq::SliceRandom, thread_rng}; use super::TransactionPool; use crate::{ - block_store::BlockStore, cfg::NodeConfig, crypto::Hash, db::Db, - node::{MessageSender, RequestId}, state::State, transaction::{EvmGas, SignedTransaction, TxIntershard, VerifiedTransaction}, }; @@ -468,23 +465,10 @@ mod tests { fn get_in_memory_state() -> Result { let node_config = NodeConfig::default(); - let (s1, _) = tokio::sync::mpsc::unbounded_channel(); - let (s2, _) = tokio::sync::mpsc::unbounded_channel(); - - let message_sender = MessageSender { - our_shard: 0, - our_peer_id: PeerId::random(), - outbound_channel: s1, - local_channel: s2, - request_id: RequestId::default(), - }; - let db = Db::new::(None, 0, 0)?; let db = Arc::new(db); - let block_store = BlockStore::new(&node_config, db.clone(), message_sender.clone())?; - - State::new_with_genesis(db.state_trie()?, node_config, Arc::new(block_store)) + State::new_with_genesis(db.state_trie()?, node_config, db.clone()) } fn create_acc(state: &mut State, address: Address, balance: u128, nonce: u64) -> Result<()> { diff --git a/zilliqa/src/state.rs b/zilliqa/src/state.rs index 059efe9db..d31000cd9 100644 --- a/zilliqa/src/state.rs +++ b/zilliqa/src/state.rs @@ -17,13 +17,12 @@ use sha3::{Digest, Keccak256}; use tracing::debug; use crate::{ - block_store::BlockStore, cfg::{Amount, Forks, NodeConfig, ScillaExtLibsPath}, contracts::{self, Contract}, crypto::{self, Hash}, - db::TrieStorage, + db::{Db, TrieStorage}, error::ensure_success, - message::{BlockHeader, MAX_COMMITTEE_SIZE}, + message::{Block, BlockHeader, MAX_COMMITTEE_SIZE}, node::ChainId, scilla::{ParamValue, Scilla, Transition}, serde_util::vec_param_value, @@ -40,6 +39,7 @@ use crate::{ /// the storage root is used to index into the state /// all the keys are hashed and stored in the same sled tree pub struct State { + sql: Arc, db: Arc, accounts: PatriciaTrie, /// The Scilla interpreter interface. Note that it is lazily initialized - This is a bit of a hack to ensure that @@ -54,11 +54,10 @@ pub struct State { pub scilla_call_gas_exempt_addrs: Vec
, pub chain_id: ChainId, pub forks: Forks, - pub block_store: Arc, } impl State { - pub fn new(trie: TrieStorage, config: &NodeConfig, block_store: Arc) -> State { + pub fn new(trie: TrieStorage, config: &NodeConfig, sql: Arc) -> State { let db = Arc::new(trie); let consensus_config = &config.consensus; Self { @@ -74,7 +73,7 @@ impl State { scilla_call_gas_exempt_addrs: consensus_config.scilla_call_gas_exempt_addrs.clone(), chain_id: ChainId::new(config.eth_chain_id), forks: consensus_config.forks.clone(), - block_store, + sql, } } @@ -95,17 +94,13 @@ impl State { trie: TrieStorage, root_hash: B256, config: NodeConfig, - block_store: Arc, + sql: Arc, ) -> Self { - Self::new(trie, &config, block_store).at_root(root_hash) + Self::new(trie, &config, sql).at_root(root_hash) } - pub fn new_with_genesis( - trie: TrieStorage, - config: NodeConfig, - block_store: Arc, - ) -> Result { - let mut state = State::new(trie, &config, block_store); + pub fn new_with_genesis(trie: TrieStorage, config: NodeConfig, sql: Arc) -> Result { + let mut state = State::new(trie, &config, sql); if config.consensus.is_main { let shard_data = contracts::shard_registry::CONSTRUCTOR.encode_input( @@ -285,8 +280,8 @@ impl State { gas_price: self.gas_price, scilla_call_gas_exempt_addrs: self.scilla_call_gas_exempt_addrs.clone(), chain_id: self.chain_id, - block_store: self.block_store.clone(), forks: self.forks.clone(), + sql: self.sql.clone(), } } @@ -382,6 +377,14 @@ impl State { &bincode::serialize(&account)?, )?) } + + pub fn get_canonical_block_by_number(&self, number: u64) -> Result> { + self.sql.get_canonical_block_by_number(number) + } + + pub fn get_highest_canonical_block_number(&self) -> Result> { + self.sql.get_highest_canonical_block_number() + } } pub mod contract_addr { @@ -579,37 +582,23 @@ mod tests { use std::{path::PathBuf, sync::Arc}; use crypto::Hash; - use libp2p::PeerId; use revm::primitives::FixedBytes; use super::*; use crate::{ api::to_hex::ToHex, - block_store::BlockStore, cfg::NodeConfig, db::Db, message::BlockHeader, - node::{MessageSender, RequestId}, }; #[test] fn deposit_contract_updateability() { - let (s1, _) = tokio::sync::mpsc::unbounded_channel(); - let (s2, _) = tokio::sync::mpsc::unbounded_channel(); - let message_sender = MessageSender { - our_shard: 0, - our_peer_id: PeerId::random(), - outbound_channel: s1, - local_channel: s2, - request_id: RequestId::default(), - }; let db = Db::new::(None, 0, 0).unwrap(); let db = Arc::new(db); let config = NodeConfig::default(); - let block_store = - Arc::new(BlockStore::new(&config, db.clone(), message_sender.clone()).unwrap()); - let mut state = State::new(db.state_trie().unwrap(), &config, block_store); + let mut state = State::new(db.state_trie().unwrap(), &config, db); let deposit_init_addr = state.deploy_initial_deposit_contract(&config).unwrap(); diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 6b0348dfa..44629c744 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -19,7 +19,10 @@ use crate::{ cfg::NodeConfig, crypto::Hash, db::Db, - message::{Block, BlockRequest, BlockRequestV2, BlockResponse, ChainMetaData, ExternalMessage, InjectedProposal, Proposal}, + message::{ + Block, BlockRequest, BlockRequestV2, BlockResponse, ChainMetaData, ExternalMessage, + InjectedProposal, Proposal, + }, node::MessageSender, time::SystemTime, }; @@ -1067,6 +1070,13 @@ impl Sync { } } + /// Add bulk peers + pub fn add_peers(&mut self, peers: Vec) { + for peer in peers { + self.add_peer(peer); + } + } + /// Add a peer to the list of 
peers. pub fn add_peer(&mut self, peer: PeerId) { // if the new peer is not synced, it will get downgraded to the back of heap. @@ -1101,30 +1111,28 @@ impl Sync { } /// Returns (am_syncing, current_highest_block) - pub fn am_syncing(&self) -> Result<(bool, Block)> { - let highest_block = self - .db - .get_canonical_block_by_number( - self.db - .get_highest_canonical_block_number()? - .expect("no highest block"), - )? - .expect("missing highest block"); - Ok(( - self.in_pipeline != 0 - || !self.recent_proposals.is_empty() - || self.count_segments()? != 0, - highest_block, - )) + pub fn am_syncing(&self) -> Result { + Ok(self.in_pipeline != 0 + || self.count_segments()? != 0 + || !self.recent_proposals.is_empty()) } // Returns (starting_block, current_block, highest_block) if we're syncing, // None if we're not. pub fn get_sync_data(&self) -> Result> { - let (flag, highest_block) = self.am_syncing()?; + let flag = self.am_syncing()?; if !flag { Ok(None) } else { + let highest_block = self + .db + .get_canonical_block_by_number( + self.db + .get_highest_canonical_block_number()? + .expect("no highest block"), + )? + .expect("missing highest block"); + let highest_saved_block_number = highest_block.number(); let highest_block_number_seen = self.recent_proposals.back().unwrap().number(); Ok(Some(( From 0af7bf11880fa579cb6870b9e06ef14f9480381a Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 16 Jan 2025 11:10:42 +0800 Subject: [PATCH 068/119] feat: added sync from timeout, not just proposals. --- zilliqa/src/consensus.rs | 1 + zilliqa/src/sync.rs | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 674b20d29..f079b538d 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -3161,6 +3161,7 @@ impl Consensus { // Drives syncing from timeouts, not just new Proposals if self.sync.am_syncing()? { // TODO: Sync from Timeouts + self.sync.sync_internal()?; } else { trace!("not syncing ..."); } diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 44629c744..4fcf76534 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -347,6 +347,16 @@ impl Sync { } self.recent_proposals.push_back(proposal); + self.sync_internal() + } + + pub fn sync_internal(&mut self) -> Result<()> { + if self.recent_proposals.is_empty() { + // Do nothing if there's no recent proposals. + tracing::debug!("sync::Internal : missing recent proposals"); + return Ok(()); + } + match self.state { // Check if we are out of sync SyncState::Phase0 if self.in_pipeline == 0 => { From 0795f0e9b79e9cc0d6d852fac85bf4e80a4539fa Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 16 Jan 2025 17:40:58 +0800 Subject: [PATCH 069/119] feat: made the batch_size dynamic, so that it can get past a larger range. 
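In rough terms: when a peer answers a V1 block request with an empty BlockResponse that still carries availability info (a hint that the real reply would have been too large to serve), the request batch is shrunk a little; once a non-empty response arrives, it is restored to the configured size. The fixed VIEW_DRIFT constant is also replaced by a drift proportional to the current batch size (batch / 10). A minimal sketch of the adjustment, with illustrative names (`BATCH` stands in for the configured block_request_batch_size; the real code keeps the value in max_batch_size / max_batch_size_const):

    const BATCH: usize = 100; // stands in for config.block_request_batch_size

    // Empty response with availability attached: back off by ~5% of the
    // configured size, but never below 5 blocks.
    fn on_empty_response(batch_size: usize) -> usize {
        batch_size.saturating_sub(BATCH / 20).max(5)
    }

    // A non-empty response means the peer kept up: restore the full size.
    fn on_good_response(_batch_size: usize) -> usize {
        BATCH
    }

    fn main() {
        let mut size = BATCH;
        size = on_empty_response(size);
        assert_eq!(size, 95);
        size = on_good_response(size);
        assert_eq!(size, 100);
    }

Shrinking slowly and restoring in one step keeps sync throughput high in the common case while still letting a node work its way past a range of large blocks.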
--- zilliqa/src/state.rs | 7 +------ zilliqa/src/sync.rs | 35 ++++++++++++++++++++++------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/zilliqa/src/state.rs b/zilliqa/src/state.rs index d31000cd9..a77a6e265 100644 --- a/zilliqa/src/state.rs +++ b/zilliqa/src/state.rs @@ -585,12 +585,7 @@ mod tests { use revm::primitives::FixedBytes; use super::*; - use crate::{ - api::to_hex::ToHex, - cfg::NodeConfig, - db::Db, - message::BlockHeader, - }; + use crate::{api::to_hex::ToHex, cfg::NodeConfig, db::Db, message::BlockHeader}; #[test] fn deposit_contract_updateability() { diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 4fcf76534..142bc686c 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -72,6 +72,7 @@ pub struct Sync { request_timeout: Duration, // how many blocks to request at once max_batch_size: usize, + max_batch_size_const: usize, // how many blocks to inject into the queue max_blocks_in_flight: usize, // count of proposals pending in the pipeline @@ -95,14 +96,6 @@ impl Sync { #[cfg(debug_assertions)] const DO_SPECULATIVE: bool = false; - // For V1 BlockRequest, we request a little more than we need, due to drift - // Since the view number is an 'internal' clock, it is possible for the same block number - // to have different view numbers. - // 10 ~ 1-min - // 20 ~ 1-hr - // 30 ~ 2-days - const VIEW_DRIFT: u64 = 10; - // Minimum of 2 peers to avoid single source of truth. const MIN_PEERS: usize = 2; @@ -122,9 +115,7 @@ impl Sync { }) .collect(); let peer_id = message_sender.our_peer_id; - let max_batch_size = config - .block_request_batch_size - .clamp(Self::VIEW_DRIFT as usize * 2, 180); // up to 180 sec of blocks at a time. + let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. // This DB could be left in-here as it is only used in this module @@ -167,6 +158,7 @@ impl Sync { peer_id, request_timeout: config.consensus.consensus_timeout, max_batch_size, + max_batch_size_const: max_batch_size, max_blocks_in_flight, in_flight: None, in_pipeline: usize::MIN, @@ -707,8 +699,20 @@ impl Sync { // Downgrade empty responses if response.proposals.is_empty() { tracing::info!("sync::HandleBlockResponse : empty response {from}"); + + if let Some(availability) = response.availability { + tracing::info!("sync::Availability {}", availability.len()); + // response may be too large, so reduce request range + // this has the impact of slowing sync progress to a crawl. + self.max_batch_size = self + .max_batch_size + .saturating_sub(self.max_batch_size_const / 20) + .max(5); // 5% reduce, down to 5 - empirical value + } self.done_with_peer(DownGrade::Empty); return Ok(()); + } else { + self.max_batch_size = self.max_batch_size_const; } tracing::trace!( @@ -962,8 +966,12 @@ impl Sync { SyncState::Phase1(ChainMetaData { view_number, .. }) if matches!(peer.version, PeerVer::V1) => { + // For V1 BlockRequest, we request a little more than we need, due to drift + // Since the view number is an 'internal' clock, it is possible for the same block number + // to have different view numbers. 
+ let drift = self.max_batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { - to_view: view_number.saturating_add(Self::VIEW_DRIFT), + to_view: view_number.saturating_add(drift), from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } @@ -981,8 +989,9 @@ impl Sync { let meta = meta.unwrap(); let view_number = meta.view_number; self.state = SyncState::Phase1(meta); + let drift = self.max_batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { - to_view: view_number.saturating_add(Self::VIEW_DRIFT), + to_view: view_number.saturating_add(drift), from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } From 3b7ffcf1c24d1962bd9e49434d2232f3bbbdba79 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 17 Jan 2025 10:46:08 +0800 Subject: [PATCH 070/119] feat: added dynamic_batch_sizing() which is reactive, not pro-active. --- zilliqa/src/sync.rs | 62 +++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 142bc686c..c9b63b8d7 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -609,6 +609,7 @@ impl Sync { "sync::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", peer.peer_id ); + self.dynamic_batch_sizing(peer.peer_id, DownGrade::Timeout)?; self.done_with_peer(DownGrade::Timeout); } else { return Ok(()); @@ -679,6 +680,45 @@ impl Sync { Ok(()) } + /// Phase 1: Dynamic Batch Sizing + /// + /// Due to a hard-coded 10MB response limit in libp2p, we may be limited in how many blocks we can request + /// for in a single request, between 1-100 blocks. + /// TODO: Make this a pro-active setting instead. + fn dynamic_batch_sizing(&mut self, from: PeerId, reason: DownGrade) -> Result<()> { + let Some(peer) = self.in_flight.as_ref() else { + todo!("invalid peer"); + }; + + match (&self.state, &peer.version, reason) { + // V1 response may be too large. Reduce request range. + (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => { + self.max_batch_size = self + .max_batch_size + .saturating_sub(self.max_batch_size / 2) + .max(1); + } + (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => { + self.max_batch_size = self + .max_batch_size + .saturating_sub(self.max_batch_size / 3) + .max(1); + } + // V1 responses are going well, increase the request range linearly + (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) if from == peer.peer_id => { + self.max_batch_size = self + .max_batch_size + .saturating_add(self.max_batch_size_const / 10) + // For V1, ~100 empty blocks saturates the response payload + .min(100); + } + // V2 response may be too large, which can induce a timeout. Split into 10 block segments + _ => {} + } + + Ok(()) + } + /// Phase 1 / 2: Handle a V1 block response /// /// If the response if from a V2 peer, it will upgrade that peer to V2. @@ -696,25 +736,6 @@ impl Sync { return Ok(()); } - // Downgrade empty responses - if response.proposals.is_empty() { - tracing::info!("sync::HandleBlockResponse : empty response {from}"); - - if let Some(availability) = response.availability { - tracing::info!("sync::Availability {}", availability.len()); - // response may be too large, so reduce request range - // this has the impact of slowing sync progress to a crawl. 
- self.max_batch_size = self - .max_batch_size - .saturating_sub(self.max_batch_size_const / 20) - .max(5); // 5% reduce, down to 5 - empirical value - } - self.done_with_peer(DownGrade::Empty); - return Ok(()); - } else { - self.max_batch_size = self.max_batch_size_const; - } - tracing::trace!( "sync::HandleBlockResponse : received {} blocks from {from}", response.proposals.len() @@ -807,9 +828,11 @@ impl Sync { if response.is_empty() { // Empty response, downgrade peer and retry with a new peer. tracing::warn!("sync::MetadataResponse : empty blocks {from}",); + self.dynamic_batch_sizing(from, DownGrade::Empty)?; self.done_with_peer(DownGrade::Empty); return Ok(()); } else { + self.dynamic_batch_sizing(from, DownGrade::None)?; self.done_with_peer(DownGrade::None); } @@ -933,6 +956,7 @@ impl Sync { "sync::RequestMissingMetadata : in-flight request {} timed out, requesting from new peer", peer.peer_id ); + self.dynamic_batch_sizing(peer.peer_id, DownGrade::Timeout)?; self.done_with_peer(DownGrade::Timeout); } else { return Ok(()); From 49ad23c9f2d859e70365d0e6a62ab5c097cef954 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 17 Jan 2025 11:29:59 +0800 Subject: [PATCH 071/119] feat: make dynamic_batch_sizing() work per-peer, not per sync. --- zilliqa/src/sync.rs | 125 ++++++++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 57 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index c9b63b8d7..9c588dff9 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -72,7 +72,6 @@ pub struct Sync { request_timeout: Duration, // how many blocks to request at once max_batch_size: usize, - max_batch_size_const: usize, // how many blocks to inject into the queue max_blocks_in_flight: usize, // count of proposals pending in the pipeline @@ -105,6 +104,9 @@ impl Sync { message_sender: MessageSender, peers: Vec, ) -> Result { + let peer_id = message_sender.our_peer_id; + let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. + let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. let peers = peers .into_iter() .map(|peer_id| PeerInfo { @@ -112,11 +114,10 @@ impl Sync { score: 0, peer_id, last_used: Instant::now(), + reason: DownGrade::None, + batch_size: max_batch_size, }) .collect(); - let peer_id = message_sender.our_peer_id; - let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. - let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. // This DB could be left in-here as it is only used in this module // TODO: Make this in-memory by exploiting SQLite TEMP tables i.e. 
CREATE TEMP TABLE @@ -158,7 +159,6 @@ impl Sync { peer_id, request_timeout: config.consensus.consensus_timeout, max_batch_size, - max_batch_size_const: max_batch_size, max_blocks_in_flight, in_flight: None, in_pipeline: usize::MIN, @@ -232,7 +232,9 @@ impl Sync { last_used: Instant::now(), score:u32::MAX, version: row.get(5)?, - peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), + reason: DownGrade::None, + batch_size: self.max_batch_size, + peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), }, ))) .optional()?; @@ -609,7 +611,6 @@ impl Sync { "sync::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", peer.peer_id ); - self.dynamic_batch_sizing(peer.peer_id, DownGrade::Timeout)?; self.done_with_peer(DownGrade::Timeout); } else { return Ok(()); @@ -653,6 +654,8 @@ impl Sync { version: PeerVer::V2, peer_id: peer_info.peer_id, last_used: std::time::Instant::now(), + batch_size: self.max_batch_size, // unused in Phase 2 + reason: DownGrade::None, score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers }); ExternalMessage::MultiBlockRequest(request_hashes) @@ -662,6 +665,8 @@ impl Sync { version: PeerVer::V1, peer_id: peer_info.peer_id, last_used: std::time::Instant::now(), + batch_size: self.max_batch_size, // unused in Phase 2 + reason: DownGrade::None, score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers }); // do not add VIEW_DRIFT - the stored marker is accurate! @@ -680,45 +685,6 @@ impl Sync { Ok(()) } - /// Phase 1: Dynamic Batch Sizing - /// - /// Due to a hard-coded 10MB response limit in libp2p, we may be limited in how many blocks we can request - /// for in a single request, between 1-100 blocks. - /// TODO: Make this a pro-active setting instead. - fn dynamic_batch_sizing(&mut self, from: PeerId, reason: DownGrade) -> Result<()> { - let Some(peer) = self.in_flight.as_ref() else { - todo!("invalid peer"); - }; - - match (&self.state, &peer.version, reason) { - // V1 response may be too large. Reduce request range. - (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => { - self.max_batch_size = self - .max_batch_size - .saturating_sub(self.max_batch_size / 2) - .max(1); - } - (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => { - self.max_batch_size = self - .max_batch_size - .saturating_sub(self.max_batch_size / 3) - .max(1); - } - // V1 responses are going well, increase the request range linearly - (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) if from == peer.peer_id => { - self.max_batch_size = self - .max_batch_size - .saturating_add(self.max_batch_size_const / 10) - // For V1, ~100 empty blocks saturates the response payload - .min(100); - } - // V2 response may be too large, which can induce a timeout. Split into 10 block segments - _ => {} - } - - Ok(()) - } - /// Phase 1 / 2: Handle a V1 block response /// /// If the response if from a V2 peer, it will upgrade that peer to V2. @@ -828,11 +794,9 @@ impl Sync { if response.is_empty() { // Empty response, downgrade peer and retry with a new peer. 
tracing::warn!("sync::MetadataResponse : empty blocks {from}",); - self.dynamic_batch_sizing(from, DownGrade::Empty)?; self.done_with_peer(DownGrade::Empty); return Ok(()); } else { - self.dynamic_batch_sizing(from, DownGrade::None)?; self.done_with_peer(DownGrade::None); } @@ -956,7 +920,6 @@ impl Sync { "sync::RequestMissingMetadata : in-flight request {} timed out, requesting from new peer", peer.peer_id ); - self.dynamic_batch_sizing(peer.peer_id, DownGrade::Timeout)?; self.done_with_peer(DownGrade::Timeout); } else { return Ok(()); @@ -973,7 +936,7 @@ impl Sync { if let Some(peer) = self.get_next_peer() { tracing::info!( "sync::RequestMissingMetadata : requesting {} metadata of segment #{} from {}", - self.max_batch_size, + peer.batch_size, self.count_segments()? + 1, peer.peer_id ); @@ -984,7 +947,7 @@ impl Sync { ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, - batch_size: self.max_batch_size, + batch_size: peer.batch_size, }) } SyncState::Phase1(ChainMetaData { view_number, .. }) @@ -993,10 +956,10 @@ impl Sync { // For V1 BlockRequest, we request a little more than we need, due to drift // Since the view number is an 'internal' clock, it is possible for the same block number // to have different view numbers. - let drift = self.max_batch_size as u64 / 10; + let drift = peer.batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { to_view: view_number.saturating_add(drift), - from_view: view_number.saturating_sub(self.max_batch_size as u64), + from_view: view_number.saturating_sub(peer.batch_size as u64), }) } SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V2) => { @@ -1006,17 +969,17 @@ impl Sync { ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, - batch_size: self.max_batch_size, + batch_size: peer.batch_size, }) } SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V1) => { let meta = meta.unwrap(); let view_number = meta.view_number; self.state = SyncState::Phase1(meta); - let drift = self.max_batch_size as u64 / 10; + let drift = peer.batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { to_view: view_number.saturating_add(drift), - from_view: view_number.saturating_sub(self.max_batch_size as u64), + from_view: view_number.saturating_sub(peer.batch_size as u64), }) } _ => anyhow::bail!("sync::MissingMetadata : invalid state"), @@ -1103,6 +1066,7 @@ impl Sync { fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { tracing::trace!("sync::DoneWithPeer {} {:?}", peer.peer_id, downgrade); + peer.reason = downgrade.clone(); peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better peer.score = peer.score.max(self.peers.peek().unwrap().score); @@ -1129,6 +1093,8 @@ impl Sync { score: self.peers.iter().map(|p| p.score).min().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), + reason: DownGrade::None, + batch_size: self.max_batch_size, }; tracing::trace!("sync::AddPeer {peer}"); // ensure that it is unique - avoids single source of truth @@ -1147,12 +1113,43 @@ impl Sync { if self.peers.len() >= Self::MIN_PEERS { let mut peer = self.peers.pop()?; peer.last_used = std::time::Instant::now(); // used to determine stale requests. 
+ peer.batch_size = self.dynamic_batch_sizing(& peer); tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); return Some(peer); } None } + /// Phase 1: Dynamic Batch Sizing + /// + /// Due to a hard-coded 10MB response limit in libp2p, we may be limited in how many blocks we can request + /// for in a single request, between 1-100 blocks. + fn dynamic_batch_sizing(&self, peer: &PeerInfo) -> usize { + match (&self.state, &peer.version, &peer.reason) { + // V1 response may be too large. Reduce request range. + (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => { + peer.batch_size + .saturating_sub(peer.batch_size / 2) + .max(1) + } + // V1 response may be too large. Reduce request range. + (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => { + peer.batch_size + .saturating_sub(peer.batch_size / 3) + .max(1) + } + // V1 responses are going well, increase the request range linearly + (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) => { + peer.batch_size + .saturating_add(self.max_batch_size / 10) + // For V1, ~100 empty blocks saturates the response payload + .min(100) + } + // V2 response may be too large, which can induce a timeout. Split into 10 block segments + _ => { self.max_batch_size} + } + } + /// Returns (am_syncing, current_highest_block) pub fn am_syncing(&self) -> Result { Ok(self.in_pipeline != 0 @@ -1193,6 +1190,8 @@ struct PeerInfo { peer_id: PeerId, last_used: Instant, version: PeerVer, + batch_size: usize, + reason: DownGrade, } impl Ord for PeerInfo { @@ -1212,13 +1211,25 @@ impl PartialOrd for PeerInfo { /// For downgrading a peer from being selected in get_next_peer(). /// Ordered by degree of offence i.e. None is good, Timeout is worst -#[derive(Debug)] +#[derive(Debug, Clone, Eq, PartialEq)] enum DownGrade { None, Empty, Timeout, } +impl Ord for DownGrade { + fn cmp(&self, other: &Self) -> Ordering { + (self.clone() as u32).cmp(&(other.clone() as u32)) + } +} + +impl PartialOrd for DownGrade { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + /// Sync state #[derive(Debug)] enum SyncState { From f12793e89ac077f0131b9a426d3a0c135cea983d Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 17 Jan 2025 14:36:26 +0800 Subject: [PATCH 072/119] fix: wire up peers in test Network. 
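With BlockStore gone, the test harness has to hand each node's Sync its peer list explicitly when the network is built, when a node is added, and when nodes are restarted; the block_production test's wait limit is also raised from 100 to 1000. A rough sketch of the wiring this patch adds, using stand-in types (the real harness uses libp2p::PeerId and the Network/Node structs in tests/it/main.rs):

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct PeerId(u64);

    struct Sync {
        our_id: PeerId,
        peers: Vec<PeerId>,
    }

    impl Sync {
        // Mirrors Sync::add_peers: seed every known peer, skipping ourselves
        // and duplicates, so no single peer becomes the only source of truth.
        fn add_peers(&mut self, peers: Vec<PeerId>) {
            for peer in peers {
                if peer != self.our_id && !self.peers.contains(&peer) {
                    self.peers.push(peer);
                }
            }
        }
    }

    fn main() {
        let all: Vec<PeerId> = (0..4).map(PeerId).collect();
        let mut syncers: Vec<Sync> = all
            .iter()
            .map(|&id| Sync { our_id: id, peers: Vec::new() })
            .collect();
        // After constructing the network, every node learns about the others.
        for sync in &mut syncers {
            sync.add_peers(all.clone());
        }
        assert!(syncers.iter().all(|s| s.peers.len() == 3));
    }

In the real tests the same peer-ID collection is applied via node.inner.lock().unwrap().consensus.sync.add_peers(peers.clone()) for every node in the network.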
--- zilliqa/src/sync.rs | 35 +++++++++++++++++++++-------------- zilliqa/tests/it/consensus.rs | 2 +- zilliqa/tests/it/main.rs | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 9c588dff9..6ebd89114 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -234,7 +234,7 @@ impl Sync { version: row.get(5)?, reason: DownGrade::None, batch_size: self.max_batch_size, - peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), + peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), }, ))) .optional()?; @@ -680,7 +680,10 @@ impl Sync { .send_external_message(peer_info.peer_id, message)?; } } else { - tracing::warn!("sync::RequestMissingBlocks : insufficient peers to handle request"); + tracing::warn!( + "sync::RequestMissingBlocks : {} insufficient peers to handle request", + self.peers.len() + ); } Ok(()) } @@ -988,7 +991,10 @@ impl Sync { .send_external_message(peer.peer_id, message)?; self.in_flight = Some(peer); } else { - tracing::warn!("sync::RequestMissingMetadata : insufficient peers to handle request"); + tracing::warn!( + "sync::RequestMissingBlocks : {} insufficient peers to handle request", + self.peers.len() + ); } Ok(()) } @@ -1079,8 +1085,11 @@ impl Sync { /// Add bulk peers pub fn add_peers(&mut self, peers: Vec) { + tracing::debug!("sync::AddPeers {:?}", peers); for peer in peers { - self.add_peer(peer); + if peer != self.peer_id { + self.add_peer(peer); + } } } @@ -1096,10 +1105,11 @@ impl Sync { reason: DownGrade::None, batch_size: self.max_batch_size, }; - tracing::trace!("sync::AddPeer {peer}"); // ensure that it is unique - avoids single source of truth self.peers.retain(|p: &PeerInfo| p.peer_id != peer); self.peers.push(new_peer); + + tracing::trace!("sync::AddPeer {peer}/{}", self.peers.len()); } /// Remove a peer from the list of peers. @@ -1113,10 +1123,11 @@ impl Sync { if self.peers.len() >= Self::MIN_PEERS { let mut peer = self.peers.pop()?; peer.last_used = std::time::Instant::now(); // used to determine stale requests. - peer.batch_size = self.dynamic_batch_sizing(& peer); + peer.batch_size = self.dynamic_batch_sizing(&peer); tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); return Some(peer); } + tracing::warn!("sync::NextPeer : {} insufficient peers", self.peers.len()); None } @@ -1128,25 +1139,21 @@ impl Sync { match (&self.state, &peer.version, &peer.reason) { // V1 response may be too large. Reduce request range. (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => { - peer.batch_size - .saturating_sub(peer.batch_size / 2) - .max(1) + peer.batch_size.saturating_sub(peer.batch_size / 2).max(1) } // V1 response may be too large. Reduce request range. (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => { - peer.batch_size - .saturating_sub(peer.batch_size / 3) - .max(1) + peer.batch_size.saturating_sub(peer.batch_size / 3).max(1) } // V1 responses are going well, increase the request range linearly (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) => { - peer.batch_size + peer.batch_size .saturating_add(self.max_batch_size / 10) // For V1, ~100 empty blocks saturates the response payload .min(100) } // V2 response may be too large, which can induce a timeout. 
Split into 10 block segments - _ => { self.max_batch_size} + _ => self.max_batch_size, } } diff --git a/zilliqa/tests/it/consensus.rs b/zilliqa/tests/it/consensus.rs index 670ee3613..607c00fee 100644 --- a/zilliqa/tests/it/consensus.rs +++ b/zilliqa/tests/it/consensus.rs @@ -116,7 +116,7 @@ async fn block_production(mut network: Network) { .map_or(0, |b| b.number()) >= 10 }, - 100, + 1000, ) .await .unwrap(); diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 9be342e59..481b73c8b 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -401,6 +401,8 @@ impl Network { let receive_resend_message = UnboundedReceiverStream::new(receive_resend_message).boxed(); receivers.push(receive_resend_message); + let peers = nodes.iter().map(|n| n.peer_id).collect_vec(); + for node in &nodes { trace!( "Node {}: {} (dir: {})", @@ -408,6 +410,12 @@ impl Network { node.peer_id, node.dir.as_ref().unwrap().path().to_string_lossy(), ); + node.inner + .lock() + .unwrap() + .consensus + .sync + .add_peers(peers.clone()); } Network { @@ -505,6 +513,9 @@ impl Network { let (node, receiver, local_receiver, request_responses) = node(config, secret_key, onchain_key, self.nodes.len(), None).unwrap(); + let peers = self.nodes.iter().map(|n| n.peer_id).collect_vec(); + node.inner.lock().unwrap().consensus.sync.add_peers(peers); + trace!("Node {}: {}", node.index, node.peer_id); let index = node.index; @@ -567,6 +578,8 @@ impl Network { .chain(request_response_receivers) .collect(); + let peers = nodes.iter().map(|n| n.peer_id).collect_vec(); + for node in &nodes { trace!( "Node {}: {} (dir: {})", @@ -574,6 +587,12 @@ impl Network { node.peer_id, node.dir.as_ref().unwrap().path().to_string_lossy(), ); + node.inner + .lock() + .unwrap() + .consensus + .sync + .add_peers(peers.clone()); } let (resend_message, receive_resend_message) = mpsc::unbounded_channel::(); From b58d7f76bc78a52167887e3e517f85579bdd57c3 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 17 Jan 2025 15:57:17 +0800 Subject: [PATCH 073/119] fix: handle when V2 BlockResponse is late. --- zilliqa/src/sync.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 6ebd89114..9caea8cdf 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -700,8 +700,12 @@ impl Sync { && response.from_view == u64::MAX { tracing::info!("sync::HandleBlockResponse : upgrading {from}",); - self.in_flight.as_mut().unwrap().version = PeerVer::V2; - self.done_with_peer(DownGrade::None); + if let Some(peer) = self.in_flight.as_mut() { + if peer.peer_id == from { + peer.version = PeerVer::V2; + self.done_with_peer(DownGrade::None); + } + } return Ok(()); } From cf2b9c7cf2587308dc34ea3f89710acd22ca17e9 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 17 Jan 2025 17:54:35 +0800 Subject: [PATCH 074/119] feat: sync batch_size should depend on the current request_range, not peer specific. 
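This walks back part of the previous per-peer change: the batch size describes the request range the syncer is currently working through, not any particular peer, so it is kept as one sync-wide value and nudged by the outcome of the previous in-flight request (now recorded in in_flight_reason). A simplified sketch of the adjustment, with illustrative names; the real dynamic_batch_sizing() also matches on the sync phase and the peer's protocol version, and the Empty/None ratios shown here are assumed to carry over unchanged from the earlier per-peer version:

    #[derive(Clone, Copy)]
    enum DownGrade {
        None,    // previous request succeeded
        Empty,   // previous request came back empty
        Timeout, // previous request timed out
    }

    fn next_batch_size(current: usize, last: DownGrade, configured: usize) -> usize {
        match last {
            // The response was probably too large to serve: halve the range.
            DownGrade::Timeout => current.saturating_sub(current / 2).max(1),
            // An empty reply is a softer signal: shrink by a third.
            DownGrade::Empty => current.saturating_sub(current / 3).max(1),
            // Going well: grow linearly, capped where ~100 blocks would
            // saturate the 10MB libp2p response payload.
            DownGrade::None => current.saturating_add(configured / 10).min(100),
        }
    }

    fn main() {
        let mut size = 100;
        size = next_batch_size(size, DownGrade::Timeout, 100);
        assert_eq!(size, 50);
        size = next_batch_size(size, DownGrade::None, 100);
        assert_eq!(size, 60);
    }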
--- zilliqa/src/sync.rs | 59 +++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 9caea8cdf..7cb9e8df0 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -68,6 +68,7 @@ pub struct Sync { peers: BinaryHeap, // peer handling an in-flight request in_flight: Option, + in_flight_reason: DownGrade, // in-flight request timeout, before retry request_timeout: Duration, // how many blocks to request at once @@ -114,8 +115,6 @@ impl Sync { score: 0, peer_id, last_used: Instant::now(), - reason: DownGrade::None, - batch_size: max_batch_size, }) .collect(); @@ -161,6 +160,7 @@ impl Sync { max_batch_size, max_blocks_in_flight, in_flight: None, + in_flight_reason: DownGrade::None, in_pipeline: usize::MIN, state, recent_proposals: VecDeque::with_capacity(max_batch_size), @@ -232,8 +232,6 @@ impl Sync { last_used: Instant::now(), score:u32::MAX, version: row.get(5)?, - reason: DownGrade::None, - batch_size: self.max_batch_size, peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), }, ))) @@ -654,8 +652,6 @@ impl Sync { version: PeerVer::V2, peer_id: peer_info.peer_id, last_used: std::time::Instant::now(), - batch_size: self.max_batch_size, // unused in Phase 2 - reason: DownGrade::None, score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers }); ExternalMessage::MultiBlockRequest(request_hashes) @@ -665,8 +661,6 @@ impl Sync { version: PeerVer::V1, peer_id: peer_info.peer_id, last_used: std::time::Instant::now(), - batch_size: self.max_batch_size, // unused in Phase 2 - reason: DownGrade::None, score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers }); // do not add VIEW_DRIFT - the stored marker is accurate! @@ -943,7 +937,7 @@ impl Sync { if let Some(peer) = self.get_next_peer() { tracing::info!( "sync::RequestMissingMetadata : requesting {} metadata of segment #{} from {}", - peer.batch_size, + self.max_batch_size, self.count_segments()? + 1, peer.peer_id ); @@ -954,7 +948,7 @@ impl Sync { ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, - batch_size: peer.batch_size, + batch_size: self.max_batch_size, }) } SyncState::Phase1(ChainMetaData { view_number, .. }) @@ -963,10 +957,10 @@ impl Sync { // For V1 BlockRequest, we request a little more than we need, due to drift // Since the view number is an 'internal' clock, it is possible for the same block number // to have different view numbers. 
- let drift = peer.batch_size as u64 / 10; + let drift = self.max_batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { to_view: view_number.saturating_add(drift), - from_view: view_number.saturating_sub(peer.batch_size as u64), + from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V2) => { @@ -976,17 +970,17 @@ impl Sync { ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, - batch_size: peer.batch_size, + batch_size: self.max_batch_size, }) } SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V1) => { let meta = meta.unwrap(); let view_number = meta.view_number; self.state = SyncState::Phase1(meta); - let drift = peer.batch_size as u64 / 10; + let drift = self.max_batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { to_view: view_number.saturating_add(drift), - from_view: view_number.saturating_sub(peer.batch_size as u64), + from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } _ => anyhow::bail!("sync::MissingMetadata : invalid state"), @@ -1076,7 +1070,7 @@ impl Sync { fn done_with_peer(&mut self, downgrade: DownGrade) { if let Some(mut peer) = self.in_flight.take() { tracing::trace!("sync::DoneWithPeer {} {:?}", peer.peer_id, downgrade); - peer.reason = downgrade.clone(); + self.in_flight_reason = downgrade.clone(); peer.score = peer.score.saturating_add(downgrade as u32); // Ensure that the next peer is equal or better peer.score = peer.score.max(self.peers.peek().unwrap().score); @@ -1106,8 +1100,6 @@ impl Sync { score: self.peers.iter().map(|p| p.score).min().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), - reason: DownGrade::None, - batch_size: self.max_batch_size, }; // ensure that it is unique - avoids single source of truth self.peers.retain(|p: &PeerInfo| p.peer_id != peer); @@ -1127,7 +1119,7 @@ impl Sync { if self.peers.len() >= Self::MIN_PEERS { let mut peer = self.peers.pop()?; peer.last_used = std::time::Instant::now(); // used to determine stale requests. - peer.batch_size = self.dynamic_batch_sizing(&peer); + self.max_batch_size = self.dynamic_batch_sizing(&peer); tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); return Some(peer); } @@ -1140,22 +1132,23 @@ impl Sync { /// Due to a hard-coded 10MB response limit in libp2p, we may be limited in how many blocks we can request /// for in a single request, between 1-100 blocks. fn dynamic_batch_sizing(&self, peer: &PeerInfo) -> usize { - match (&self.state, &peer.version, &peer.reason) { + match (&self.state, &peer.version, &self.in_flight_reason) { // V1 response may be too large. Reduce request range. - (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => { - peer.batch_size.saturating_sub(peer.batch_size / 2).max(1) - } + (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => self + .max_batch_size + .saturating_sub(self.max_batch_size / 2) + .max(1), // V1 response may be too large. Reduce request range. 
- (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => { - peer.batch_size.saturating_sub(peer.batch_size / 3).max(1) - } + (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => self + .max_batch_size + .saturating_sub(self.max_batch_size / 3) + .max(1), // V1 responses are going well, increase the request range linearly - (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) => { - peer.batch_size - .saturating_add(self.max_batch_size / 10) - // For V1, ~100 empty blocks saturates the response payload - .min(100) - } + (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) => self + .max_batch_size + .saturating_add(self.max_batch_size) + // For V1, ~100 empty blocks saturates the response payload + .min(100), // V2 response may be too large, which can induce a timeout. Split into 10 block segments _ => self.max_batch_size, } @@ -1201,8 +1194,6 @@ struct PeerInfo { peer_id: PeerId, last_used: Instant, version: PeerVer, - batch_size: usize, - reason: DownGrade, } impl Ord for PeerInfo { From 8b40988cb5c35a0dac4720b3f4ba0a1c5d533fce Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 17 Jan 2025 22:42:37 +0800 Subject: [PATCH 075/119] feat: simplified the request_missing_metadata() match selector. --- zilliqa/src/sync.rs | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 7cb9e8df0..907049429 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -941,19 +941,15 @@ impl Sync { self.count_segments()? + 1, peer.peer_id ); - let message = match self.state { - SyncState::Phase1(ChainMetaData { parent_hash, .. }) - if matches!(peer.version, PeerVer::V2) => - { + let message = match (self.state.clone(), &peer.version) { + (SyncState::Phase1(ChainMetaData { parent_hash, .. }), PeerVer::V2) => { ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), from_hash: parent_hash, batch_size: self.max_batch_size, }) } - SyncState::Phase1(ChainMetaData { view_number, .. }) - if matches!(peer.version, PeerVer::V1) => - { + (SyncState::Phase1(ChainMetaData { view_number, .. }), PeerVer::V1) => { // For V1 BlockRequest, we request a little more than we need, due to drift // Since the view number is an 'internal' clock, it is possible for the same block number // to have different view numbers. @@ -963,7 +959,7 @@ impl Sync { from_view: view_number.saturating_sub(self.max_batch_size as u64), }) } - SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V2) => { + (SyncState::Phase0, PeerVer::V2) if meta.is_some() => { let meta = meta.unwrap(); let parent_hash = meta.parent_hash; self.state = SyncState::Phase1(meta); @@ -973,7 +969,7 @@ impl Sync { batch_size: self.max_batch_size, }) } - SyncState::Phase0 if meta.is_some() && matches!(peer.version, PeerVer::V1) => { + (SyncState::Phase0, PeerVer::V1) if meta.is_some() => { let meta = meta.unwrap(); let view_number = meta.view_number; self.state = SyncState::Phase1(meta); @@ -1133,17 +1129,12 @@ impl Sync { /// for in a single request, between 1-100 blocks. fn dynamic_batch_sizing(&self, peer: &PeerInfo) -> usize { match (&self.state, &peer.version, &self.in_flight_reason) { - // V1 response may be too large. Reduce request range. - (SyncState::Phase1(_), PeerVer::V1, DownGrade::Timeout) => self - .max_batch_size - .saturating_sub(self.max_batch_size / 2) - .max(1), - // V1 response may be too large. Reduce request range. + // V1 response may be too large, reduce request range. 
(SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => self .max_batch_size .saturating_sub(self.max_batch_size / 3) .max(1), - // V1 responses are going well, increase the request range linearly + // V1 response going well, increase the request range (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) => self .max_batch_size .saturating_add(self.max_batch_size) @@ -1233,7 +1224,7 @@ impl PartialOrd for DownGrade { } /// Sync state -#[derive(Debug)] +#[derive(Debug, Clone)] enum SyncState { Phase0, Phase1(ChainMetaData), From 2146f8409158e6a77c80862f7df71d9135fbee61 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 20 Jan 2025 11:17:16 +0800 Subject: [PATCH 076/119] fix: improve test sync, added Network::run_until_synced(); --- zilliqa/tests/it/main.rs | 20 ++++++++++++++++++++ zilliqa/tests/it/staking.rs | 20 +++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 481b73c8b..a0fa93758 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -836,6 +836,10 @@ impl Network { true } } + AnyMessage::External(ExternalMessage::InjectedProposal(_)) => { + self.handle_message(m.clone()); + false + } _ => true, }); @@ -1049,6 +1053,22 @@ impl Network { } } + async fn run_until_synced(&mut self, index: usize) { + let mut check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); + while check == index { + check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); + } + self.run_until( + |net| { + net.get_node(index).get_finalized_height().unwrap() + >= net.get_node(check).get_finalized_height().unwrap() + }, + 1000, + ) + .await + .unwrap(); + } + async fn run_until( &mut self, mut condition: impl FnMut(&mut Network) -> bool, diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index ed5c83473..9c62b02f6 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -425,12 +425,28 @@ async fn rewards_are_sent_to_reward_address_of_proposer(mut network: Network) { check_miner_got_reward(&wallet, 1).await; } +async fn wait_for_sync(network: &mut Network, index: usize) { + info!("syncing node #{}", index); + // sync up new node + network + .run_until( + |net| { + net.get_node(index).get_finalized_height().unwrap() + >= net.get_node(0).get_finalized_height().unwrap() + }, + 1000, + ) + .await + .unwrap(); + info!("synced node #{}", index); +} + #[zilliqa_macros::test(blocks_per_epoch = 2, deposit_v3_upgrade_block_height = 12)] async fn validators_can_join_and_become_proposer(mut network: Network) { let wallet = network.genesis_wallet().await; // randomise the current epoch state and current leader - let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); + let blocks_to_prerun = network.rng.lock().unwrap().gen_range(1..8); network .run_until_block(&wallet, blocks_to_prerun.into(), 100) .await; @@ -447,6 +463,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); + wait_for_sync(&mut network, index).await; let deposit_hash = deposit_stake( &mut network, &wallet, @@ -535,6 +552,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) .await; + wait_for_sync(&mut network, index).await; let deposit_hash = deposit_v3_stake( &mut network, &wallet, From e0b2914e0e9133039fbd8fef1a9042f88a09256e Mon Sep 17 00:00:00 2001 From: Shawn Date: 
Mon, 20 Jan 2025 11:20:28 +0800 Subject: [PATCH 077/119] fix: fixed unreliable::blocks_are_produced_while_a_node_restarts() test. --- zilliqa/tests/it/main.rs | 6 ++---- zilliqa/tests/it/unreliable.rs | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index a0fa93758..3462b64e7 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -1054,10 +1054,8 @@ impl Network { } async fn run_until_synced(&mut self, index: usize) { - let mut check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); - while check == index { - check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); - } + assert!(self.nodes.len() > 1); + let check = if index != 0 { 0 } else { 1 }; self.run_until( |net| { net.get_node(index).get_finalized_height().unwrap() diff --git a/zilliqa/tests/it/unreliable.rs b/zilliqa/tests/it/unreliable.rs index 7518a3131..b35787e93 100644 --- a/zilliqa/tests/it/unreliable.rs +++ b/zilliqa/tests/it/unreliable.rs @@ -26,6 +26,7 @@ async fn blocks_are_produced_while_a_node_restarts(mut network: Network) { // Reconnect the 'restarted' node. network.connect_node(restarted_node); + network.run_until_synced(restarted_node).await; // TODO(#721): We should assert here that a new view occurred if-and-only-if the 'restarted' node was the proposer // of blocks 3 or 4. This would tell us that we aren't producing new views unnecessarily. From 2e1a11178a6828483af0e78004e16bcc245fd04f Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 20 Jan 2025 12:27:14 +0800 Subject: [PATCH 078/119] fix: staking::validators_can_join_and_become_proposer() test. --- zilliqa/tests/it/staking.rs | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index 9c62b02f6..ed2954b0c 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -55,7 +55,7 @@ async fn deposit_stake( .await .unwrap() .tx_hash(); - network.run_until_receipt(staker_wallet, hash, 80).await; + network.run_until_receipt(staker_wallet, hash, 180).await; // Stake the new validator's funds. let tx = TransactionRequest::new() @@ -82,7 +82,7 @@ async fn deposit_stake( .await .unwrap() .tx_hash(); - let receipt = network.run_until_receipt(staker_wallet, hash, 80).await; + let receipt = network.run_until_receipt(staker_wallet, hash, 180).await; assert_eq!(receipt.status.unwrap().as_u64(), 1); hash } @@ -105,7 +105,7 @@ async fn deposit_v3_stake( .await .unwrap() .tx_hash(); - network.run_until_receipt(staker_wallet, hash, 80).await; + network.run_until_receipt(staker_wallet, hash, 180).await; // Stake the new validator's funds. 
let tx = TransactionRequest::new() @@ -133,7 +133,7 @@ async fn deposit_v3_stake( .await .unwrap() .tx_hash(); - let receipt = network.run_until_receipt(staker_wallet, hash, 80).await; + let receipt = network.run_until_receipt(staker_wallet, hash, 180).await; assert_eq!(receipt.status.unwrap().as_u64(), 1); hash } @@ -425,28 +425,12 @@ async fn rewards_are_sent_to_reward_address_of_proposer(mut network: Network) { check_miner_got_reward(&wallet, 1).await; } -async fn wait_for_sync(network: &mut Network, index: usize) { - info!("syncing node #{}", index); - // sync up new node - network - .run_until( - |net| { - net.get_node(index).get_finalized_height().unwrap() - >= net.get_node(0).get_finalized_height().unwrap() - }, - 1000, - ) - .await - .unwrap(); - info!("synced node #{}", index); -} - #[zilliqa_macros::test(blocks_per_epoch = 2, deposit_v3_upgrade_block_height = 12)] async fn validators_can_join_and_become_proposer(mut network: Network) { let wallet = network.genesis_wallet().await; // randomise the current epoch state and current leader - let blocks_to_prerun = network.rng.lock().unwrap().gen_range(1..8); + let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); network .run_until_block(&wallet, blocks_to_prerun.into(), 100) .await; @@ -463,7 +447,6 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); - wait_for_sync(&mut network, index).await; let deposit_hash = deposit_stake( &mut network, &wallet, @@ -552,7 +535,6 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) .await; - wait_for_sync(&mut network, index).await; let deposit_hash = deposit_v3_stake( &mut network, &wallet, From 25102c5f641890ff50ec28989a954c5e090d1519 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 20 Jan 2025 14:34:12 +0800 Subject: [PATCH 079/119] fix: tests. 
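The main.rs hunk below guards the test network's message pump against an empty queue: gen_range(0..messages.len()) panics when messages is empty, so a message is only drawn when at least one is pending, and the rest are requeued. A small sketch of the pattern, assuming the rand crate; the harness types are omitted.

    use rand::Rng;

    // Pick and remove one random element, or return None when nothing is
    // queued, avoiding the gen_range(0..0) panic.
    fn pick_random<T>(items: &mut Vec<T>, rng: &mut impl Rng) -> Option<T> {
        if items.is_empty() {
            return None;
        }
        let index = rng.gen_range(0..items.len());
        Some(items.swap_remove(index))
    }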
--- zilliqa/tests/it/consensus.rs | 3 ++- zilliqa/tests/it/main.rs | 35 ++++++++++++++++++---------------- zilliqa/tests/it/staking.rs | 2 ++ zilliqa/tests/it/unreliable.rs | 2 +- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/zilliqa/tests/it/consensus.rs b/zilliqa/tests/it/consensus.rs index 607c00fee..7d110fb90 100644 --- a/zilliqa/tests/it/consensus.rs +++ b/zilliqa/tests/it/consensus.rs @@ -106,6 +106,7 @@ async fn block_production(mut network: Network) { info!("Adding networked node."); let index = network.add_node(); + network.run_until_synced(index).await; network .run_until( @@ -116,7 +117,7 @@ async fn block_production(mut network: Network) { .map_or(0, |b| b.number()) >= 10 }, - 1000, + 2000, ) .await .unwrap(); diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 3462b64e7..4b357d524 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -844,19 +844,20 @@ impl Network { }); // Pick a random message - let index = self.rng.lock().unwrap().gen_range(0..messages.len()); - let (source, destination, message) = messages.swap_remove(index); - // Requeue the other messages - for message in messages { - self.resend_message.send(message).unwrap(); - } - - trace!( - "{}", - format_message(&self.nodes, source, destination, &message) - ); + if !messages.is_empty() { + let index = self.rng.lock().unwrap().gen_range(0..messages.len()); + let (source, destination, message) = messages.swap_remove(index); + // Requeue the other messages + for message in messages { + self.resend_message.send(message).unwrap(); + } + trace!( + "{}", + format_message(&self.nodes, source, destination, &message) + ); - self.handle_message((source, destination, message)) + self.handle_message((source, destination, message)) + } } fn handle_message(&mut self, message: StreamMessage) { @@ -1054,14 +1055,16 @@ impl Network { } async fn run_until_synced(&mut self, index: usize) { - assert!(self.nodes.len() > 1); - let check = if index != 0 { 0 } else { 1 }; + let mut check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); + while index == check { + check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); + } self.run_until( |net| { net.get_node(index).get_finalized_height().unwrap() - >= net.get_node(check).get_finalized_height().unwrap() + == net.get_node(check).get_finalized_height().unwrap() }, - 1000, + 10000, ) .await .unwrap(); diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index ed2954b0c..f6d4455fc 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -447,6 +447,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); + network.run_until_synced(index).await; let deposit_hash = deposit_stake( &mut network, &wallet, @@ -535,6 +536,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) .await; + network.run_until_synced(index).await; let deposit_hash = deposit_v3_stake( &mut network, &wallet, diff --git a/zilliqa/tests/it/unreliable.rs b/zilliqa/tests/it/unreliable.rs index b35787e93..8a68acecf 100644 --- a/zilliqa/tests/it/unreliable.rs +++ b/zilliqa/tests/it/unreliable.rs @@ -32,5 +32,5 @@ async fn blocks_are_produced_while_a_node_restarts(mut network: Network) { // of blocks 3 or 4. This would tell us that we aren't producing new views unnecessarily. 
// Ensure more blocks are produced. - network.run_until_block(&wallet, 12.into(), 400).await; + network.run_until_block(&wallet, 12.into(), 1400).await; } From cab93f3a2eb62181ad52fb91fe4e918505f417f6 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 21 Jan 2025 15:35:32 +0800 Subject: [PATCH 080/119] nit: use Db::contains_block() instead of Db::get_block_by_hash(). --- zilliqa/src/node.rs | 2 +- zilliqa/src/sync.rs | 23 +++++++++-------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 58744ce98..d04a7a7ec 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -917,8 +917,8 @@ impl Node { return Ok(()); } trace!("Handling proposal for view {0}", req.block.header.view); - self.consensus.sync.mark_received_proposal(&req)?; let proposal = self.consensus.receive_block(from, req.block)?; + self.consensus.sync.mark_received_proposal(req.from)?; if let Some(proposal) = proposal { trace!( " ... broadcasting proposal for view {0}", diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 907049429..5ef335a3e 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -353,7 +353,7 @@ impl Sync { // Check if we are out of sync SyncState::Phase0 if self.in_pipeline == 0 => { let parent_hash = self.recent_proposals.back().unwrap().header.qc.block_hash; - if self.db.get_block_by_hash(&parent_hash)?.is_none() { + if !self.db.contains_block(&parent_hash)? { // No parent block, trigger sync tracing::warn!("sync::SyncProposal : syncing from {parent_hash}",); let block_hash = self.recent_proposals.back().unwrap().hash(); @@ -389,7 +389,7 @@ impl Sync { // Wait till 99% synced, zip it up! SyncState::Phase3 if self.in_pipeline == 0 => { let ancestor_hash = self.recent_proposals.front().unwrap().header.qc.block_hash; - if self.db.get_block_by_hash(&ancestor_hash)?.is_some() { + if self.db.contains_block(&ancestor_hash)? { tracing::info!( "sync::SyncProposal : finishing {} blocks for segment #{} from {}", self.recent_proposals.len(), @@ -399,9 +399,10 @@ impl Sync { // inject the proposals let proposals = self.recent_proposals.drain(..).collect_vec(); self.inject_proposals(proposals)?; + } else { + self.empty_metadata()?; + self.state = SyncState::Phase0; } - self.empty_metadata()?; - self.state = SyncState::Phase0; } // Retry to fix sync issues e.g. peers that are now offline SyncState::Retry1 if self.in_pipeline == 0 => { @@ -852,7 +853,7 @@ impl Sync { self.insert_metadata(segment)?; // If the segment hits our history, start Phase 2. - if self.db.get_block_by_hash(&last_block_hash)?.is_some() { + if self.db.contains_block(&last_block_hash)? { self.state = SyncState::Phase2(Hash::ZERO); } else if Self::DO_SPECULATIVE { self.request_missing_metadata(None)?; @@ -1042,19 +1043,13 @@ impl Sync { /// Mark a received proposal /// /// Mark a proposal as received, and remove it from the chain. 
- pub fn mark_received_proposal(&mut self, prop: &InjectedProposal) -> Result<()> { - if prop.from != self.peer_id { + pub fn mark_received_proposal(&mut self, from: PeerId) -> Result<()> { + if from != self.peer_id { tracing::error!( "sync::MarkReceivedProposal : foreign InjectedProposal from {}", - prop.from + from ); } - // if let Some(p) = self.chain_metadata.remove(&prop.block.hash()) { - // tracing::warn!( - // "sync::MarkReceivedProposal : removing stale metadata {}", - // p.block_hash - // ); - // } self.in_pipeline = self.in_pipeline.saturating_sub(1); Ok(()) } From 497e80720f2361d0b35e6b59cdc7dcb8a8006bb2 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 21 Jan 2025 15:36:10 +0800 Subject: [PATCH 081/119] fix: tests. --- zilliqa/tests/it/main.rs | 8 +++++--- zilliqa/tests/it/staking.rs | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 4b357d524..8c81b83ce 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -1061,10 +1061,12 @@ impl Network { } self.run_until( |net| { - net.get_node(index).get_finalized_height().unwrap() - == net.get_node(check).get_finalized_height().unwrap() + let height_i = net.get_node(index).get_finalized_height().unwrap(); + let height_c = net.get_node(check).get_finalized_height().unwrap(); + info!("syncing {}/{}", height_i, height_c); + height_c == height_i && height_i > 0 }, - 10000, + 1000, ) .await .unwrap(); diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index f6d4455fc..b745a27db 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -447,7 +447,6 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); - network.run_until_synced(index).await; let deposit_hash = deposit_stake( &mut network, &wallet, @@ -536,7 +535,6 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) .await; - network.run_until_synced(index).await; let deposit_hash = deposit_v3_stake( &mut network, &wallet, @@ -609,6 +607,7 @@ async fn block_proposers_are_selected_proportionally_to_their_stake(mut network: let staker_wallet = network.wallet_of_node(index).await; let pop_signature = new_validator_key.pop_prove(); + network.run_until_synced(index).await; deposit_stake( &mut network, &wallet, From 37fb852bd265abd06d16fac94a299cea5cee739a Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 21 Jan 2025 19:57:23 +0800 Subject: [PATCH 082/119] feat: retry sync against upgraded Peer, immediately. --- zilliqa/src/consensus.rs | 2 +- zilliqa/src/sync.rs | 26 ++++++++++---------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index f079b538d..393ba5200 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -205,7 +205,7 @@ impl Consensus { )?; } - let sync = Sync::new(&config, db.clone(), message_sender.clone(), Vec::new())?; + let sync = Sync::new(&config, db.clone(), message_sender.clone())?; let latest_block = db .get_finalized_view()? diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 5ef335a3e..d9746bb7a 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -99,24 +99,10 @@ impl Sync { // Minimum of 2 peers to avoid single source of truth. 
const MIN_PEERS: usize = 2; - pub fn new( - config: &NodeConfig, - db: Arc, - message_sender: MessageSender, - peers: Vec, - ) -> Result { + pub fn new(config: &NodeConfig, db: Arc, message_sender: MessageSender) -> Result { let peer_id = message_sender.our_peer_id; let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. - let peers = peers - .into_iter() - .map(|peer_id| PeerInfo { - version: PeerVer::V1, // default to V1 peer, until otherwise proven. - score: 0, - peer_id, - last_used: Instant::now(), - }) - .collect(); // This DB could be left in-here as it is only used in this module // TODO: Make this in-memory by exploiting SQLite TEMP tables i.e. CREATE TEMP TABLE @@ -154,7 +140,7 @@ impl Sync { Ok(Self { db, message_sender, - peers, + peers: BinaryHeap::new(), peer_id, request_timeout: config.consensus.consensus_timeout, max_batch_size, @@ -698,6 +684,14 @@ impl Sync { if let Some(peer) = self.in_flight.as_mut() { if peer.peer_id == from { peer.version = PeerVer::V2; + // retry with upgraded peer + peer.last_used = self + .peers + .peek() + .expect("peers.len() > 1") + .last_used + .checked_sub(Duration::from_secs(1)) + .expect("time is ordinal"); self.done_with_peer(DownGrade::None); } } From dd1b9f89f92e4f094d4010f46645a101b5bd9897 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 21 Jan 2025 21:47:10 +0800 Subject: [PATCH 083/119] fix: checkpoints_test(), randomized add_peers() for tests. --- zilliqa/tests/it/main.rs | 9 ++++++--- zilliqa/tests/it/persistence.rs | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 8c81b83ce..59bc90c11 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -401,7 +401,8 @@ impl Network { let receive_resend_message = UnboundedReceiverStream::new(receive_resend_message).boxed(); receivers.push(receive_resend_message); - let peers = nodes.iter().map(|n| n.peer_id).collect_vec(); + let mut peers = nodes.iter().map(|n| n.peer_id).collect_vec(); + peers.shuffle(rng.lock().unwrap().deref_mut()); for node in &nodes { trace!( @@ -513,7 +514,8 @@ impl Network { let (node, receiver, local_receiver, request_responses) = node(config, secret_key, onchain_key, self.nodes.len(), None).unwrap(); - let peers = self.nodes.iter().map(|n| n.peer_id).collect_vec(); + let mut peers = self.nodes.iter().map(|n| n.peer_id).collect_vec(); + peers.shuffle(self.rng.lock().unwrap().deref_mut()); node.inner.lock().unwrap().consensus.sync.add_peers(peers); trace!("Node {}: {}", node.index, node.peer_id); @@ -578,7 +580,8 @@ impl Network { .chain(request_response_receivers) .collect(); - let peers = nodes.iter().map(|n| n.peer_id).collect_vec(); + let mut peers = nodes.iter().map(|n| n.peer_id).collect_vec(); + peers.shuffle(self.rng.lock().unwrap().deref_mut()); for node in &nodes { trace!( diff --git a/zilliqa/tests/it/persistence.rs b/zilliqa/tests/it/persistence.rs index b4230308a..5505d522b 100644 --- a/zilliqa/tests/it/persistence.rs +++ b/zilliqa/tests/it/persistence.rs @@ -268,6 +268,7 @@ async fn checkpoints_test(mut network: Network) { assert_eq!(state["welcome_msg"], "default"); // check the new node catches up and keeps up with block production + network.run_until_synced(new_node_idx).await; network .run_until_block(&new_node_wallet, 20.into(), 200) .await; From df7fc9447289255701bdf02911fd88532a5a0356 
Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 22 Jan 2025 09:29:40 +0800 Subject: [PATCH 084/119] fix: handle_forking(), validators_can_join() test. --- zilliqa/tests/it/consensus.rs | 1 + zilliqa/tests/it/staking.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/zilliqa/tests/it/consensus.rs b/zilliqa/tests/it/consensus.rs index 7d110fb90..4d87d39c8 100644 --- a/zilliqa/tests/it/consensus.rs +++ b/zilliqa/tests/it/consensus.rs @@ -170,6 +170,7 @@ async fn handle_forking_correctly(mut network: Network) { let original_receipt = first.unwrap(); trace!("Running until the network has reverted the block"); + network.run_until_synced(0).await; // Now we should be able to run the network until we get a different tx receipt from the first // node, which indicates that it has reverted the block network diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index b745a27db..4817faf00 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -447,6 +447,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); + network.run_until_synced(index).await; let deposit_hash = deposit_stake( &mut network, &wallet, @@ -531,6 +532,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { ); // Give new node time to catch up to block including deposit_v3 deployment + network.run_until_synced(index).await; network .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) .await; From a57ed20db9fc33c2867e9b084bea8fe14b5da587 Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 22 Jan 2025 10:10:21 +0800 Subject: [PATCH 085/119] fix: experiment. --- zilliqa/src/db.rs | 2 +- zilliqa/src/sync.rs | 18 +++++++++++++----- zilliqa/tests/it/main.rs | 14 ++++++++++---- zilliqa/tests/it/staking.rs | 16 ++++++++-------- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs index 31c104b90..aba4e0968 100644 --- a/zilliqa/src/db.rs +++ b/zilliqa/src/db.rs @@ -1191,7 +1191,7 @@ fn decompress_file + Debug>(input_file_path: P, output_file_path: /// An implementor of [eth_trie::DB] which uses a [Connection] to persist data. #[derive(Debug, Clone)] pub struct TrieStorage { - pub db: Arc>, + db: Arc>, cache: Arc, Vec>>>, } diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index d9746bb7a..5ad85aba8 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -385,10 +385,9 @@ impl Sync { // inject the proposals let proposals = self.recent_proposals.drain(..).collect_vec(); self.inject_proposals(proposals)?; - } else { - self.empty_metadata()?; - self.state = SyncState::Phase0; } + self.empty_metadata()?; + self.state = SyncState::Phase0; } // Retry to fix sync issues e.g. peers that are now offline SyncState::Retry1 if self.in_pipeline == 0 => { @@ -693,6 +692,14 @@ impl Sync { .checked_sub(Duration::from_secs(1)) .expect("time is ordinal"); self.done_with_peer(DownGrade::None); + + if Self::DO_SPECULATIVE { + match self.state { + SyncState::Phase1(_) => self.request_missing_metadata(None)?, + SyncState::Phase2(_) => self.request_missing_blocks()?, + _ => {} + } + } } } return Ok(()); @@ -1137,8 +1144,9 @@ impl Sync { /// Returns (am_syncing, current_highest_block) pub fn am_syncing(&self) -> Result { Ok(self.in_pipeline != 0 - || self.count_segments()? 
!= 0 - || !self.recent_proposals.is_empty()) + || !matches!(self.state, SyncState::Phase0) + || !self.recent_proposals.is_empty() + || self.count_segments()? != 0) } // Returns (starting_block, current_block, highest_block) if we're syncing, diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 59bc90c11..e826aea73 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -995,7 +995,7 @@ impl Network { external_message.clone(), response_channel, ) - .unwrap(); + .ok(); // TODO: better error handling } }); } @@ -1062,14 +1062,20 @@ impl Network { while index == check { check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); } + let mut debounce = 0; + let mut old_height = 0; self.run_until( |net| { let height_i = net.get_node(index).get_finalized_height().unwrap(); let height_c = net.get_node(check).get_finalized_height().unwrap(); - info!("syncing {}/{}", height_i, height_c); - height_c == height_i && height_i > 0 + info!("syncing {}/{}/{}", height_i, height_c, debounce); + if height_c == height_i && height_i > old_height { + debounce += 1; + old_height = height_i; + } + debounce == 3 }, - 1000, + 10000, ) .await .unwrap(); diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index 4817faf00..65dd1a7e8 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -55,7 +55,7 @@ async fn deposit_stake( .await .unwrap() .tx_hash(); - network.run_until_receipt(staker_wallet, hash, 180).await; + network.run_until_receipt(staker_wallet, hash, 80).await; // Stake the new validator's funds. let tx = TransactionRequest::new() @@ -82,7 +82,7 @@ async fn deposit_stake( .await .unwrap() .tx_hash(); - let receipt = network.run_until_receipt(staker_wallet, hash, 180).await; + let receipt = network.run_until_receipt(staker_wallet, hash, 80).await; assert_eq!(receipt.status.unwrap().as_u64(), 1); hash } @@ -105,7 +105,7 @@ async fn deposit_v3_stake( .await .unwrap() .tx_hash(); - network.run_until_receipt(staker_wallet, hash, 180).await; + network.run_until_receipt(staker_wallet, hash, 80).await; // Stake the new validator's funds. let tx = TransactionRequest::new() @@ -133,7 +133,7 @@ async fn deposit_v3_stake( .await .unwrap() .tx_hash(); - let receipt = network.run_until_receipt(staker_wallet, hash, 180).await; + let receipt = network.run_until_receipt(staker_wallet, hash, 80).await; assert_eq!(receipt.status.unwrap().as_u64(), 1); hash } @@ -430,10 +430,10 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let wallet = network.genesis_wallet().await; // randomise the current epoch state and current leader - let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); - network - .run_until_block(&wallet, blocks_to_prerun.into(), 100) - .await; + // let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); + // network + // .run_until_block(&wallet, blocks_to_prerun.into(), 100) + // .await; // First test joining deposit_v2 let index = network.add_node(); From d720cd5e30da9f002480870074679eee7b447df2 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 23 Jan 2025 09:59:07 +0800 Subject: [PATCH 086/119] fix: checkpoints_test(). 
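For context on the debounced run_until_synced() introduced in PATCH 085 above: two nodes are only treated as synced once their finalized heights agree on several consecutive polls, each at a greater height than the previous match, which filters out the case where both nodes are briefly stuck at the same stale height. A minimal sketch of that condition; the polling closure is illustrative, not the test harness API.

    // Returns true once poll() reports matching, strictly increasing finalized
    // heights `required` times within `max_polls` attempts.
    fn is_synced(mut poll: impl FnMut() -> (u64, u64), required: u32, max_polls: u32) -> bool {
        let mut matches = 0;
        let mut last_height = 0;
        for _ in 0..max_polls {
            let (a, b) = poll();
            if a == b && a > last_height {
                matches += 1;
                last_height = a;
                if matches >= required {
                    return true;
                }
            }
        }
        false
    }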
--- zilliqa/tests/it/persistence.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/zilliqa/tests/it/persistence.rs b/zilliqa/tests/it/persistence.rs index 5505d522b..ccaf27eea 100644 --- a/zilliqa/tests/it/persistence.rs +++ b/zilliqa/tests/it/persistence.rs @@ -169,7 +169,7 @@ async fn checkpoints_test(mut network: Network) { .await .unwrap() .tx_hash(); - network.run_until_receipt(&wallet, update_tx_hash, 50).await; + network.run_until_receipt(&wallet, update_tx_hash, 51).await; // Scilla let (secret_key, address) = zilliqa_account(&mut network).await; let code = scilla_test_contract_code(); @@ -178,7 +178,7 @@ async fn checkpoints_test(mut network: Network) { deploy_scilla_contract(&mut network, &secret_key, &code, &data).await; // Run until block 9 so that we can insert a tx in block 10 (note that this transaction may not *always* appear in the desired block, therefore we do not assert its presence later) - network.run_until_block(&wallet, 9.into(), 200).await; + network.run_until_block(&wallet, 9.into(), 209).await; let _hash = wallet .send_transaction(TransactionRequest::pay(wallet.address(), 10), None) @@ -187,7 +187,7 @@ async fn checkpoints_test(mut network: Network) { .tx_hash(); // wait 10 blocks for checkpoint to happen - then 3 more to finalize that block - network.run_until_block(&wallet, 13.into(), 200).await; + network.run_until_block(&wallet, 13.into(), 213).await; let checkpoint_files = network .nodes @@ -268,9 +268,8 @@ async fn checkpoints_test(mut network: Network) { assert_eq!(state["welcome_msg"], "default"); // check the new node catches up and keeps up with block production - network.run_until_synced(new_node_idx).await; network - .run_until_block(&new_node_wallet, 20.into(), 200) + .run_until_block(&new_node_wallet, 20.into(), 220) .await; // check account nonce of old wallet From 902e38b0ea78eea6aa3429b7c21d9e745dd1c1a0 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 23 Jan 2025 12:03:04 +0800 Subject: [PATCH 087/119] feat: check for checkpoint block, not just history. --- zilliqa/src/consensus.rs | 2 ++ zilliqa/src/sync.rs | 22 +++++++++++++++++++--- zilliqa/tests/it/main.rs | 2 +- zilliqa/tests/it/persistence.rs | 1 + 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 393ba5200..dbe036077 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -367,6 +367,8 @@ impl Consensus { .at_root(parent.state_root_hash().into()) .get_stakers(block.header)?, )?; + + consensus.sync.set_checkpoint(&block); } // If timestamp of when current high_qc was written exists then use it to estimate the minimum number of blocks the network has moved on since shut down diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 5ad85aba8..7a70788ca 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -87,6 +87,8 @@ pub struct Sync { inject_at: Option<(std::time::Instant, usize)>, // record starting number, for eth_syncing() RPC call. 
started_at_block_number: u64, + // checkpoint + checkpoint_hash: Option, } impl Sync { @@ -152,9 +154,16 @@ impl Sync { recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, started_at_block_number: 0, + checkpoint_hash: None, }) } + pub fn set_checkpoint(&mut self, checkpoint: &Block) { + let hash = checkpoint.hash(); + tracing::info!("sync::Checkpoint {}", hash); + self.checkpoint_hash = Some(hash); + } + /// Returns the number of stored segments fn count_segments(&self) -> Result { let mut segments = 0; @@ -284,7 +293,7 @@ impl Sync { } /// Bulk inserts a bunch of metadata. - fn insert_metadata(&self, metas: Vec) -> Result<()> { + fn insert_metadata(&self, metas: &Vec) -> Result<()> { self.db.with_sqlite_tx(|c| { for meta in metas { c.prepare_cached( @@ -851,10 +860,17 @@ impl Sync { ); // Record the constructed chain metadata - self.insert_metadata(segment)?; + self.insert_metadata(&segment)?; + + // If the checkpoint is in this segment, + let checkpointed = if let Some(checkpoint) = self.checkpoint_hash { + segment.iter().any(|b| b.block_hash == checkpoint) + } else { + false + }; // If the segment hits our history, start Phase 2. - if self.db.contains_block(&last_block_hash)? { + if checkpointed || self.db.contains_block(&last_block_hash)? { self.state = SyncState::Phase2(Hash::ZERO); } else if Self::DO_SPECULATIVE { self.request_missing_metadata(None)?; diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index e826aea73..ac370166c 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -1075,7 +1075,7 @@ impl Network { } debounce == 3 }, - 10000, + 2000, ) .await .unwrap(); diff --git a/zilliqa/tests/it/persistence.rs b/zilliqa/tests/it/persistence.rs index ccaf27eea..7757f1859 100644 --- a/zilliqa/tests/it/persistence.rs +++ b/zilliqa/tests/it/persistence.rs @@ -268,6 +268,7 @@ async fn checkpoints_test(mut network: Network) { assert_eq!(state["welcome_msg"], "default"); // check the new node catches up and keeps up with block production + network.run_until_synced(new_node_idx).await; network .run_until_block(&new_node_wallet, 20.into(), 220) .await; From 626e49d8f1b059804596c931a96771bfea224b59 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 23 Jan 2025 12:53:17 +0800 Subject: [PATCH 088/119] fix: undoing some test timeouts. 
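For context on the checkpoint handling added in PATCH 087 above: a node started from a checkpoint has no history below the checkpoint block, so the metadata back-fill must also stop when a segment reaches the recorded checkpoint hash, not only when it reaches a block already in the local DB. A rough sketch of that termination condition, using stand-in types rather than the zilliqa definitions.

    // The segment is complete when it contains the checkpoint block, or when
    // its oldest block is already present locally.
    fn segment_complete(
        segment_hashes: &[[u8; 32]],
        checkpoint_hash: Option<[u8; 32]>,
        have_block: impl Fn(&[u8; 32]) -> bool,
    ) -> bool {
        let checkpointed = checkpoint_hash
            .map(|cp| segment_hashes.iter().any(|h| *h == cp))
            .unwrap_or(false);
        checkpointed || segment_hashes.last().map(|h| have_block(h)).unwrap_or(false)
    }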
--- zilliqa/tests/it/consensus.rs | 2 +- zilliqa/tests/it/main.rs | 1 - zilliqa/tests/it/persistence.rs | 8 ++++---- zilliqa/tests/it/unreliable.rs | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/zilliqa/tests/it/consensus.rs b/zilliqa/tests/it/consensus.rs index 4d87d39c8..e456f52c3 100644 --- a/zilliqa/tests/it/consensus.rs +++ b/zilliqa/tests/it/consensus.rs @@ -117,7 +117,7 @@ async fn block_production(mut network: Network) { .map_or(0, |b| b.number()) >= 10 }, - 2000, + 100, ) .await .unwrap(); diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index ac370166c..cf07bcecb 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -1068,7 +1068,6 @@ impl Network { |net| { let height_i = net.get_node(index).get_finalized_height().unwrap(); let height_c = net.get_node(check).get_finalized_height().unwrap(); - info!("syncing {}/{}/{}", height_i, height_c, debounce); if height_c == height_i && height_i > old_height { debounce += 1; old_height = height_i; diff --git a/zilliqa/tests/it/persistence.rs b/zilliqa/tests/it/persistence.rs index 7757f1859..5505d522b 100644 --- a/zilliqa/tests/it/persistence.rs +++ b/zilliqa/tests/it/persistence.rs @@ -169,7 +169,7 @@ async fn checkpoints_test(mut network: Network) { .await .unwrap() .tx_hash(); - network.run_until_receipt(&wallet, update_tx_hash, 51).await; + network.run_until_receipt(&wallet, update_tx_hash, 50).await; // Scilla let (secret_key, address) = zilliqa_account(&mut network).await; let code = scilla_test_contract_code(); @@ -178,7 +178,7 @@ async fn checkpoints_test(mut network: Network) { deploy_scilla_contract(&mut network, &secret_key, &code, &data).await; // Run until block 9 so that we can insert a tx in block 10 (note that this transaction may not *always* appear in the desired block, therefore we do not assert its presence later) - network.run_until_block(&wallet, 9.into(), 209).await; + network.run_until_block(&wallet, 9.into(), 200).await; let _hash = wallet .send_transaction(TransactionRequest::pay(wallet.address(), 10), None) @@ -187,7 +187,7 @@ async fn checkpoints_test(mut network: Network) { .tx_hash(); // wait 10 blocks for checkpoint to happen - then 3 more to finalize that block - network.run_until_block(&wallet, 13.into(), 213).await; + network.run_until_block(&wallet, 13.into(), 200).await; let checkpoint_files = network .nodes @@ -270,7 +270,7 @@ async fn checkpoints_test(mut network: Network) { // check the new node catches up and keeps up with block production network.run_until_synced(new_node_idx).await; network - .run_until_block(&new_node_wallet, 20.into(), 220) + .run_until_block(&new_node_wallet, 20.into(), 200) .await; // check account nonce of old wallet diff --git a/zilliqa/tests/it/unreliable.rs b/zilliqa/tests/it/unreliable.rs index 8a68acecf..b35787e93 100644 --- a/zilliqa/tests/it/unreliable.rs +++ b/zilliqa/tests/it/unreliable.rs @@ -32,5 +32,5 @@ async fn blocks_are_produced_while_a_node_restarts(mut network: Network) { // of blocks 3 or 4. This would tell us that we aren't producing new views unnecessarily. // Ensure more blocks are produced. - network.run_until_block(&wallet, 12.into(), 1400).await; + network.run_until_block(&wallet, 12.into(), 400).await; } From b6509d4e296518a2dd6a4732894bf64d6207fe59 Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 23 Jan 2025 19:40:09 +0800 Subject: [PATCH 089/119] feat: replace ChainMetaData with BlockHeader. 
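The dedicated ChainMetaData struct goes away because every field the sync layer tracks already has a slot on BlockHeader, with the parent hash carried in the header's QC. A simplified sketch of the mapping, using stand-in types rather than the zilliqa definitions; the real headers keep their remaining fields at default values when built from bare metadata.

    struct QuorumCertificate { block_hash: [u8; 32] }
    struct BlockHeader { view: u64, number: u64, hash: [u8; 32], qc: QuorumCertificate }

    // Old ChainMetaData field -> new home on the header:
    //   parent_hash  -> header.qc.block_hash
    //   block_hash   -> header.hash
    //   block_number -> header.number
    //   view_number  -> header.view
    fn header_from_meta(parent: [u8; 32], hash: [u8; 32], number: u64, view: u64) -> BlockHeader {
        BlockHeader { view, number, hash, qc: QuorumCertificate { block_hash: parent } }
    }

The BlockHeader::from_meta_data helper added at the end of sync.rs in this patch does the same thing against the real types.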
--- zilliqa/src/consensus.rs | 3 +- zilliqa/src/message.rs | 13 +--- zilliqa/src/sync.rs | 164 +++++++++++++++++++++------------------ zilliqa/tests/it/main.rs | 10 ++- 4 files changed, 98 insertions(+), 92 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index dbe036077..294ca6b93 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -3160,9 +3160,8 @@ impl Consensus { trace!("consensus::tick()"); trace!("request_missing_blocks from timer"); - // Drives syncing from timeouts, not just new Proposals + // TODO: Drive passive-sync from Timeouts if self.sync.am_syncing()? { - // TODO: Sync from Timeouts self.sync.sync_internal()?; } else { trace!("not syncing ..."); diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 690ad67c6..5f298b021 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -242,16 +242,7 @@ pub struct InjectedProposal { pub block: Proposal, } -/// Used to hold metadata about the chain -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChainMetaData { - pub parent_hash: Hash, - pub block_hash: Hash, - pub block_number: u64, - pub view_number: u64, -} - -/// Used to convey proposal processing internally, to avoid blocking threads for too long. +/// TODO: Remove. Unused in RFC161 algorithm #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProcessProposal { // An encoded PeerId @@ -287,7 +278,7 @@ pub enum ExternalMessage { RemovePeer, InjectedProposal(InjectedProposal), MetaDataRequest(BlockRequestV2), - MetaDataResponse(Vec), + MetaDataResponse(Vec), MultiBlockRequest(Vec), MultiBlockResponse(Vec), } diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 7a70788ca..e0d463cd8 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -20,8 +20,8 @@ use crate::{ crypto::Hash, db::Db, message::{ - Block, BlockRequest, BlockRequestV2, BlockResponse, ChainMetaData, ExternalMessage, - InjectedProposal, Proposal, + Block, BlockHeader, BlockRequest, BlockRequestV2, BlockResponse, ExternalMessage, + InjectedProposal, Proposal, QuorumCertificate, }, node::MessageSender, time::SystemTime, @@ -211,18 +211,13 @@ impl Sync { } /// Peeks into the top of the segment stack. - fn last_segment(&self) -> Result> { + fn last_segment(&self) -> Result> { let mut result = None; self.db.with_sqlite_tx(|c| { result = c .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, peer, version FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? .query_row([], |row| Ok(( - ChainMetaData{ - parent_hash: row.get(0)?, - block_hash: row.get(1)?, - block_number: row.get(2)?, - view_number: row.get(3)?, - }, + BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?), PeerInfo { last_used: Instant::now(), score:u32::MAX, @@ -274,16 +269,16 @@ impl Sync { } /// Pushes a particular segment into the stack. - fn push_segment(&self, peer: PeerInfo, meta: ChainMetaData) -> Result<()> { + fn push_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> { self.db.with_sqlite_tx(|c| { c.prepare_cached( "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, peer, version) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :peer, :version)")? .execute( named_params! 
{ - ":parent_hash": meta.parent_hash, - ":block_hash": meta.block_hash, - ":block_number": meta.block_number, - ":view_number": meta.view_number, + ":parent_hash": meta.qc.block_hash, + ":block_hash": meta.hash, + ":block_number": meta.number, + ":view_number": meta.view, ":peer": peer.peer_id.to_bytes(), ":version": peer.version, }, @@ -293,18 +288,18 @@ impl Sync { } /// Bulk inserts a bunch of metadata. - fn insert_metadata(&self, metas: &Vec) -> Result<()> { + fn insert_metadata(&self, metas: &Vec) -> Result<()> { self.db.with_sqlite_tx(|c| { for meta in metas { c.prepare_cached( "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)")? .execute( named_params! { - ":parent_hash": meta.parent_hash, - ":block_hash": meta.block_hash, - ":block_number": meta.block_number, - ":view_number": meta.view_number, - }, + ":parent_hash": meta.qc.block_hash, + ":block_hash": meta.hash, + ":block_number": meta.number, + ":view_number": meta.view, + }, )?; } Ok(()) @@ -321,7 +316,7 @@ impl Sync { /// Phase 0: Sync a block proposal. /// - /// This is the main entry point for syncing a block proposal. + /// This is the main entry point for active-syncing a block proposal. /// We start by enqueuing all proposals, and then check if the parent block exists in history. /// If the parent block exists, we do nothing. Otherwise, we check the least recent one. /// If we find its parent in history, we inject the entire queue. Otherwise, we start syncing. @@ -354,12 +349,12 @@ impl Sync { let block_hash = self.recent_proposals.back().unwrap().hash(); let block_number = self.recent_proposals.back().unwrap().number(); let view_number = self.recent_proposals.back().unwrap().view(); - let meta = ChainMetaData { - block_hash, + let meta = BlockHeader::from_meta_data( parent_hash, + block_hash, block_number, view_number, - }; + ); self.request_missing_metadata(Some(meta))?; let highest_block = self @@ -439,16 +434,6 @@ impl Sync { Proposal::from_parts(block, txs) } - /// Convenience function to extract metadata from the block. - fn block_to_metadata(&self, block: Block) -> ChainMetaData { - ChainMetaData { - parent_hash: block.parent_hash(), - block_hash: block.hash(), - block_number: block.number(), - view_number: block.view(), - } - } - /// Phase 2: Retry Phase 1 /// /// If something went wrong in Phase 2, Phase 1 may need to be retried for the recently used segment. @@ -623,7 +608,7 @@ impl Sync { // If we have no chain_segments, we have nothing to do if let Some((meta, peer_info)) = self.last_segment()? { - let request_hashes = self.get_segment(meta.parent_hash)?; + let request_hashes = self.get_segment(meta.qc.block_hash)?; // Checksum of the request hashes let checksum = request_hashes @@ -660,8 +645,8 @@ impl Sync { }); // do not add VIEW_DRIFT - the stored marker is accurate! ExternalMessage::BlockRequest(BlockRequest { - to_view: meta.view_number.saturating_sub(1), - from_view: meta.view_number.saturating_sub(self.max_batch_size as u64), + to_view: meta.view.saturating_sub(1), + from_view: meta.view.saturating_sub(self.max_batch_size as u64), }) } }; @@ -722,13 +707,18 @@ impl Sync { // Convert the V1 response into a V2 response. match self.state { // Phase 1 - construct the metadata chain from the set of received proposals - SyncState::Phase1(ChainMetaData { - block_number, - mut parent_hash, + SyncState::Phase1(BlockHeader { + number: block_number, + qc: + QuorumCertificate { + block_hash: parent_hash, + .. 
+ }, .. }) => { // We do not buffer the proposals, as it takes 250MB/day! // Instead, we will re-request the proposals again, in Phase 2. + let mut parent_hash = parent_hash; let metadata = response .proposals .into_iter() @@ -743,12 +733,7 @@ impl Sync { parent_hash = p.header.qc.block_hash; true }) - .map(|p| ChainMetaData { - block_hash: p.hash(), - parent_hash: p.header.qc.block_hash, - block_number: p.number(), - view_number: p.view(), - }) + .map(|p| p.header) .collect_vec(); self.handle_metadata_response(from, metadata)?; @@ -784,7 +769,7 @@ impl Sync { pub fn handle_metadata_response( &mut self, from: PeerId, - response: Vec, + response: Vec, ) -> Result<()> { // Check for expected response let segment_peer = if let Some(peer) = self.in_flight.as_ref() { @@ -817,27 +802,24 @@ impl Sync { anyhow::bail!("sync::MetadataResponse : invalid state"); }; - let mut block_hash = meta.parent_hash; - let mut block_num = meta.block_number; + let mut block_hash = meta.qc.block_hash; + let mut block_num = meta.number; for meta in response.iter() { // check that the block hash and number is as expected. - if meta.block_hash != Hash::ZERO - && block_hash == meta.block_hash - && block_num == meta.block_number + 1 - { - block_hash = meta.parent_hash; - block_num = meta.block_number; + if meta.hash != Hash::ZERO && block_hash == meta.hash && block_num == meta.number + 1 { + block_hash = meta.qc.block_hash; + block_num = meta.number; } else { // TODO: possibly, discard and rebuild entire chain // if something does not match, do nothing and retry the request with the next peer. tracing::error!( "sync::MetadataResponse : unexpected metadata hash={block_hash} != {}, num={block_num} != {}", - meta.block_hash, - meta.block_number, + meta.hash, + meta.number, ); return Ok(()); } - if meta.block_hash == response.last().unwrap().block_hash { + if meta.hash == response.last().unwrap().hash { break; // done, we do not check the last parent, because that's outside this segment } } @@ -846,11 +828,11 @@ impl Sync { let segment = response; // Record landmark, including peer that has this set of blocks - self.push_segment(segment_peer, meta.clone())?; + self.push_segment(segment_peer, *meta)?; // Record the oldest block in the chain's parent self.state = SyncState::Phase1(segment.last().cloned().unwrap()); - let last_block_hash = segment.last().as_ref().unwrap().block_hash; + let last_block_hash = segment.last().as_ref().unwrap().hash; tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", @@ -864,7 +846,7 @@ impl Sync { // If the checkpoint is in this segment, let checkpointed = if let Some(checkpoint) = self.checkpoint_hash { - segment.iter().any(|b| b.block_hash == checkpoint) + segment.iter().any(|b| b.hash == checkpoint) } else { false }; @@ -912,7 +894,7 @@ impl Sync { break; // that's all we have! }; hash = block.parent_hash(); - metas.push(self.block_to_metadata(block)); + metas.push(block.header); } let message = ExternalMessage::MetaDataResponse(metas); @@ -928,7 +910,7 @@ impl Sync { /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If Phase 1 is in progress, it continues requesting blocks from the last known Phase 1 block. /// Otherwise, it requests blocks from the given starting metadata. 
- pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { + pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { if !matches!(self.state, SyncState::Phase1(_)) && !matches!(self.state, SyncState::Phase0) { anyhow::bail!("sync::RequestMissingMetadata : invalid state"); } @@ -960,14 +942,27 @@ impl Sync { peer.peer_id ); let message = match (self.state.clone(), &peer.version) { - (SyncState::Phase1(ChainMetaData { parent_hash, .. }), PeerVer::V2) => { - ExternalMessage::MetaDataRequest(BlockRequestV2 { - request_at: SystemTime::now(), - from_hash: parent_hash, - batch_size: self.max_batch_size, - }) - } - (SyncState::Phase1(ChainMetaData { view_number, .. }), PeerVer::V1) => { + ( + SyncState::Phase1(BlockHeader { + qc: + QuorumCertificate { + block_hash: parent_hash, + .. + }, + .. + }), + PeerVer::V2, + ) => ExternalMessage::MetaDataRequest(BlockRequestV2 { + request_at: SystemTime::now(), + from_hash: parent_hash, + batch_size: self.max_batch_size, + }), + ( + SyncState::Phase1(BlockHeader { + view: view_number, .. + }), + PeerVer::V1, + ) => { // For V1 BlockRequest, we request a little more than we need, due to drift // Since the view number is an 'internal' clock, it is possible for the same block number // to have different view numbers. @@ -979,7 +974,7 @@ impl Sync { } (SyncState::Phase0, PeerVer::V2) if meta.is_some() => { let meta = meta.unwrap(); - let parent_hash = meta.parent_hash; + let parent_hash = meta.qc.block_hash; self.state = SyncState::Phase1(meta); ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), @@ -989,7 +984,7 @@ impl Sync { } (SyncState::Phase0, PeerVer::V1) if meta.is_some() => { let meta = meta.unwrap(); - let view_number = meta.view_number; + let view_number = meta.view; self.state = SyncState::Phase1(meta); let drift = self.max_batch_size as u64 / 10; ExternalMessage::BlockRequest(BlockRequest { @@ -1104,7 +1099,7 @@ impl Sync { // if the new peer is not synced, it will get downgraded to the back of heap. // but by placing them at the back of the 'best' pack, we get to try them out soon. 
let new_peer = PeerInfo { - version: PeerVer::V1, // default V2 + version: PeerVer::V1, score: self.peers.iter().map(|p| p.score).min().unwrap_or_default(), peer_id: peer, last_used: Instant::now(), @@ -1237,10 +1232,11 @@ impl PartialOrd for DownGrade { } /// Sync state +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone)] enum SyncState { Phase0, - Phase1(ChainMetaData), + Phase1(BlockHeader), Phase2(Hash), Phase3, Retry1, @@ -1268,3 +1264,21 @@ impl ToSql for PeerVer { Ok((self.clone() as u32).into()) } } + +impl BlockHeader { + pub fn from_meta_data( + parent_hash: Hash, + block_hash: Hash, + block_number: u64, + view_number: u64, + ) -> BlockHeader { + let mut meta = BlockHeader { + view: view_number, + number: block_number, + hash: block_hash, + ..Default::default() + }; + meta.qc.block_hash = parent_hash; + meta + } +} diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index cf07bcecb..01332edd8 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -1058,10 +1058,12 @@ impl Network { } async fn run_until_synced(&mut self, index: usize) { - let mut check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); - while index == check { - check = self.rng.lock().unwrap().gen_range(0..self.nodes.len()); - } + let check = loop { + let i = self.random_index(); + if i != index { + break i; + } + }; let mut debounce = 0; let mut old_height = 0; self.run_until( From e0007e30d36f6a57caa1270f61bf4bd181ba1e4e Mon Sep 17 00:00:00 2001 From: Shawn Date: Thu, 23 Jan 2025 21:50:27 +0800 Subject: [PATCH 090/119] feat: changed BlockRequestV2 from 'hash'-based to 'height'-based. --- zilliqa/src/message.rs | 10 +++++++--- zilliqa/src/p2p_node.rs | 13 ++++++++++++- zilliqa/src/sync.rs | 30 +++++++++++++++++------------- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 5f298b021..fca919a3e 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -230,8 +230,8 @@ impl fmt::Debug for BlockResponse { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BlockRequestV2 { pub request_at: SystemTime, - pub from_hash: Hash, - pub batch_size: usize, + pub from_height: u64, + pub to_height: u64, } /// Used to convey proposal processing internally, to avoid blocking threads for too long. @@ -306,7 +306,11 @@ impl Display for ExternalMessage { write!(f, "MetaDataResponse({})", r.len()) } ExternalMessage::MetaDataRequest(r) => { - write!(f, "MetaDataRequest({}, num={})", r.from_hash, r.batch_size) + write!( + f, + "MetaDataRequest(from={}, to={})", + r.from_height, r.to_height + ) } ExternalMessage::InjectedProposal(p) => { write!(f, "InjectedProposal {}", p.block.number()) diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index 6fd941b23..9cec57729 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -34,9 +34,10 @@ use crate::{ cfg::{Config, ConsensusConfig, NodeConfig}, crypto::SecretKey, db, - message::{ExternalMessage, InternalMessage}, + message::{BlockRequestV2, ExternalMessage, InternalMessage}, node::{OutgoingMessageFailure, RequestId}, node_launcher::{NodeInputChannels, NodeLauncher, ResponseChannel}, + time::SystemTime, }; /// Messages are a tuple of the destination shard ID and the actual message. 
@@ -312,6 +313,16 @@ impl P2pNode { debug!(source = %_source, %to, external_message = %_external_message, request_id = %_request_id, "message received"); let _topic = Self::shard_id_to_topic(shard_id); let _id = format!("{}", _request_id); + + // insert local time for BlockRequestV2 - this is checked in Sync::HandleMetadataRequest + let _external_message = match _external_message { + ExternalMessage::MetaDataRequest(BlockRequestV2{from_height, to_height, ..}) => ExternalMessage::MetaDataRequest(BlockRequestV2{ + from_height, to_height, request_at: SystemTime::now(), + }), + // pass-thru everything else + e => e, + }; + cfg_if! { if #[cfg(not(feature = "fake_response_channel"))] { self.send_to(&_topic.hash(), |c| c.requests.send((_source, _id, _external_message, ResponseChannel::Remote(_channel))))?; diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index e0d463cd8..f40fd21d9 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -1,6 +1,7 @@ use std::{ cmp::Ordering, collections::{BinaryHeap, VecDeque}, + ops::Sub, sync::Arc, time::{Duration, Instant}, }; @@ -885,10 +886,17 @@ impl Sync { // TODO: Check if we should service this request // Validators could respond to this request if there is nothing else to do. - let batch_size: usize = self.max_batch_size.min(request.batch_size); // mitigate DOS by limiting the number of blocks we return + let batch_size: usize = self + .max_batch_size + .min(request.to_height.saturating_sub(request.from_height) as usize); // mitigate DOS by limiting the number of blocks we return let mut metas = Vec::with_capacity(batch_size); - let mut hash = request.from_hash; - while metas.len() < batch_size { + let Some(block) = self.db.get_canonical_block_by_number(request.to_height)? else { + tracing::warn!("sync::MetadataRequest : unknown block height"); + return Ok(ExternalMessage::Acknowledgement); + }; + metas.push(block.header); + let mut hash = block.parent_hash(); + while metas.len() <= batch_size { // grab the parent let Some(block) = self.db.get_block_by_hash(&hash)? else { break; // that's all we have! @@ -944,18 +952,14 @@ impl Sync { let message = match (self.state.clone(), &peer.version) { ( SyncState::Phase1(BlockHeader { - qc: - QuorumCertificate { - block_hash: parent_hash, - .. - }, + number: block_number, .. }), PeerVer::V2, ) => ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), - from_hash: parent_hash, - batch_size: self.max_batch_size, + to_height: block_number.saturating_sub(1), + from_height: block_number.saturating_sub(self.max_batch_size as u64), }), ( SyncState::Phase1(BlockHeader { @@ -974,12 +978,12 @@ impl Sync { } (SyncState::Phase0, PeerVer::V2) if meta.is_some() => { let meta = meta.unwrap(); - let parent_hash = meta.qc.block_hash; + let block_number = meta.number; self.state = SyncState::Phase1(meta); ExternalMessage::MetaDataRequest(BlockRequestV2 { request_at: SystemTime::now(), - from_hash: parent_hash, - batch_size: self.max_batch_size, + to_height: block_number.sub(1), + from_height: block_number.sub(self.max_batch_size as u64), }) } (SyncState::Phase0, PeerVer::V1) if meta.is_some() => { From 7c4d40bba2dfe0b25d44a81e5a3384898bd02ee1 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 08:09:41 +0800 Subject: [PATCH 091/119] feat: simplify checkpointed check. 
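
Stores the checkpoint as a plain Hash that defaults to Hash::ZERO instead of an
Option, so the per-segment check no longer needs a nested "if let". A minimal
before/after sketch, taken from the sync.rs hunk below:

    // before: the Option forced a two-step check
    // let checkpointed = if let Some(checkpoint) = self.checkpoint_hash {
    //     segment.iter().any(|b| b.hash == checkpoint)
    // } else {
    //     false
    // };

    // after: Hash::ZERO never matches a real block hash, so a single scan suffices
    let checkpointed = segment.iter().any(|b| b.hash == self.checkpoint_hash);
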
--- zilliqa/src/consensus.rs | 4 ++-- zilliqa/src/sync.rs | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 294ca6b93..3f01a52a9 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -205,8 +205,6 @@ impl Consensus { )?; } - let sync = Sync::new(&config, db.clone(), message_sender.clone())?; - let latest_block = db .get_finalized_view()? .and_then(|view| { @@ -314,6 +312,8 @@ impl Consensus { } }; + let sync = Sync::new(&config, db.clone(), message_sender.clone())?; + let mut consensus = Consensus { secret_key, config, diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index f40fd21d9..9f8b1a93e 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -88,8 +88,8 @@ pub struct Sync { inject_at: Option<(std::time::Instant, usize)>, // record starting number, for eth_syncing() RPC call. started_at_block_number: u64, - // checkpoint - checkpoint_hash: Option, + // checkpoint, if set + checkpoint_hash: Hash, } impl Sync { @@ -155,14 +155,14 @@ impl Sync { recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, started_at_block_number: 0, - checkpoint_hash: None, + checkpoint_hash: Hash::ZERO, }) } pub fn set_checkpoint(&mut self, checkpoint: &Block) { let hash = checkpoint.hash(); tracing::info!("sync::Checkpoint {}", hash); - self.checkpoint_hash = Some(hash); + self.checkpoint_hash = hash; } /// Returns the number of stored segments @@ -845,12 +845,8 @@ impl Sync { // Record the constructed chain metadata self.insert_metadata(&segment)?; - // If the checkpoint is in this segment, - let checkpointed = if let Some(checkpoint) = self.checkpoint_hash { - segment.iter().any(|b| b.hash == checkpoint) - } else { - false - }; + // If the checkpoint is in this segment + let checkpointed = segment.iter().any(|b| b.hash == self.checkpoint_hash); // If the segment hits our history, start Phase 2. if checkpointed || self.db.contains_block(&last_block_hash)? { From 75ab04b3c2938d445c5b4543503d111ff3526e99 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 08:32:56 +0800 Subject: [PATCH 092/119] nit: make sync_data temporary. --- zilliqa/src/sync.rs | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 9f8b1a93e..d738d13f0 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -107,11 +107,10 @@ impl Sync { let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. - // This DB could be left in-here as it is only used in this module - // TODO: Make this in-memory by exploiting SQLite TEMP tables i.e. CREATE TEMP TABLE + // This in-memory DB is placed in-here as it is only used in this module. db.with_sqlite_tx(|c| { c.execute_batch( - "CREATE TABLE IF NOT EXISTS sync_data ( + "CREATE TEMP TABLE IF NOT EXISTS sync_data ( block_hash BLOB NOT NULL UNIQUE, parent_hash BLOB NOT NULL, block_number INTEGER NOT NULL PRIMARY KEY, @@ -140,6 +139,18 @@ impl Sync { SyncState::Retry1 }; + let latest_block_number = db + .get_finalized_view()? 
+ .and_then(|view| { + db.get_block_hash_by_view(view) + .expect("no header found at view {view}") + }) + .and_then(|hash| { + db.get_block_by_hash(&hash) + .expect("no block found for hash {hash}") + }) + .and_then(|block| Some(block.number())); + Ok(Self { db, message_sender, @@ -154,17 +165,11 @@ impl Sync { state, recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, - started_at_block_number: 0, + started_at_block_number: latest_block_number.unwrap_or_default(), checkpoint_hash: Hash::ZERO, }) } - pub fn set_checkpoint(&mut self, checkpoint: &Block) { - let hash = checkpoint.hash(); - tracing::info!("sync::Checkpoint {}", hash); - self.checkpoint_hash = hash; - } - /// Returns the number of stored segments fn count_segments(&self) -> Result { let mut segments = 0; @@ -847,9 +852,10 @@ impl Sync { // If the checkpoint is in this segment let checkpointed = segment.iter().any(|b| b.hash == self.checkpoint_hash); - + let started = self.started_at_block_number <= segment.first().as_ref().unwrap().number + && self.started_at_block_number >= segment.last().as_ref().unwrap().number; // If the segment hits our history, start Phase 2. - if checkpointed || self.db.contains_block(&last_block_hash)? { + if started || checkpointed || self.db.contains_block(&last_block_hash)? { self.state = SyncState::Phase2(Hash::ZERO); } else if Self::DO_SPECULATIVE { self.request_missing_metadata(None)?; @@ -1185,6 +1191,13 @@ impl Sync { ))) } } + + /// Sets the checkpoint, if node was started from a checkpoint. + pub fn set_checkpoint(&mut self, checkpoint: &Block) { + let hash = checkpoint.hash(); + tracing::info!("sync::Checkpoint {}", hash); + self.checkpoint_hash = hash; + } } #[derive(Debug, Clone, Eq, PartialEq)] From 1c8c4124fb78843311dd5e2c120e9b7539183315 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 08:48:08 +0800 Subject: [PATCH 093/119] fix: better error handling for committee_for_hash(). --- zilliqa/src/consensus.rs | 10 +++++++--- zilliqa/src/sync.rs | 2 +- zilliqa/tests/it/main.rs | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 3f01a52a9..18cb10c2e 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -829,7 +829,6 @@ impl Consensus { let proposer_address = parent_state.get_reward_address(proposer)?; - let mut total_cosigner_stake = 0; let cosigner_stake: Vec<_> = committee .iter() .enumerate() @@ -841,11 +840,15 @@ impl Consensus { .unwrap() .unwrap() .get(); - total_cosigner_stake += stake; (reward_address, stake) }) .collect(); + let total_cosigner_stake = cosigner_stake.iter().fold(0, |sum, c| sum + c.1); + if total_cosigner_stake == 0 { + return Err(anyhow!("total stake is 0")); + } + // Track total awards given out. 
This may be different to rewards_per_block because we round down on division when we split the rewards let mut total_rewards_issued = 0; @@ -1698,7 +1701,8 @@ impl Consensus { fn committee_for_hash(&self, parent_hash: Hash) -> Result> { let Ok(Some(parent)) = self.get_block(&parent_hash) else { - return Err(anyhow!("parent block not found: {:?}", parent_hash)); + tracing::error!("parent block not found: {:?}", parent_hash); + return Ok(Vec::new()); // return an empty vector instead of Err for graceful app-level error-handling }; let parent_root_hash = parent.state_root_hash(); diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index d738d13f0..2f5956f08 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -149,7 +149,7 @@ impl Sync { db.get_block_by_hash(&hash) .expect("no block found for hash {hash}") }) - .and_then(|block| Some(block.number())); + .map(|block| block.number()); Ok(Self { db, diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 01332edd8..e1267087f 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -995,7 +995,7 @@ impl Network { external_message.clone(), response_channel, ) - .ok(); // TODO: better error handling + .unwrap(); } }); } From 6036668a5cc666d3311ef2b59a78d8f76b209869 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 09:05:17 +0800 Subject: [PATCH 094/119] feat: sets starting_block during Sync::new(). --- zilliqa/src/consensus.rs | 2 +- zilliqa/src/sync.rs | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 18cb10c2e..77ff2ba04 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -312,7 +312,7 @@ impl Consensus { } }; - let sync = Sync::new(&config, db.clone(), message_sender.clone())?; + let sync = Sync::new(&config, db.clone(), &latest_block, message_sender.clone())?; let mut consensus = Consensus { secret_key, diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 2f5956f08..db01d1e71 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -102,7 +102,12 @@ impl Sync { // Minimum of 2 peers to avoid single source of truth. const MIN_PEERS: usize = 2; - pub fn new(config: &NodeConfig, db: Arc, message_sender: MessageSender) -> Result { + pub fn new( + config: &NodeConfig, + db: Arc, + latest_block: &Option, + message_sender: MessageSender, + ) -> Result { let peer_id = message_sender.our_peer_id; let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. @@ -139,17 +144,11 @@ impl Sync { SyncState::Retry1 }; - let latest_block_number = db - .get_finalized_view()? 
- .and_then(|view| { - db.get_block_hash_by_view(view) - .expect("no header found at view {view}") - }) - .and_then(|hash| { - db.get_block_by_hash(&hash) - .expect("no block found for hash {hash}") - }) - .map(|block| block.number()); + let latest_block_number = latest_block + .as_ref() + .expect("Some(block) expected") + .number(); + tracing::info!("latest_block_number = {latest_block_number}"); Ok(Self { db, @@ -165,7 +164,7 @@ impl Sync { state, recent_proposals: VecDeque::with_capacity(max_batch_size), inject_at: None, - started_at_block_number: latest_block_number.unwrap_or_default(), + started_at_block_number: latest_block_number, checkpoint_hash: Hash::ZERO, }) } From deec0fa15cffba2c22746fbbcb83f3755269c53a Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 09:31:12 +0800 Subject: [PATCH 095/119] feat: store gas_used as a proxy for block size. --- zilliqa/src/sync.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index db01d1e71..4d4845ede 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -26,6 +26,7 @@ use crate::{ }, node::MessageSender, time::SystemTime, + transaction::EvmGas, }; // Syncing Algorithm @@ -112,7 +113,7 @@ impl Sync { let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. - // This in-memory DB is placed in-here as it is only used in this module. + // This in-memory DB is placed here as it is only used in this module. db.with_sqlite_tx(|c| { c.execute_batch( "CREATE TEMP TABLE IF NOT EXISTS sync_data ( @@ -120,8 +121,9 @@ impl Sync { parent_hash BLOB NOT NULL, block_number INTEGER NOT NULL PRIMARY KEY, view_number INTEGER NOT NULL, - peer BLOB DEFAULT NULL, - version INTEGER DEFAULT 0 + gas_used INTEGER NOT NULL, + version INTEGER DEFAULT 0, + peer BLOB DEFAULT NULL ); CREATE INDEX IF NOT EXISTS idx_sync_data ON sync_data(block_number) WHERE peer IS NOT NULL;", )?; @@ -148,7 +150,6 @@ impl Sync { .as_ref() .expect("Some(block) expected") .number(); - tracing::info!("latest_block_number = {latest_block_number}"); Ok(Self { db, @@ -220,14 +221,14 @@ impl Sync { let mut result = None; self.db.with_sqlite_tx(|c| { result = c - .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, peer, version FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, gas_used, version, peer FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? .query_row([], |row| Ok(( - BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?), + BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?), PeerInfo { last_used: Instant::now(), score:u32::MAX, version: row.get(5)?, - peer_id: PeerId::from_bytes(row.get::<_,Vec>(4)?.as_slice()).unwrap(), + peer_id: PeerId::from_bytes(row.get::<_,Vec>(6)?.as_slice()).unwrap(), }, ))) .optional()?; @@ -277,13 +278,14 @@ impl Sync { fn push_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> { self.db.with_sqlite_tx(|c| { c.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, peer, version) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :peer, :version)")? 
+ "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used, version, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used, :version, :peer)")? .execute( named_params! { ":parent_hash": meta.qc.block_hash, ":block_hash": meta.hash, ":block_number": meta.number, ":view_number": meta.view, + ":gas_used": meta.gas_used, ":peer": peer.peer_id.to_bytes(), ":version": peer.version, }, @@ -297,13 +299,14 @@ impl Sync { self.db.with_sqlite_tx(|c| { for meta in metas { c.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number) VALUES (:parent_hash, :block_hash, :block_number, :view_number)")? + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used)")? .execute( named_params! { ":parent_hash": meta.qc.block_hash, ":block_hash": meta.hash, ":block_number": meta.number, ":view_number": meta.view, + ":gas_used": meta.gas_used, }, )?; } @@ -354,11 +357,13 @@ impl Sync { let block_hash = self.recent_proposals.back().unwrap().hash(); let block_number = self.recent_proposals.back().unwrap().number(); let view_number = self.recent_proposals.back().unwrap().view(); + let gas_used = self.recent_proposals.back().unwrap().header.gas_used; let meta = BlockHeader::from_meta_data( parent_hash, block_hash, block_number, view_number, + gas_used, ); self.request_missing_metadata(Some(meta))?; @@ -1283,8 +1288,10 @@ impl BlockHeader { block_hash: Hash, block_number: u64, view_number: u64, + gas_used: EvmGas, ) -> BlockHeader { let mut meta = BlockHeader { + gas_used, view: view_number, number: block_number, hash: block_hash, From 4d68b82aade69f69fd840032cbe81b3cc37e44ca Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 09:42:20 +0800 Subject: [PATCH 096/119] feat: reordered handle_metadata_response() to allow for micro-segmentation. --- zilliqa/src/sync.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 4d4845ede..b3d5163e3 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -837,13 +837,6 @@ impl Sync { // Chain segment is sane let segment = response; - // Record landmark, including peer that has this set of blocks - self.push_segment(segment_peer, *meta)?; - - // Record the oldest block in the chain's parent - self.state = SyncState::Phase1(segment.last().cloned().unwrap()); - let last_block_hash = segment.last().as_ref().unwrap().hash; - tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", segment.len(), @@ -854,6 +847,13 @@ impl Sync { // Record the constructed chain metadata self.insert_metadata(&segment)?; + // Record landmark(s), including peer that has this set of blocks + self.push_segment(segment_peer, *meta)?; + + // Record the oldest block in the chain's parent + self.state = SyncState::Phase1(segment.last().cloned().unwrap()); + let last_block_hash = segment.last().as_ref().unwrap().hash; + // If the checkpoint is in this segment let checkpointed = segment.iter().any(|b| b.hash == self.checkpoint_hash); let started = self.started_at_block_number <= segment.first().as_ref().unwrap().number From a00851a801f9c818f49862e5fc84438c7d34325a Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 14:08:03 +0800 Subject: [PATCH 097/119] fix: removed dynamic_batch_sizing() as it should be unnecessary until block 1.0M in protomainnet. 
--- zilliqa/src/lib.rs | 2 +- zilliqa/src/sync.rs | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/zilliqa/src/lib.rs b/zilliqa/src/lib.rs index 9b783a862..8da13f370 100644 --- a/zilliqa/src/lib.rs +++ b/zilliqa/src/lib.rs @@ -22,8 +22,8 @@ pub mod scilla; mod scilla_proto; pub mod serde_util; pub mod state; -pub mod test_util; pub mod sync; +pub mod test_util; pub mod time; pub mod transaction; pub mod zq1_proto; diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index b3d5163e3..364b28582 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -1131,8 +1131,9 @@ impl Sync { fn get_next_peer(&mut self) -> Option { if self.peers.len() >= Self::MIN_PEERS { let mut peer = self.peers.pop()?; - peer.last_used = std::time::Instant::now(); // used to determine stale requests. - self.max_batch_size = self.dynamic_batch_sizing(&peer); + peer.last_used = std::time::Instant::now(); + // dynamic sizing should not be needed, if we're syncing recent blocks. + // self.max_batch_size = self.dynamic_batch_sizing(&peer); tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); return Some(peer); } @@ -1144,7 +1145,7 @@ impl Sync { /// /// Due to a hard-coded 10MB response limit in libp2p, we may be limited in how many blocks we can request /// for in a single request, between 1-100 blocks. - fn dynamic_batch_sizing(&self, peer: &PeerInfo) -> usize { + fn _dynamic_batch_sizing(&self, peer: &PeerInfo) -> usize { match (&self.state, &peer.version, &self.in_flight_reason) { // V1 response may be too large, reduce request range. (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => self From 4ed1f3865597f09391e94cecf15ecbb6f8bde578 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 16:05:07 +0800 Subject: [PATCH 098/119] feat: shifts txn verification from server side to client side. --- zilliqa/src/consensus.rs | 2 +- zilliqa/src/p2p_node.rs | 13 +------------ zilliqa/src/sync.rs | 33 ++++++++++++++++++++++++++------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 77ff2ba04..08f1b3a98 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -1701,7 +1701,7 @@ impl Consensus { fn committee_for_hash(&self, parent_hash: Hash) -> Result> { let Ok(Some(parent)) = self.get_block(&parent_hash) else { - tracing::error!("parent block not found: {:?}", parent_hash); + // tracing::error!("parent block not found: {:?}", parent_hash); return Ok(Vec::new()); // return an empty vector instead of Err for graceful app-level error-handling }; diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index 9cec57729..6fd941b23 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -34,10 +34,9 @@ use crate::{ cfg::{Config, ConsensusConfig, NodeConfig}, crypto::SecretKey, db, - message::{BlockRequestV2, ExternalMessage, InternalMessage}, + message::{ExternalMessage, InternalMessage}, node::{OutgoingMessageFailure, RequestId}, node_launcher::{NodeInputChannels, NodeLauncher, ResponseChannel}, - time::SystemTime, }; /// Messages are a tuple of the destination shard ID and the actual message. 
@@ -313,16 +312,6 @@ impl P2pNode { debug!(source = %_source, %to, external_message = %_external_message, request_id = %_request_id, "message received"); let _topic = Self::shard_id_to_topic(shard_id); let _id = format!("{}", _request_id); - - // insert local time for BlockRequestV2 - this is checked in Sync::HandleMetadataRequest - let _external_message = match _external_message { - ExternalMessage::MetaDataRequest(BlockRequestV2{from_height, to_height, ..}) => ExternalMessage::MetaDataRequest(BlockRequestV2{ - from_height, to_height, request_at: SystemTime::now(), - }), - // pass-thru everything else - e => e, - }; - cfg_if! { if #[cfg(not(feature = "fake_response_channel"))] { self.send_to(&_topic.hash(), |c| c.requests.send((_source, _id, _external_message, ResponseChannel::Remote(_channel))))?; diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 364b28582..e8bdfb1f4 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -430,18 +430,20 @@ impl Sync { } /// Convenience function to convert a block to a proposal (add full txs) - /// NOTE: Includes intershard transactions. Should only be used for syncing history, - /// not for consensus messages regarding new blocks. + /// Should only be used for syncing history, not for consensus messages regarding new blocks. fn block_to_proposal(&self, block: Block) -> Proposal { // since block must be valid, unwrap(s) are safe let txs = block .transactions .iter() .map(|hash| self.db.get_transaction(hash).unwrap().unwrap()) - .map(|tx| tx.verify().unwrap()) + // handle verification on the client-side + .map(|tx| { + let hash = tx.calculate_hash(); + (tx, hash) + }) .collect_vec(); - - Proposal::from_parts(block, txs) + Proposal::from_parts_with_hashes(block, txs) } /// Phase 2: Retry Phase 1 @@ -480,6 +482,23 @@ impl Sync { &mut self, from: PeerId, response: Vec, + ) -> Result<()> { + // Verify transactions on the client-side + let proposals = response + .into_iter() + .map(|p| { + let (b, t) = p.into_parts(); + let txns = t.into_iter().map(|t| t.verify().unwrap()).collect_vec(); + Proposal::from_parts(b, txns) + }) + .collect_vec(); + self.inner_handle_multiblock_response(from, proposals) + } + + pub fn inner_handle_multiblock_response( + &mut self, + from: PeerId, + response: Vec, ) -> Result<()> { if let Some(peer) = self.in_flight.as_ref() { if peer.peer_id != from { @@ -759,7 +778,7 @@ impl Sync { .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); - self.handle_multiblock_response(from, multi_blocks)?; + self.inner_handle_multiblock_response(from, multi_blocks)?; } _ => { tracing::error!( @@ -883,7 +902,7 @@ impl Sync { from ); - // Do not respond to stale requests + // Do not respond to stale requests as the client has timed-out if request.request_at.elapsed()? 
> self.request_timeout { tracing::warn!("sync::MetadataRequest : stale request"); return Ok(ExternalMessage::Acknowledgement); From 3da126ff6cf8ddf44e359f5ed4ba6c3f9ee58796 Mon Sep 17 00:00:00 2001 From: Shawn Date: Fri, 24 Jan 2025 17:51:17 +0800 Subject: [PATCH 099/119] fix: validators_can_join..() --- zilliqa/tests/it/staking.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index 65dd1a7e8..179b46d84 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -430,10 +430,10 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let wallet = network.genesis_wallet().await; // randomise the current epoch state and current leader - // let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); - // network - // .run_until_block(&wallet, blocks_to_prerun.into(), 100) - // .await; + let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); + network + .run_until_block(&wallet, blocks_to_prerun.into(), 200) + .await; // First test joining deposit_v2 let index = network.add_node(); @@ -447,7 +447,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); - network.run_until_synced(index).await; + // This has to be done before `contract_upgrade_block_heights` which is 12, by default in the tests let deposit_hash = deposit_stake( &mut network, &wallet, @@ -532,7 +532,6 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { ); // Give new node time to catch up to block including deposit_v3 deployment - network.run_until_synced(index).await; network .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) .await; From 4449e64c07b2b29426dbea87c75ec56202f7c8e4 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 27 Jan 2025 10:08:58 +0800 Subject: [PATCH 100/119] fix PR comments: - https://github.com/Zilliqa/zq2/pull/2089/files#r1926920199 - https://github.com/Zilliqa/zq2/pull/2089/files#r1926894987 - https://github.com/Zilliqa/zq2/pull/2089/files#r1927243328 - https://github.com/Zilliqa/zq2/pull/2089/files#r1927261156 --- zilliqa/src/consensus.rs | 111 ++++++++++++---------------------- zilliqa/src/message.rs | 4 +- zilliqa/src/sync.rs | 29 ++++----- zilliqa/tests/it/consensus.rs | 1 - 4 files changed, 51 insertions(+), 94 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 08f1b3a98..f7408beb5 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -659,10 +659,8 @@ impl Consensus { return Ok(None); } - if let Err((e, temporary)) = self.check_block(&block, during_sync) { - if !temporary { - warn!(?e, "invalid block proposal received!"); - } + if let Err(e) = self.check_block(&block, during_sync) { + warn!(?e, "invalid block proposal received!"); return Ok(None); } @@ -2225,41 +2223,32 @@ impl Consensus { /// Check the validity of a block. Returns `Err(_, true)` if this block could become valid in the future and /// `Err(_, false)` if this block could never be valid. - fn check_block(&self, block: &Block, during_sync: bool) -> Result<(), (anyhow::Error, bool)> { - block.verify_hash().map_err(|e| (e, false))?; + fn check_block(&self, block: &Block, during_sync: bool) -> Result<()> { + block.verify_hash()?; if block.view() == 0 { // We only check a block if we receive it from an external source. 
We obviously already have the genesis // block, so we aren't ever expecting to receive it. - return Err((anyhow!("tried to check genesis block"), false)); + return Err(anyhow!("tried to check genesis block")); } - let Some(parent) = self - .get_block(&block.parent_hash()) - .map_err(|e| (e, false))? - else { + let Some(parent) = self.get_block(&block.parent_hash())? else { warn!( "Missing parent block while trying to check validity of block number {}", block.number() ); - return Err((MissingBlockError::from(block.parent_hash()).into(), true)); + return Err(MissingBlockError::from(block.parent_hash()).into()); }; - let finalized_view = self.get_finalized_view().map_err(|e| (e, false))?; - let Some(finalized_block) = self - .get_block_by_view(finalized_view) - .map_err(|e| (e, false))? - else { - return Err((MissingBlockError::from(finalized_view).into(), false)); + let finalized_view = self.get_finalized_view()?; + let Some(finalized_block) = self.get_block_by_view(finalized_view)? else { + return Err(MissingBlockError::from(finalized_view).into()); }; if block.view() < finalized_block.view() { - return Err(( - anyhow!( - "block is too old: view is {} but we have finalized {}", - block.view(), - finalized_block.view() - ), - false, + return Err(anyhow!( + "block is too old: view is {} but we have finalized {}", + block.view(), + finalized_block.view() )); } @@ -2274,12 +2263,11 @@ impl Consensus { let committee = self .state .at_root(parent.state_root_hash().into()) - .get_stakers(block.header) - .map_err(|e| (e, false))?; + .get_stakers(block.header)?; if verified.is_err() { info!(?block, "Unable to verify block = "); - return Err((anyhow!("invalid block signature found! block hash: {:?} block view: {:?} committee len {:?}", block.hash(), block.view(), committee.len()), false)); + return Err(anyhow!("invalid block signature found! block hash: {:?} block view: {:?} committee len {:?}", block.hash(), block.view(), committee.len())); } // Check if the co-signers of the block's QC represent the supermajority. @@ -2288,13 +2276,11 @@ impl Consensus { &committee, parent.state_root_hash(), block, - ) - .map_err(|e| (e, false))?; + )?; // Verify the block's QC signature - note the parent should be the committee the QC // was signed over. - self.verify_qc_signature(&block.header.qc, committee.clone()) - .map_err(|e| (e, false))?; + self.verify_qc_signature(&block.header.qc, committee.clone())?; if let Some(agg) = &block.agg { // Check if the signers of the block's aggregate QC represent the supermajority self.check_quorum_in_indices( @@ -2302,24 +2288,16 @@ impl Consensus { &committee, parent.state_root_hash(), block, - ) - .map_err(|e| (e, false))?; + )?; // Verify the aggregate QC's signature - self.batch_verify_agg_signature(agg, &committee) - .map_err(|e| (e, false))?; + self.batch_verify_agg_signature(agg, &committee)?; } // Retrieve the highest among the aggregated QCs and check if it equals the block's QC. - let block_high_qc = self.get_high_qc_from_block(block).map_err(|e| (e, false))?; - let Some(block_high_qc_block) = self - .get_block(&block_high_qc.block_hash) - .map_err(|e| (e, false))? - else { + let block_high_qc = self.get_high_qc_from_block(block)?; + let Some(block_high_qc_block) = self.get_block(&block_high_qc.block_hash)? 
else { warn!("missing finalized block4"); - return Err(( - MissingBlockError::from(block_high_qc.block_hash).into(), - false, - )); + return Err(MissingBlockError::from(block_high_qc.block_hash).into()); }; // Prevent the creation of forks from the already committed chain if block_high_qc_block.view() < finalized_block.view() { @@ -2329,19 +2307,16 @@ impl Consensus { finalized_block.view(), self.high_qc, block); - return Err(( - anyhow!( - "invalid block - high QC view is {} while finalized is {}", - block_high_qc_block.view(), - finalized_block.view() - ), - false, + return Err(anyhow!( + "invalid block - high QC view is {} while finalized is {}", + block_high_qc_block.view(), + finalized_block.view() )); } // This block's timestamp must be greater than or equal to the parent block's timestamp. if block.timestamp() < parent.timestamp() { - return Err((anyhow!("timestamp decreased from parent"), false)); + return Err(anyhow!("timestamp decreased from parent")); } // This block's timestamp should be at most `self.allowed_timestamp_skew` away from the current time. Note this @@ -2351,31 +2326,22 @@ impl Consensus { .elapsed() .unwrap_or_else(|err| err.duration()); if !during_sync && difference > self.config.allowed_timestamp_skew { - return Err(( - anyhow!( - "timestamp difference for block {} greater than allowed skew: {difference:?}", - block.view() - ), - false, + return Err(anyhow!( + "timestamp difference for block {} greater than allowed skew: {difference:?}", + block.view() )); } // Blocks must be in sequential order if block.header.number != parent.header.number + 1 { - return Err(( - anyhow!( - "block number is not sequential: {} != {} + 1", - block.header.number, - parent.header.number - ), - false, + return Err(anyhow!( + "block number is not sequential: {} != {} + 1", + block.header.number, + parent.header.number )); } - if !self - .block_extends_from(block, &finalized_block) - .map_err(|e| (e, false))? - { + if !self.block_extends_from(block, &finalized_block)? 
{ warn!( "invalid block {:?}, does not extend finalized block {:?} our head is {:?}", block, @@ -2383,9 +2349,8 @@ impl Consensus { self.head_block() ); - return Err(( - anyhow!("invalid block, does not extend from finalized block"), - false, + return Err(anyhow!( + "invalid block, does not extend from finalized block" )); } Ok(()) diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index fca919a3e..8f4a9b28c 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -228,7 +228,7 @@ impl fmt::Debug for BlockResponse { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct BlockRequestV2 { +pub struct RequestBlocksByHeight { pub request_at: SystemTime, pub from_height: u64, pub to_height: u64, @@ -277,7 +277,7 @@ pub enum ExternalMessage { AddPeer, RemovePeer, InjectedProposal(InjectedProposal), - MetaDataRequest(BlockRequestV2), + MetaDataRequest(RequestBlocksByHeight), MetaDataResponse(Vec), MultiBlockRequest(Vec), MultiBlockResponse(Vec), diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index e8bdfb1f4..88ad98bcb 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -21,8 +21,8 @@ use crate::{ crypto::Hash, db::Db, message::{ - Block, BlockHeader, BlockRequest, BlockRequestV2, BlockResponse, ExternalMessage, - InjectedProposal, Proposal, QuorumCertificate, + Block, BlockHeader, BlockRequest, BlockResponse, ExternalMessage, InjectedProposal, + Proposal, QuorumCertificate, RequestBlocksByHeight, }, node::MessageSender, time::SystemTime, @@ -100,9 +100,6 @@ impl Sync { #[cfg(debug_assertions)] const DO_SPECULATIVE: bool = false; - // Minimum of 2 peers to avoid single source of truth. - const MIN_PEERS: usize = 2; - pub fn new( config: &NodeConfig, db: Arc, @@ -895,7 +892,7 @@ impl Sync { pub fn handle_metadata_request( &mut self, from: PeerId, - request: BlockRequestV2, + request: RequestBlocksByHeight, ) -> Result { tracing::debug!( "sync::MetadataRequest : received a metadata request from {}", @@ -981,7 +978,7 @@ impl Sync { .. }), PeerVer::V2, - ) => ExternalMessage::MetaDataRequest(BlockRequestV2 { + ) => ExternalMessage::MetaDataRequest(RequestBlocksByHeight { request_at: SystemTime::now(), to_height: block_number.saturating_sub(1), from_height: block_number.saturating_sub(self.max_batch_size as u64), @@ -1005,7 +1002,7 @@ impl Sync { let meta = meta.unwrap(); let block_number = meta.number; self.state = SyncState::Phase1(meta); - ExternalMessage::MetaDataRequest(BlockRequestV2 { + ExternalMessage::MetaDataRequest(RequestBlocksByHeight { request_at: SystemTime::now(), to_height: block_number.sub(1), from_height: block_number.sub(self.max_batch_size as u64), @@ -1148,16 +1145,12 @@ impl Sync { /// Get the next best peer to use fn get_next_peer(&mut self) -> Option { - if self.peers.len() >= Self::MIN_PEERS { - let mut peer = self.peers.pop()?; - peer.last_used = std::time::Instant::now(); - // dynamic sizing should not be needed, if we're syncing recent blocks. - // self.max_batch_size = self.dynamic_batch_sizing(&peer); - tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); - return Some(peer); - } - tracing::warn!("sync::NextPeer : {} insufficient peers", self.peers.len()); - None + let mut peer = self.peers.pop()?; + peer.last_used = std::time::Instant::now(); + // dynamic sizing should not be needed, if we're syncing recent blocks. 
+ // self.max_batch_size = self.dynamic_batch_sizing(&peer); + tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score); + Some(peer) } /// Phase 1: Dynamic Batch Sizing diff --git a/zilliqa/tests/it/consensus.rs b/zilliqa/tests/it/consensus.rs index e456f52c3..6946a5e17 100644 --- a/zilliqa/tests/it/consensus.rs +++ b/zilliqa/tests/it/consensus.rs @@ -106,7 +106,6 @@ async fn block_production(mut network: Network) { info!("Adding networked node."); let index = network.add_node(); - network.run_until_synced(index).await; network .run_until( From 108887c570d1f71bce3e1af132e71ba8ecd483e8 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 27 Jan 2025 15:56:57 +0800 Subject: [PATCH 101/119] feat: moved sync db layer from sync.rs to db.rs --- zilliqa/src/db.rs | 163 ++++++++++++++++++++++++++++++- zilliqa/src/sync.rs | 186 ++++-------------------------------- zilliqa/tests/it/staking.rs | 2 +- 3 files changed, 181 insertions(+), 170 deletions(-) diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs index aba4e0968..b0ca82f97 100644 --- a/zilliqa/src/db.rs +++ b/zilliqa/src/db.rs @@ -6,13 +6,14 @@ use std::{ ops::Range, path::{Path, PathBuf}, sync::{Arc, Mutex}, - time::Duration, + time::{Duration, Instant}, }; use alloy::primitives::Address; use anyhow::{anyhow, Context, Result}; use eth_trie::{EthTrie, MemoryDB, Trie, DB}; use itertools::Itertools; +use libp2p::PeerId; use lru_mem::LruCache; use lz4::{Decoder, EncoderBuilder}; use rusqlite::{ @@ -28,6 +29,7 @@ use crate::{ exec::{ScillaError, ScillaException, ScillaTransition}, message::{AggregateQc, Block, BlockHeader, QuorumCertificate}, state::Account, + sync::PeerInfo, time::SystemTime, transaction::{EvmGas, Log, SignedTransaction, TransactionReceipt}, }; @@ -326,6 +328,19 @@ impl Db { CREATE TABLE IF NOT EXISTS state_trie (key BLOB NOT NULL PRIMARY KEY, value BLOB NOT NULL) WITHOUT ROWID; ", )?; + connection.execute_batch( + "CREATE TEMP TABLE IF NOT EXISTS sync_data ( + block_hash BLOB NOT NULL UNIQUE, + parent_hash BLOB NOT NULL, + block_number INTEGER NOT NULL PRIMARY KEY, + view_number INTEGER NOT NULL, + gas_used INTEGER NOT NULL, + version INTEGER DEFAULT 0, + peer BLOB DEFAULT NULL + ); + CREATE INDEX IF NOT EXISTS idx_sync_data ON sync_data(block_number) WHERE peer IS NOT NULL;", + )?; + Ok(()) } @@ -340,6 +355,152 @@ impl Db { Ok(Some(base_path.join("checkpoints").into_boxed_path())) } + /// Returns the number of stored sync segments + pub fn count_sync_segments(&self) -> Result { + Ok(self + .db + .lock() + .unwrap() + .prepare_cached("SELECT COUNT(block_number) FROM sync_data WHERE peer IS NOT NULL")? + .query_row([], |row| row.get(0)) + .optional()? + .unwrap_or_default()) + } + + /// Checks if the stored metadata exists + pub fn contains_sync_metadata(&self, hash: &Hash) -> Result { + Ok(self + .db + .lock() + .unwrap() + .prepare_cached("SELECT block_number FROM sync_data WHERE block_hash = ?1")? + .query_row([hash], |row| row.get::<_, u64>(0)) + .optional()? + .is_some()) + } + + /// Retrieves bulk metadata information from the given block_hash (inclusive) + pub fn get_sync_segment(&self, hash: Hash) -> Result> { + let db = self.db.lock().unwrap(); + + let mut hashes = Vec::new(); + let mut block_hash = hash; + + while let Some(parent_hash) = db + .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get::<_, Hash>(0)) + .optional()? 
+ { + hashes.push(block_hash); + block_hash = parent_hash; + } + Ok(hashes) + } + + /// Peeks into the top of the segment stack. + pub fn last_sync_segment(&self) -> Result> { + let db = self.db.lock().unwrap(); + let r = db.prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, gas_used, version, peer FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + .query_row([], |row| Ok(( + BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?), + PeerInfo { + last_used: Instant::now(), + score: u32::MAX, + version: row.get(5)?, + peer_id: PeerId::from_bytes(row.get::<_,Vec>(6)?.as_slice()).unwrap(), + }))).optional()?; + Ok(r) + } + + /// Pushes a particular segment into the stack. + pub fn push_sync_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> { + let db = self.db.lock().unwrap(); + db.prepare_cached( + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used, version, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used, :version, :peer)")? + .execute( + named_params! { + ":parent_hash": meta.qc.block_hash, + ":block_hash": meta.hash, + ":block_number": meta.number, + ":view_number": meta.view, + ":gas_used": meta.gas_used, + ":peer": peer.peer_id.to_bytes(), + ":version": peer.version, + }, + )?; + Ok(()) + } + + /// Bulk inserts a bunch of metadata. + pub fn insert_sync_metadata(&self, metas: &Vec) -> Result<()> { + let mut db = self.db.lock().unwrap(); + let tx = db.transaction()?; + + for meta in metas { + tx.prepare_cached( + "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used)")? + .execute( + named_params! { + ":parent_hash": meta.qc.block_hash, + ":block_hash": meta.hash, + ":block_number": meta.number, + ":view_number": meta.view, + ":gas_used": meta.gas_used, + })?; + } + tx.commit()?; + Ok(()) + } + + /// Empty the metadata table. + pub fn empty_sync_metadata(&self) -> Result<()> { + self.db + .lock() + .unwrap() + .execute("DELETE FROM sync_data", [])?; + Ok(()) + } + + /// Pops a segment from the stack; and bulk removes all metadata associated with it. + pub fn pop_sync_segment(&self) -> Result<()> { + let mut db = self.db.lock().unwrap(); + let c = db.transaction()?; + + if let Some(block_hash) = c.prepare_cached("SELECT block_hash FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + .query_row([], |row| row.get::<_,Hash>(0)).optional()? { + if let Some(parent_hash) = c.prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get(0)).optional()? { + + // update marker + c.prepare_cached( + "UPDATE sync_data SET peer = NULL WHERE block_hash = ?1")? + .execute( + [block_hash] + )?; + + // remove segment + let mut hashes = Vec::new(); + let mut block_hash = parent_hash; + while let Some(parent_hash) = c + .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get::<_, Hash>(0)) + .optional()? + { + hashes.push(block_hash); + block_hash = parent_hash; + } + + for hash in hashes { + c.prepare_cached("DELETE FROM sync_data WHERE block_hash = ?1")? 
+ .execute([hash])?; + } + } + } + + c.commit()?; + Ok(()) + } + /// Fetch checkpoint data from file and initialise db state /// Return checkpointed block and transactions which must be executed after this function /// Return None if checkpoint already loaded diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 88ad98bcb..0bb10c7cb 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -10,11 +10,7 @@ use alloy::primitives::BlockNumber; use anyhow::Result; use itertools::Itertools; use libp2p::PeerId; -use rusqlite::{ - named_params, - types::{FromSql, FromSqlResult, ToSql, ToSqlOutput, ValueRef}, - OptionalExtension, -}; +use rusqlite::types::{FromSql, FromSqlResult, ToSql, ToSqlOutput, ValueRef}; use crate::{ cfg::NodeConfig, @@ -110,37 +106,11 @@ impl Sync { let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. let max_blocks_in_flight = config.max_blocks_in_flight.clamp(max_batch_size, 1800); // up to 30-mins worth of blocks in-pipeline. - // This in-memory DB is placed here as it is only used in this module. - db.with_sqlite_tx(|c| { - c.execute_batch( - "CREATE TEMP TABLE IF NOT EXISTS sync_data ( - block_hash BLOB NOT NULL UNIQUE, - parent_hash BLOB NOT NULL, - block_number INTEGER NOT NULL PRIMARY KEY, - view_number INTEGER NOT NULL, - gas_used INTEGER NOT NULL, - version INTEGER DEFAULT 0, - peer BLOB DEFAULT NULL - ); - CREATE INDEX IF NOT EXISTS idx_sync_data ON sync_data(block_number) WHERE peer IS NOT NULL;", - )?; - Ok(()) - })?; - - // Restore metadata/segments - let mut segments = 0; - db.with_sqlite_tx(|c| { - segments = c - .prepare_cached("SELECT COUNT(block_number) FROM sync_data WHERE peer IS NOT NULL")? - .query_row([], |row| row.get::<_, usize>(0)) - .optional()? - .unwrap_or_default(); - Ok(()) - })?; - let state = if segments == 0 { + // Start from reset, or continue sync + let state = if db.count_sync_segments()? == 0 { SyncState::Phase0 } else { - SyncState::Retry1 + SyncState::Retry1 // continue sync }; let latest_block_number = latest_block @@ -167,156 +137,36 @@ impl Sync { }) } - /// Returns the number of stored segments fn count_segments(&self) -> Result { - let mut segments = 0; - self.db.with_sqlite_tx(|c| { - segments = c - .prepare_cached("SELECT COUNT(block_number) FROM sync_data WHERE peer IS NOT NULL")? - .query_row([], |row| row.get(0)) - .optional()? - .unwrap_or_default(); - Ok(()) - })?; - Ok(segments) + self.db.count_sync_segments() } - /// Checks if the stored metadata exists fn contains_metadata(&self, hash: &Hash) -> Result { - let mut result = false; - self.db.with_sqlite_tx(|c| { - result = c - .prepare_cached("SELECT block_number FROM sync_data WHERE block_hash = ?1")? - .query_row([hash], |row| row.get::<_, u64>(0)) - .optional()? - .is_some(); - Ok(()) - })?; - Ok(result) + self.db.contains_sync_metadata(hash) } - /// Retrieves bulk metadata information from the given block_hash (inclusive) fn get_segment(&self, hash: Hash) -> Result> { - let mut hashes = Vec::with_capacity(self.max_batch_size); - let mut block_hash = hash; - self.db.with_sqlite_tx(|c| { - while let Some(parent_hash) = c - .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? - .query_row([block_hash], |row| row.get::<_, Hash>(0)) - .optional()? - { - hashes.push(block_hash); - block_hash = parent_hash; - } - Ok(()) - })?; - Ok(hashes) + self.db.get_sync_segment(hash) } - /// Peeks into the top of the segment stack. 
fn last_segment(&self) -> Result> { - let mut result = None; - self.db.with_sqlite_tx(|c| { - result = c - .prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, gas_used, version, peer FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? - .query_row([], |row| Ok(( - BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?), - PeerInfo { - last_used: Instant::now(), - score:u32::MAX, - version: row.get(5)?, - peer_id: PeerId::from_bytes(row.get::<_,Vec>(6)?.as_slice()).unwrap(), - }, - ))) - .optional()?; - Ok(()) - })?; - Ok(result) + self.db.last_sync_segment() } - /// Pops a segment from the stack; and bulk removes all metadata associated with it. fn pop_segment(&self) -> Result<()> { - self.db.with_sqlite_tx(|c| { - if let Some(block_hash) = c.prepare_cached("SELECT block_hash FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? - .query_row([], |row| row.get::<_,Hash>(0)).optional()? { - if let Some(parent_hash) = c.prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? - .query_row([block_hash], |row| row.get(0)).optional()? { - - // update marker - c.prepare_cached( - "UPDATE sync_data SET peer = NULL WHERE block_hash = ?1")? - .execute( - [block_hash] - )?; - - // remove segment - let mut hashes = Vec::with_capacity(self.max_batch_size); - let mut block_hash = parent_hash; - while let Some(parent_hash) = c - .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? - .query_row([block_hash], |row| row.get::<_, Hash>(0)) - .optional()? - { - hashes.push(block_hash); - block_hash = parent_hash; - } - - for hash in hashes { - c.prepare_cached("DELETE FROM sync_data WHERE block_hash = ?1")? - .execute([hash])?; - } - } - } - Ok(()) - }) + self.db.pop_sync_segment() } - /// Pushes a particular segment into the stack. fn push_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> { - self.db.with_sqlite_tx(|c| { - c.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used, version, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used, :version, :peer)")? - .execute( - named_params! { - ":parent_hash": meta.qc.block_hash, - ":block_hash": meta.hash, - ":block_number": meta.number, - ":view_number": meta.view, - ":gas_used": meta.gas_used, - ":peer": peer.peer_id.to_bytes(), - ":version": peer.version, - }, - )?; - Ok(()) - }) + self.db.push_sync_segment(peer, meta) } - /// Bulk inserts a bunch of metadata. fn insert_metadata(&self, metas: &Vec) -> Result<()> { - self.db.with_sqlite_tx(|c| { - for meta in metas { - c.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used)")? - .execute( - named_params! { - ":parent_hash": meta.qc.block_hash, - ":block_hash": meta.hash, - ":block_number": meta.number, - ":view_number": meta.view, - ":gas_used": meta.gas_used, - }, - )?; - } - Ok(()) - }) + self.db.insert_sync_metadata(metas) } - /// Empty the metadata table. fn empty_metadata(&self) -> Result<()> { - self.db.with_sqlite_tx(|c| { - c.execute("DELETE FROM sync_data", [])?; - Ok(()) - }) + self.db.empty_sync_metadata() } /// Phase 0: Sync a block proposal. 
@@ -1218,11 +1068,11 @@ impl Sync { } #[derive(Debug, Clone, Eq, PartialEq)] -struct PeerInfo { - score: u32, - peer_id: PeerId, - last_used: Instant, - version: PeerVer, +pub struct PeerInfo { + pub score: u32, + pub peer_id: PeerId, + pub last_used: Instant, + pub version: PeerVer, } impl Ord for PeerInfo { @@ -1274,7 +1124,7 @@ enum SyncState { /// Peer Version #[derive(Debug, Clone, Eq, PartialEq)] -enum PeerVer { +pub enum PeerVer { V1 = 1, V2 = 2, } diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index 179b46d84..2fcaab5d3 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -430,7 +430,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let wallet = network.genesis_wallet().await; // randomise the current epoch state and current leader - let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..8); + let blocks_to_prerun = network.rng.lock().unwrap().gen_range(0..4); network .run_until_block(&wallet, blocks_to_prerun.into(), 200) .await; From 40c84ce977970221d722c15e4fb1d97302d70ffe Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 27 Jan 2025 17:58:39 +0800 Subject: [PATCH 102/119] feat: moved internal sync-peers to SyncPeers shared-state. https://github.com/Zilliqa/zq2/pull/2089/files#r1927206432 --- zilliqa/src/consensus.rs | 13 ++- zilliqa/src/node.rs | 18 +-- zilliqa/src/node_launcher.rs | 10 +- zilliqa/src/p2p_node.rs | 16 ++- zilliqa/src/sync.rs | 214 +++++++++++++++++++---------------- 5 files changed, 155 insertions(+), 116 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index f7408beb5..bc044c001 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -36,7 +36,7 @@ use crate::{ node::{MessageSender, NetworkMessage, OutgoingMessageFailure}, pool::{TransactionPool, TxAddResult, TxPoolContent}, state::State, - sync::Sync, + sync::{Sync, SyncPeers}, time::SystemTime, transaction::{EvmGas, SignedTransaction, TransactionReceipt, VerifiedTransaction}, }; @@ -187,6 +187,7 @@ impl Consensus { message_sender: MessageSender, reset_timeout: UnboundedSender, db: Arc, + peers: Arc, ) -> Result { trace!( "Opening database in {:?} for shard {}", @@ -312,7 +313,13 @@ impl Consensus { } }; - let sync = Sync::new(&config, db.clone(), &latest_block, message_sender.clone())?; + let sync = Sync::new( + &config, + db.clone(), + &latest_block, + message_sender.clone(), + peers.clone(), + )?; let mut consensus = Consensus { secret_key, @@ -411,7 +418,7 @@ impl Consensus { }) .collect_vec(); - consensus.sync.add_peers(recent_peer_ids); + peers.add_peers(recent_peer_ids); } Ok(consensus) diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index d04a7a7ec..edd6f835f 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -40,6 +40,7 @@ use crate::{ p2p_node::{LocalMessageTuple, OutboundMessageTuple}, pool::{TxAddResult, TxPoolContent}, state::State, + sync::SyncPeers, transaction::{ EvmGas, SignedTransaction, TransactionReceipt, TxIntershard, VerifiedTransaction, }, @@ -170,6 +171,7 @@ impl ChainId { } impl Node { + #[allow(clippy::too_many_arguments)] pub fn new( config: NodeConfig, secret_key: SecretKey, @@ -178,6 +180,7 @@ impl Node { request_responses: UnboundedSender<(ResponseChannel, ExternalMessage)>, reset_timeout: UnboundedSender, peer_num: Arc, + peers: Arc, ) -> Result { config.validate()?; let peer_id = secret_key.to_libp2p_keypair().public().to_peer_id(); @@ -201,7 +204,14 @@ impl Node { reset_timeout: reset_timeout.clone(), db: db.clone(), chain_id: 
ChainId::new(config.eth_chain_id), - consensus: Consensus::new(secret_key, config, message_sender, reset_timeout, db)?, + consensus: Consensus::new( + secret_key, + config, + message_sender, + reset_timeout, + db, + peers, + )?, peer_num, }; Ok(node) @@ -227,12 +237,6 @@ impl Node { )))?; } } - ExternalMessage::AddPeer => { - self.consensus.sync.add_peer(from); - } - ExternalMessage::RemovePeer => { - self.consensus.sync.remove_peer(from); - } // `Proposals` are re-routed to `handle_request()` _ => { warn!("unexpected message type"); diff --git a/zilliqa/src/node_launcher.rs b/zilliqa/src/node_launcher.rs index 747208918..50d10cf52 100644 --- a/zilliqa/src/node_launcher.rs +++ b/zilliqa/src/node_launcher.rs @@ -32,6 +32,7 @@ use crate::{ message::{ExternalMessage, InternalMessage}, node::{self, OutgoingMessageFailure}, p2p_node::{LocalMessageTuple, OutboundMessageTuple}, + sync::SyncPeers, }; pub struct NodeLauncher { @@ -96,7 +97,7 @@ impl NodeLauncher { local_outbound_message_sender: UnboundedSender, request_responses_sender: UnboundedSender<(ResponseChannel, ExternalMessage)>, peer_num: Arc, - ) -> Result<(Self, NodeInputChannels)> { + ) -> Result<(Self, NodeInputChannels, Arc)> { /// Helper to create a (sender, receiver) pair for a channel. fn sender_receiver() -> (UnboundedSender, UnboundedReceiverStream) { let (sender, receiver) = mpsc::unbounded_channel(); @@ -110,6 +111,9 @@ impl NodeLauncher { let (local_messages_sender, local_messages_receiver) = sender_receiver(); let (reset_timeout_sender, reset_timeout_receiver) = sender_receiver(); + let peer_id = secret_key.to_libp2p_keypair().public().to_peer_id(); + let peers: Arc = Arc::new(SyncPeers::new(peer_id)); + let node = Node::new( config.clone(), secret_key, @@ -118,7 +122,9 @@ impl NodeLauncher { request_responses_sender, reset_timeout_sender.clone(), peer_num, + peers.clone(), )?; + let node = Arc::new(Mutex::new(node)); for api_server in &config.api_servers { @@ -168,7 +174,7 @@ impl NodeLauncher { local_messages: local_messages_sender, }; - Ok((launcher, input_channels)) + Ok((launcher, input_channels, peers)) } pub async fn start_shard_node(&mut self) -> Result<()> { diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index 6fd941b23..24cc271ad 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -37,6 +37,7 @@ use crate::{ message::{ExternalMessage, InternalMessage}, node::{OutgoingMessageFailure, RequestId}, node_launcher::{NodeInputChannels, NodeLauncher, ResponseChannel}, + sync::SyncPeers, }; /// Messages are a tuple of the destination shard ID and the actual message. @@ -61,6 +62,7 @@ pub type OutboundMessageTuple = (Option<(PeerId, RequestId)>, u64, ExternalMessa pub type LocalMessageTuple = (u64, u64, InternalMessage); pub struct P2pNode { + shard_peers: HashMap>, shard_nodes: HashMap, shard_threads: JoinSet>, task_threads: JoinSet>, @@ -148,6 +150,7 @@ impl P2pNode { .build(); Ok(Self { + shard_peers: HashMap::new(), shard_nodes: HashMap::new(), peer_id, secret_key, @@ -194,7 +197,7 @@ impl P2pNode { info!("LaunchShard message received for a shard we're already running. 
Ignoring..."); return Ok(()); } - let (mut node, input_channels) = NodeLauncher::new( + let (mut node, input_channels, peers) = NodeLauncher::new( self.secret_key, config, self.outbound_message_sender.clone(), @@ -203,6 +206,7 @@ impl P2pNode { self.peer_num.clone(), ) .await?; + self.shard_peers.insert(topic.hash(), peers); self.shard_nodes.insert(topic.hash(), input_channels); self.shard_threads .spawn(async move { node.start_shard_node().await }); @@ -266,12 +270,14 @@ impl P2pNode { .add_address(&peer_id, address.clone()); } SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Subscribed { peer_id, topic })) => { - let message = ExternalMessage::AddPeer; - self.send_to(&topic, |c| c.broadcasts.send((peer_id, message)))?; + if let Some(peers) = self.shard_peers.get(&topic) { + peers.add_peer(peer_id); + } } SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Unsubscribed { peer_id, topic })) => { - let message = ExternalMessage::RemovePeer; - self.send_to(&topic, |c| c.broadcasts.send((peer_id, message)))?; + if let Some(peers) = self.shard_peers.get(&topic) { + peers.remove_peer(peer_id); + } } SwarmEvent::Behaviour(BehaviourEvent::Gossipsub(gossipsub::Event::Message{ message_id: msg_id, diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 0bb10c7cb..db972dcbc 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -2,7 +2,7 @@ use std::{ cmp::Ordering, collections::{BinaryHeap, VecDeque}, ops::Sub, - sync::Arc, + sync::{Arc, Mutex}, time::{Duration, Instant}, }; @@ -62,11 +62,10 @@ pub struct Sync { db: Arc, // message bus message_sender: MessageSender, - // internal list of peers, maintained with add_peer/remove_peer. - peers: BinaryHeap, + // internal peers + peers: Arc, // peer handling an in-flight request in_flight: Option, - in_flight_reason: DownGrade, // in-flight request timeout, before retry request_timeout: Duration, // how many blocks to request at once @@ -101,6 +100,7 @@ impl Sync { db: Arc, latest_block: &Option, message_sender: MessageSender, + peers: Arc, ) -> Result { let peer_id = message_sender.our_peer_id; let max_batch_size = config.block_request_batch_size.clamp(30, 180); // up to 180 sec of blocks at a time. @@ -121,13 +121,12 @@ impl Sync { Ok(Self { db, message_sender, - peers: BinaryHeap::new(), peer_id, + peers, request_timeout: config.consensus.consensus_timeout, max_batch_size, max_blocks_in_flight, in_flight: None, - in_flight_reason: DownGrade::None, in_pipeline: usize::MIN, state, recent_proposals: VecDeque::with_capacity(max_batch_size), @@ -364,11 +363,13 @@ impl Sync { if response.is_empty() { // Empty response, downgrade peer and retry phase 1. 
tracing::warn!("sync::MultiBlockResponse : empty blocks {from}",); - self.done_with_peer(DownGrade::Empty); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::Empty); self.state = SyncState::Retry1; return Ok(()); } else { - self.done_with_peer(DownGrade::None); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::None); } tracing::info!( @@ -465,7 +466,8 @@ impl Sync { "sync::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", peer.peer_id ); - self.done_with_peer(DownGrade::Timeout); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::Timeout); } else { return Ok(()); } @@ -478,9 +480,9 @@ impl Sync { } // will be re-inserted below - if let Some(peer) = self.get_next_peer() { + if let Some(peer) = self.peers.get_next_peer() { // reinsert peer, as we will use a faux peer below, to force the request to go to the original responder - self.peers.push(peer); + self.peers.reinsert_peer(peer)?; // If we have no chain_segments, we have nothing to do if let Some((meta, peer_info)) = self.last_segment()? { @@ -550,19 +552,11 @@ impl Sync { && response.from_view == u64::MAX { tracing::info!("sync::HandleBlockResponse : upgrading {from}",); - if let Some(peer) = self.in_flight.as_mut() { + if let Some(mut peer) = self.in_flight.take() { if peer.peer_id == from { peer.version = PeerVer::V2; // retry with upgraded peer - peer.last_used = self - .peers - .peek() - .expect("peers.len() > 1") - .last_used - .checked_sub(Duration::from_secs(1)) - .expect("time is ordinal"); - self.done_with_peer(DownGrade::None); - + self.peers.reinsert_peer(peer)?; if Self::DO_SPECULATIVE { match self.state { SyncState::Phase1(_) => self.request_missing_metadata(None)?, @@ -667,10 +661,12 @@ impl Sync { if response.is_empty() { // Empty response, downgrade peer and retry with a new peer. tracing::warn!("sync::MetadataResponse : empty blocks {from}",); - self.done_with_peer(DownGrade::Empty); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::Empty); return Ok(()); } else { - self.done_with_peer(DownGrade::None); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::None); } // Check the linkage of the returned chain @@ -801,7 +797,8 @@ impl Sync { "sync::RequestMissingMetadata : in-flight request {} timed out, requesting from new peer", peer.peer_id ); - self.done_with_peer(DownGrade::Timeout); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::Timeout); } else { return Ok(()); } @@ -814,7 +811,7 @@ impl Sync { return Ok(()); } - if let Some(peer) = self.get_next_peer() { + if let Some(peer) = self.peers.get_next_peer() { tracing::info!( "sync::RequestMissingMetadata : requesting {} metadata of segment #{} from {}", self.max_batch_size, @@ -942,26 +939,84 @@ impl Sync { Ok(()) } + /// Returns (am_syncing, current_highest_block) + pub fn am_syncing(&self) -> Result { + Ok(self.in_pipeline != 0 + || !matches!(self.state, SyncState::Phase0) + || !self.recent_proposals.is_empty() + || self.count_segments()? != 0) + } + + // Returns (starting_block, current_block, highest_block) if we're syncing, + // None if we're not. + pub fn get_sync_data(&self) -> Result> { + let flag = self.am_syncing()?; + if !flag { + Ok(None) + } else { + let highest_block = self + .db + .get_canonical_block_by_number( + self.db + .get_highest_canonical_block_number()? + .expect("no highest block"), + )? 
+ .expect("missing highest block"); + + let highest_saved_block_number = highest_block.number(); + let highest_block_number_seen = self.recent_proposals.back().unwrap().number(); + Ok(Some(( + self.started_at_block_number, + highest_saved_block_number, + highest_block_number_seen, + ))) + } + } + + /// Sets the checkpoint, if node was started from a checkpoint. + pub fn set_checkpoint(&mut self, checkpoint: &Block) { + let hash = checkpoint.hash(); + tracing::info!("sync::Checkpoint {}", hash); + self.checkpoint_hash = hash; + } +} + +#[derive(Debug)] +pub struct SyncPeers { + peer_id: PeerId, + peers: Arc<Mutex<BinaryHeap<PeerInfo>>>, +} + +impl SyncPeers { + pub fn new(peer_id: PeerId) -> Self { + Self { + peer_id, + peers: Arc::new(Mutex::new(BinaryHeap::<PeerInfo>::new())), + } + } + /// Downgrade a peer based on the response received. /// /// This algorithm favours good peers that respond quickly (i.e. no timeout). /// In most cases, it eventually degenerates into 2 sources - avoid a single source of truth. - fn done_with_peer(&mut self, downgrade: DownGrade) { - if let Some(mut peer) = self.in_flight.take() { + fn done_with_peer(&self, in_flight: Option<PeerInfo>, downgrade: DownGrade) { + if let Some(mut peer) = in_flight { tracing::trace!("sync::DoneWithPeer {} {:?}", peer.peer_id, downgrade); - self.in_flight_reason = downgrade.clone(); + let mut peers = self.peers.lock().unwrap(); peer.score = peer.score.saturating_add(downgrade as u32); - // Ensure that the next peer is equal or better - peer.score = peer.score.max(self.peers.peek().unwrap().score); + if !peers.is_empty() { + // Ensure that the next peer is equal or better + peer.score = peer.score.max(peers.peek().unwrap().score); + } // Reinsert peers that are good if peer.score < u32::MAX { - self.peers.push(peer); + peers.push(peer); } } } /// Add bulk peers - pub fn add_peers(&mut self, peers: Vec<PeerId>) { + pub fn add_peers(&self, peers: Vec<PeerId>) { tracing::debug!("sync::AddPeers {:?}", peers); for peer in peers { if peer != self.peer_id { @@ -971,31 +1026,33 @@ impl Sync { } /// Add a peer to the list of peers. 
- pub fn remove_peer(&mut self, peer: PeerId) { - tracing::trace!("sync::RemovePeer {peer}"); - self.peers.retain(|p: &PeerInfo| p.peer_id != peer); + pub fn remove_peer(&self, peer: PeerId) { + let mut peers = self.peers.lock().unwrap(); + peers.retain(|p: &PeerInfo| p.peer_id != peer); + tracing::trace!("sync::RemovePeer {peer}/{}", peers.len()); } /// Get the next best peer to use - fn get_next_peer(&mut self) -> Option { - let mut peer = self.peers.pop()?; + pub fn get_next_peer(&self) -> Option { + let mut peer = self.peers.lock().unwrap().pop()?; peer.last_used = std::time::Instant::now(); // dynamic sizing should not be needed, if we're syncing recent blocks. // self.max_batch_size = self.dynamic_batch_sizing(&peer); @@ -1003,67 +1060,26 @@ impl Sync { Some(peer) } - /// Phase 1: Dynamic Batch Sizing - /// - /// Due to a hard-coded 10MB response limit in libp2p, we may be limited in how many blocks we can request - /// for in a single request, between 1-100 blocks. - fn _dynamic_batch_sizing(&self, peer: &PeerInfo) -> usize { - match (&self.state, &peer.version, &self.in_flight_reason) { - // V1 response may be too large, reduce request range. - (SyncState::Phase1(_), PeerVer::V1, DownGrade::Empty) => self - .max_batch_size - .saturating_sub(self.max_batch_size / 3) - .max(1), - // V1 response going well, increase the request range - (SyncState::Phase1(_), PeerVer::V1, DownGrade::None) => self - .max_batch_size - .saturating_add(self.max_batch_size) - // For V1, ~100 empty blocks saturates the response payload - .min(100), - // V2 response may be too large, which can induce a timeout. Split into 10 block segments - _ => self.max_batch_size, - } - } - - /// Returns (am_syncing, current_highest_block) - pub fn am_syncing(&self) -> Result { - Ok(self.in_pipeline != 0 - || !matches!(self.state, SyncState::Phase0) - || !self.recent_proposals.is_empty() - || self.count_segments()? != 0) + /// Reinserts the peer such that it is at the front of the queue. + pub fn reinsert_peer(&self, peer: PeerInfo) -> Result<()> { + let mut peers = self.peers.lock().unwrap(); + let mut peer = peer; + peer.last_used = peers + .peek() + .expect("peers.len() > 1") + .last_used + .checked_sub(Duration::from_secs(1)) + .expect("time is ordinal"); + peers.push(peer); + Ok(()) } - // Returns (starting_block, current_block, highest_block) if we're syncing, - // None if we're not. - pub fn get_sync_data(&self) -> Result> { - let flag = self.am_syncing()?; - if !flag { - Ok(None) - } else { - let highest_block = self - .db - .get_canonical_block_by_number( - self.db - .get_highest_canonical_block_number()? - .expect("no highest block"), - )? - .expect("missing highest block"); - - let highest_saved_block_number = highest_block.number(); - let highest_block_number_seen = self.recent_proposals.back().unwrap().number(); - Ok(Some(( - self.started_at_block_number, - highest_saved_block_number, - highest_block_number_seen, - ))) - } + pub fn len(&self) -> usize { + self.peers.lock().unwrap().len() } - /// Sets the checkpoint, if node was started from a checkpoint. - pub fn set_checkpoint(&mut self, checkpoint: &Block) { - let hash = checkpoint.hash(); - tracing::info!("sync::Checkpoint {}", hash); - self.checkpoint_hash = hash; + pub fn is_empty(&self) -> bool { + self.peers.lock().unwrap().is_empty() } } From 3ede63115aaa5e2a8e791443bdb762c6bbcbfc1d Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 27 Jan 2025 17:59:33 +0800 Subject: [PATCH 103/119] fix: tests with shared-state SyncPeers. 
--- z2/src/docgen.rs | 16 +++++++++++++--- zilliqa/benches/it.rs | 7 ++++++- zilliqa/tests/it/main.rs | 25 +++++++++++-------------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/z2/src/docgen.rs b/z2/src/docgen.rs index 1f9b25a52..76d71a30c 100644 --- a/z2/src/docgen.rs +++ b/z2/src/docgen.rs @@ -14,7 +14,7 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use tera::Tera; use tokio::fs; -use zilliqa::{cfg::NodeConfig, crypto::SecretKey}; +use zilliqa::{cfg::NodeConfig, crypto::SecretKey, sync::SyncPeers}; const SUPPORTED_APIS_PATH_NAME: &str = "index"; @@ -352,10 +352,20 @@ pub fn get_implemented_jsonrpc_methods() -> Result Consensus { let secret_key = genesis_deposits[index].0; + let peer_id = secret_key.to_libp2p_keypair().public().to_peer_id(); let (outbound_message_sender, a) = mpsc::unbounded_channel(); let (local_message_sender, b) = mpsc::unbounded_channel(); let (reset_timeout_sender, c) = mpsc::unbounded_channel(); @@ -208,6 +212,7 @@ fn consensus( message_sender, reset_timeout_sender, Arc::new(db), + Arc::new(SyncPeers::new(peer_id)), ) .unwrap() } diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index e1267087f..c90064ef7 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -77,6 +77,7 @@ use zilliqa::{ message::{ExternalMessage, InternalMessage}, node::{Node, RequestId}, node_launcher::ResponseChannel, + sync::SyncPeers, transaction::EvmGas, }; @@ -165,6 +166,9 @@ fn node( let (reset_timeout_sender, reset_timeout_receiver) = mpsc::unbounded_channel(); std::mem::forget(reset_timeout_receiver); + let peer_id = secret_key.to_libp2p_keypair().public().to_peer_id(); + let peers = Arc::new(SyncPeers::new(peer_id)); + let node = Node::new( NodeConfig { data_dir: datadir @@ -178,6 +182,7 @@ fn node( request_responses_sender, reset_timeout_sender, Arc::new(AtomicUsize::new(0)), + peers.clone(), )?; let node = Arc::new(Mutex::new(node)); let rpc_module: RpcModule>> = @@ -186,12 +191,13 @@ fn node( Ok(( TestNode { index, - peer_id: secret_key.to_libp2p_keypair().public().to_peer_id(), + peer_id, secret_key, onchain_key, inner: node, dir: datadir, rpc_module, + peers, }, message_receiver, local_message_receiver, @@ -208,6 +214,7 @@ struct TestNode { rpc_module: RpcModule>>, inner: Arc>, dir: Option, + peers: Arc, } struct Network { @@ -411,12 +418,7 @@ impl Network { node.peer_id, node.dir.as_ref().unwrap().path().to_string_lossy(), ); - node.inner - .lock() - .unwrap() - .consensus - .sync - .add_peers(peers.clone()); + node.peers.add_peers(peers.clone()); } Network { @@ -516,7 +518,7 @@ impl Network { let mut peers = self.nodes.iter().map(|n| n.peer_id).collect_vec(); peers.shuffle(self.rng.lock().unwrap().deref_mut()); - node.inner.lock().unwrap().consensus.sync.add_peers(peers); + node.peers.add_peers(peers.clone()); trace!("Node {}: {}", node.index, node.peer_id); @@ -590,12 +592,7 @@ impl Network { node.peer_id, node.dir.as_ref().unwrap().path().to_string_lossy(), ); - node.inner - .lock() - .unwrap() - .consensus - .sync - .add_peers(peers.clone()); + node.peers.add_peers(peers.clone()); } let (resend_message, receive_resend_message) = mpsc::unbounded_channel::(); From d4f6d26f4662f59c88828fbbe1be8931590205a7 Mon Sep 17 00:00:00 2001 From: Shawn Date: Mon, 27 Jan 2025 23:08:56 +0800 Subject: [PATCH 104/119] fix: issue of upgraded node, encountered in protomainnet where a node was recorded as V1 in Phase1, but was updated to V2 in Phase2, causing the sync to be stuck in a loop. 
--- zilliqa/src/sync.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index db972dcbc..27a431dd9 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -546,23 +546,25 @@ impl Sync { /// In phase 1, it will extract the metadata and feed it into handle_metadata_response. /// In phase 2, it will extract the blocks and feed it into handle_multiblock_response. pub fn handle_block_response(&mut self, from: PeerId, response: BlockResponse) -> Result<()> { - // Upgrade to V2 peer. + // V2 response if response.availability.is_none() && response.proposals.is_empty() && response.from_view == u64::MAX { - tracing::info!("sync::HandleBlockResponse : upgrading {from}",); + tracing::info!("sync::HandleBlockResponse : new response from {from}",); if let Some(mut peer) = self.in_flight.take() { - if peer.peer_id == from { + if peer.peer_id == from && peer.version == PeerVer::V1 { + // upgrade to V2 peer peer.version = PeerVer::V2; - // retry with upgraded peer self.peers.reinsert_peer(peer)?; - if Self::DO_SPECULATIVE { - match self.state { - SyncState::Phase1(_) => self.request_missing_metadata(None)?, - SyncState::Phase2(_) => self.request_missing_blocks()?, - _ => {} + match self.state { + SyncState::Phase2(_) => { + self.state = SyncState::Retry1; } + SyncState::Phase1(_) if Self::DO_SPECULATIVE => { + self.request_missing_metadata(None)?; + } + _ => {} } } } @@ -1062,6 +1064,9 @@ impl SyncPeers { /// Reinserts the peer such that it is at the front of the queue. pub fn reinsert_peer(&self, peer: PeerInfo) -> Result<()> { + if peer.score == u32::MAX { + return Ok(()); + } let mut peers = self.peers.lock().unwrap(); let mut peer = peer; peer.last_used = peers From 7cd32214f51b43e6787c62bdeb1dfc4471f3445d Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 09:04:06 +0800 Subject: [PATCH 105/119] nit: increase deposit_v3 boundary to 24. 
--- zilliqa/tests/it/staking.rs | 7 +++---- zilliqa/tests/it/unreliable.rs | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index 2fcaab5d3..b423ffcde 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -425,7 +425,7 @@ async fn rewards_are_sent_to_reward_address_of_proposer(mut network: Network) { check_miner_got_reward(&wallet, 1).await; } -#[zilliqa_macros::test(blocks_per_epoch = 2, deposit_v3_upgrade_block_height = 12)] +#[zilliqa_macros::test(blocks_per_epoch = 2, deposit_v3_upgrade_block_height = 24)] async fn validators_can_join_and_become_proposer(mut network: Network) { let wallet = network.genesis_wallet().await; @@ -447,7 +447,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { let staker_wallet = network.wallet_of_node(index).await; let pop_sinature = new_validator_key.pop_prove(); - // This has to be done before `contract_upgrade_block_heights` which is 12, by default in the tests + // This has to be done before `contract_upgrade_block_heights` which is 24, by default in this test let deposit_hash = deposit_stake( &mut network, &wallet, @@ -514,7 +514,6 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { check_miner_got_reward(&wallet, BlockNumber::Latest).await; // Now test joining deposit_v3 - let deposit_v3_deploy_block = 12; let index = network.add_node(); let new_validator_priv_key = network.get_node_raw(index).secret_key; let new_validator_pub_key = new_validator_priv_key.node_public_key(); @@ -533,7 +532,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { // Give new node time to catch up to block including deposit_v3 deployment network - .run_until_block(&staker_wallet, deposit_v3_deploy_block.into(), 200) + .run_until_block(&staker_wallet, 24.into(), 200) .await; let deposit_hash = deposit_v3_stake( diff --git a/zilliqa/tests/it/unreliable.rs b/zilliqa/tests/it/unreliable.rs index b35787e93..7518a3131 100644 --- a/zilliqa/tests/it/unreliable.rs +++ b/zilliqa/tests/it/unreliable.rs @@ -26,7 +26,6 @@ async fn blocks_are_produced_while_a_node_restarts(mut network: Network) { // Reconnect the 'restarted' node. network.connect_node(restarted_node); - network.run_until_synced(restarted_node).await; // TODO(#721): We should assert here that a new view occurred if-and-only-if the 'restarted' node was the proposer // of blocks 3 or 4. This would tell us that we aren't producing new views unnecessarily. From 19da84821457d2d36d074399d46e443f2f866b1a Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 10:15:00 +0800 Subject: [PATCH 106/119] feat: use libp2p timeout instead of internal sync timeout. 
--- zilliqa/src/consensus.rs | 9 +- zilliqa/src/node.rs | 2 +- zilliqa/src/p2p_node.rs | 3 +- zilliqa/src/sync.rs | 214 +++++++++++++++++---------------------- 4 files changed, 98 insertions(+), 130 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index bc044c001..9487ae98b 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -33,7 +33,7 @@ use crate::{ ExternalMessage, InternalMessage, NewView, Proposal, QuorumCertificate, Vote, MAX_COMMITTEE_SIZE, }, - node::{MessageSender, NetworkMessage, OutgoingMessageFailure}, + node::{MessageSender, NetworkMessage}, pool::{TransactionPool, TxAddResult, TxPoolContent}, state::State, sync::{Sync, SyncPeers}, @@ -3125,13 +3125,6 @@ impl Consensus { Ok(count) } - pub fn report_outgoing_message_failure( - &mut self, - _failure: OutgoingMessageFailure, - ) -> Result<()> { - Ok(()) // FIXME: Stub - } - pub fn tick(&mut self) -> Result<()> { trace!("consensus::tick()"); trace!("request_missing_blocks from timer"); diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index edd6f835f..c1dccc01f 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -331,7 +331,7 @@ impl Node { failure: OutgoingMessageFailure, ) -> Result<()> { debug!(from = %self.peer_id, %to, ?failure, "handling message failure"); - self.consensus.report_outgoing_message_failure(failure)?; + self.consensus.sync.handle_request_failure(failure)?; Ok(()) } diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index 24cc271ad..21737468e 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -112,7 +112,8 @@ impl P2pNode { Ok(Behaviour { request_response: request_response::cbor::Behaviour::new( iter::once((StreamProtocol::new("/zq2-message/1"), ProtocolSupport::Full)), - Default::default(), + request_response::Config::default() + .with_request_timeout(Duration::from_secs(10)), ), gossipsub: gossipsub::Behaviour::new( MessageAuthenticity::Signed(key_pair.clone()), diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 27a431dd9..b5b829063 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -20,7 +20,7 @@ use crate::{ Block, BlockHeader, BlockRequest, BlockResponse, ExternalMessage, InjectedProposal, Proposal, QuorumCertificate, RequestBlocksByHeight, }, - node::MessageSender, + node::{MessageSender, OutgoingMessageFailure, RequestId}, time::SystemTime, transaction::EvmGas, }; @@ -65,9 +65,7 @@ pub struct Sync { // internal peers peers: Arc, // peer handling an in-flight request - in_flight: Option, - // in-flight request timeout, before retry - request_timeout: Duration, + in_flight: Option<(PeerInfo, RequestId)>, // how many blocks to request at once max_batch_size: usize, // how many blocks to inject into the queue @@ -123,7 +121,6 @@ impl Sync { message_sender, peer_id, peers, - request_timeout: config.consensus.consensus_timeout, max_batch_size, max_blocks_in_flight, in_flight: None, @@ -136,36 +133,19 @@ impl Sync { }) } - fn count_segments(&self) -> Result { - self.db.count_sync_segments() - } - - fn contains_metadata(&self, hash: &Hash) -> Result { - self.db.contains_sync_metadata(hash) - } - - fn get_segment(&self, hash: Hash) -> Result> { - self.db.get_sync_segment(hash) - } - - fn last_segment(&self) -> Result> { - self.db.last_sync_segment() - } - - fn pop_segment(&self) -> Result<()> { - self.db.pop_sync_segment() - } - - fn push_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> { - self.db.push_sync_segment(peer, meta) - } - - fn insert_metadata(&self, metas: &Vec) -> 
Result<()> { - self.db.insert_sync_metadata(metas) - } - - fn empty_metadata(&self) -> Result<()> { - self.db.empty_sync_metadata() + pub fn handle_request_failure(&mut self, failure: OutgoingMessageFailure) -> Result<()> { + // chekc if the request is a sync messages + if let Some((peer, req_id)) = self.in_flight.as_ref() { + // downgrade peer due to timeout + if peer.peer_id == failure.peer && *req_id == failure.request_id { + tracing::warn!(to = %peer.peer_id, err = %failure.error, + "sync::RequestFailure : in-flight request failed" + ); + self.peers + .done_with_peer(self.in_flight.take(), DownGrade::Timeout); + } + } + Ok(()) } /// Phase 0: Sync a block proposal. @@ -239,14 +219,14 @@ impl Sync { tracing::info!( "sync::SyncProposal : finishing {} blocks for segment #{} from {}", self.recent_proposals.len(), - self.count_segments()?, + self.db.count_sync_segments()?, self.peer_id, ); // inject the proposals let proposals = self.recent_proposals.drain(..).collect_vec(); self.inject_proposals(proposals)?; } - self.empty_metadata()?; + self.db.empty_sync_metadata()?; self.state = SyncState::Phase0; } // Retry to fix sync issues e.g. peers that are now offline @@ -301,7 +281,7 @@ impl Sync { /// This will rebuild history from the previous marker, with another peer. /// If this function is called many times, it will eventually restart from Phase 0. fn retry_phase1(&mut self) -> Result<()> { - if self.count_segments()? == 0 { + if self.db.count_sync_segments()? == 0 { tracing::error!("sync::RetryPhase1 : cannot retry phase 1 without chain segments!"); self.state = SyncState::Phase0; return Ok(()); @@ -309,12 +289,12 @@ impl Sync { tracing::debug!( "sync::RetryPhase1 : retrying segment #{}", - self.count_segments()?, + self.db.count_sync_segments()?, ); // remove the last segment from the chain metadata - let (meta, _) = self.last_segment()?.unwrap(); - self.pop_segment()?; + let (meta, _) = self.db.last_sync_segment()?.unwrap(); + self.db.pop_sync_segment()?; self.state = SyncState::Phase1(meta); Ok(()) @@ -346,7 +326,7 @@ impl Sync { from: PeerId, response: Vec, ) -> Result<()> { - if let Some(peer) = self.in_flight.as_ref() { + if let Some((peer, _)) = self.in_flight.as_ref() { if peer.peer_id != from { tracing::warn!( "sync::MultiBlockResponse : unexpected peer={} != {from}", @@ -375,7 +355,7 @@ impl Sync { tracing::info!( "sync::MultiBlockResponse : received {} blocks for segment #{} from {}", response.len(), - self.count_segments()?, + self.db.count_sync_segments()?, from ); @@ -405,11 +385,11 @@ impl Sync { .sorted_by_key(|p| p.number()) .collect_vec(); - self.pop_segment()?; + self.db.pop_sync_segment()?; self.inject_proposals(proposals)?; // Done with phase 2 - if self.count_segments()? == 0 { + if self.db.count_sync_segments()? == 0 { self.state = SyncState::Phase3; } else if Self::DO_SPECULATIVE { // Speculatively request more blocks @@ -460,21 +440,11 @@ impl Sync { anyhow::bail!("sync::RequestMissingBlocks : invalid state"); } // Early exit if there's a request in-flight; and if it has not expired. 
- if let Some(peer) = self.in_flight.as_ref() { - if peer.last_used.elapsed() > self.request_timeout { - tracing::warn!( - "sync::RequestMissingBlocks : in-flight request {} timed out, requesting from new peer", - peer.peer_id - ); - self.peers - .done_with_peer(self.in_flight.take(), DownGrade::Timeout); - } else { - return Ok(()); - } - } else if self.in_pipeline > self.max_blocks_in_flight { + if self.in_flight.is_some() || self.in_pipeline > self.max_blocks_in_flight { tracing::warn!( - "sync::RequestMissingBlocks : syncing {} blocks in pipeline", - self.in_pipeline + "sync::RequestMissingBlocks : syncing {}/{} blocks in pipeline", + self.in_pipeline, + self.max_blocks_in_flight ); return Ok(()); } @@ -485,8 +455,8 @@ impl Sync { self.peers.reinsert_peer(peer)?; // If we have no chain_segments, we have nothing to do - if let Some((meta, peer_info)) = self.last_segment()? { - let request_hashes = self.get_segment(meta.qc.block_hash)?; + if let Some((meta, peer_info)) = self.db.last_sync_segment()? { + let request_hashes = self.db.get_sync_segment(meta.qc.block_hash)?; // Checksum of the request hashes let checksum = request_hashes @@ -501,35 +471,41 @@ impl Sync { tracing::info!( "sync::RequestMissingBlocks : requesting {} blocks of segment #{} from {}", request_hashes.len(), - self.count_segments()?, + self.db.count_sync_segments()?, peer_info.peer_id, ); - let message = match peer_info.version { + let (peer_info, message) = match peer_info.version { PeerVer::V2 => { - self.in_flight = Some(PeerInfo { - version: PeerVer::V2, - peer_id: peer_info.peer_id, - last_used: std::time::Instant::now(), - score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers - }); - ExternalMessage::MultiBlockRequest(request_hashes) + ( + PeerInfo { + version: PeerVer::V2, + peer_id: peer_info.peer_id, + last_used: std::time::Instant::now(), + score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers + }, + ExternalMessage::MultiBlockRequest(request_hashes), + ) } PeerVer::V1 => { - self.in_flight = Some(PeerInfo { - version: PeerVer::V1, - peer_id: peer_info.peer_id, - last_used: std::time::Instant::now(), - score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers - }); - // do not add VIEW_DRIFT - the stored marker is accurate! - ExternalMessage::BlockRequest(BlockRequest { - to_view: meta.view.saturating_sub(1), - from_view: meta.view.saturating_sub(self.max_batch_size as u64), - }) + ( + PeerInfo { + version: PeerVer::V1, + peer_id: peer_info.peer_id, + last_used: std::time::Instant::now(), + score: u32::MAX, // used to indicate faux peer, will not be added to the group of peers + }, + // do not add VIEW_DRIFT - the stored marker is accurate! 
+ ExternalMessage::BlockRequest(BlockRequest { + to_view: meta.view.saturating_sub(1), + from_view: meta.view.saturating_sub(self.max_batch_size as u64), + }), + ) } }; - self.message_sender + let request_id = self + .message_sender .send_external_message(peer_info.peer_id, message)?; + self.in_flight = Some((peer_info, request_id)); } } else { tracing::warn!( @@ -552,7 +528,7 @@ impl Sync { && response.from_view == u64::MAX { tracing::info!("sync::HandleBlockResponse : new response from {from}",); - if let Some(mut peer) = self.in_flight.take() { + if let Some((mut peer, _)) = self.in_flight.take() { if peer.peer_id == from && peer.version == PeerVer::V1 { // upgrade to V2 peer peer.version = PeerVer::V2; @@ -617,7 +593,11 @@ impl Sync { .proposals .into_iter() // filter any blocks that are not in the chain e.g. forks - .filter(|p| self.contains_metadata(&p.hash()).unwrap_or_default()) + .filter(|p| { + self.db + .contains_sync_metadata(&p.hash()) + .unwrap_or_default() + }) .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); @@ -644,7 +624,7 @@ impl Sync { response: Vec, ) -> Result<()> { // Check for expected response - let segment_peer = if let Some(peer) = self.in_flight.as_ref() { + let segment_peer = if let Some((peer, _)) = self.in_flight.as_ref() { if peer.peer_id != from { tracing::warn!( "sync::MetadataResponse : unexpected peer={} != {from}", @@ -704,15 +684,15 @@ impl Sync { tracing::info!( "sync::MetadataResponse : received {} metadata segment #{} from {}", segment.len(), - self.count_segments()?, + self.db.count_sync_segments()?, from ); // Record the constructed chain metadata - self.insert_metadata(&segment)?; + self.db.insert_sync_metadata(&segment)?; // Record landmark(s), including peer that has this set of blocks - self.push_segment(segment_peer, *meta)?; + self.db.push_sync_segment(segment_peer, *meta)?; // Record the oldest block in the chain's parent self.state = SyncState::Phase1(segment.last().cloned().unwrap()); @@ -747,8 +727,8 @@ impl Sync { from ); - // Do not respond to stale requests as the client has timed-out - if request.request_at.elapsed()? > self.request_timeout { + // Do not respond to stale requests as the client has probably timed-out + if request.request_at.elapsed()? > Duration::from_secs(5) { tracing::warn!("sync::MetadataRequest : stale request"); return Ok(ExternalMessage::Acknowledgement); } @@ -793,34 +773,24 @@ impl Sync { anyhow::bail!("sync::RequestMissingMetadata : invalid state"); } // Early exit if there's a request in-flight; and if it has not expired. 
- if let Some(peer) = self.in_flight.as_ref() { - if peer.last_used.elapsed() > self.request_timeout { - tracing::warn!( - "sync::RequestMissingMetadata : in-flight request {} timed out, requesting from new peer", - peer.peer_id - ); - self.peers - .done_with_peer(self.in_flight.take(), DownGrade::Timeout); - } else { - return Ok(()); - } - } else if self.in_pipeline > self.max_batch_size { + if self.in_flight.is_some() || self.in_pipeline > self.max_batch_size { // anything more than this and we cannot be sure whether the segment hits history tracing::warn!( - "sync::RequestMissingMetadata : syncing {} blocks in pipeline", - self.in_pipeline + "sync::RequestMissingMetadata : syncing {}/{} blocks in pipeline", + self.in_pipeline, + self.max_batch_size ); return Ok(()); } - if let Some(peer) = self.peers.get_next_peer() { + if let Some(peer_info) = self.peers.get_next_peer() { tracing::info!( "sync::RequestMissingMetadata : requesting {} metadata of segment #{} from {}", self.max_batch_size, - self.count_segments()? + 1, - peer.peer_id + self.db.count_sync_segments()? + 1, + peer_info.peer_id ); - let message = match (self.state.clone(), &peer.version) { + let message = match (self.state.clone(), &peer_info.version) { ( SyncState::Phase1(BlockHeader { number: block_number, @@ -869,9 +839,10 @@ impl Sync { } _ => anyhow::bail!("sync::MissingMetadata : invalid state"), }; - self.message_sender - .send_external_message(peer.peer_id, message)?; - self.in_flight = Some(peer); + let request_id = self + .message_sender + .send_external_message(peer_info.peer_id, message)?; + self.in_flight = Some((peer_info, request_id)); } else { tracing::warn!( "sync::RequestMissingBlocks : {} insufficient peers to handle request", @@ -946,7 +917,7 @@ impl Sync { Ok(self.in_pipeline != 0 || !matches!(self.state, SyncState::Phase0) || !self.recent_proposals.is_empty() - || self.count_segments()? != 0) + || self.db.count_sync_segments()? != 0) } // Returns (starting_block, current_block, highest_block) if we're syncing, @@ -1001,8 +972,8 @@ impl SyncPeers { /// /// This algorithm favours good peers that respond quickly (i.e. no timeout). /// In most cases, it eventually degenerates into 2 sources - avoid a single source of truth. - fn done_with_peer(&self, in_flight: Option, downgrade: DownGrade) { - if let Some(mut peer) = in_flight { + fn done_with_peer(&self, in_flight: Option<(PeerInfo, RequestId)>, downgrade: DownGrade) { + if let Some((mut peer, _)) = in_flight { tracing::trace!("sync::DoneWithPeer {} {:?}", peer.peer_id, downgrade); let mut peers = self.peers.lock().unwrap(); peer.score = peer.score.saturating_add(downgrade as u32); @@ -1069,12 +1040,15 @@ impl SyncPeers { } let mut peers = self.peers.lock().unwrap(); let mut peer = peer; - peer.last_used = peers - .peek() - .expect("peers.len() > 1") - .last_used - .checked_sub(Duration::from_secs(1)) - .expect("time is ordinal"); + if !peers.is_empty() { + // Ensure that it gets to the head of the line + peer.last_used = peers + .peek() + .expect("peers.len() > 1") + .last_used + .checked_sub(Duration::from_secs(1)) + .expect("time is ordinal"); + } peers.push(peer); Ok(()) } From 5bbfe26a05d0ae833121db8aa693b84e9e72a496 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 10:37:12 +0800 Subject: [PATCH 107/119] nit: change sync_data to sync_metadata table name; misc nits. 
--- zilliqa/src/db.rs | 32 ++++++++++++++++---------------- zilliqa/src/message.rs | 5 +---- zilliqa/src/sync.rs | 9 +++------ 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs index b0ca82f97..b4d1b8e77 100644 --- a/zilliqa/src/db.rs +++ b/zilliqa/src/db.rs @@ -329,7 +329,7 @@ impl Db { ", )?; connection.execute_batch( - "CREATE TEMP TABLE IF NOT EXISTS sync_data ( + "CREATE TEMP TABLE IF NOT EXISTS sync_metadata ( block_hash BLOB NOT NULL UNIQUE, parent_hash BLOB NOT NULL, block_number INTEGER NOT NULL PRIMARY KEY, @@ -338,7 +338,7 @@ impl Db { version INTEGER DEFAULT 0, peer BLOB DEFAULT NULL ); - CREATE INDEX IF NOT EXISTS idx_sync_data ON sync_data(block_number) WHERE peer IS NOT NULL;", + CREATE INDEX IF NOT EXISTS idx_sync_metadata ON sync_metadata(block_number) WHERE peer IS NOT NULL;", )?; Ok(()) @@ -361,20 +361,20 @@ impl Db { .db .lock() .unwrap() - .prepare_cached("SELECT COUNT(block_number) FROM sync_data WHERE peer IS NOT NULL")? + .prepare_cached("SELECT COUNT(block_number) FROM sync_metadata WHERE peer IS NOT NULL")? .query_row([], |row| row.get(0)) .optional()? .unwrap_or_default()) } /// Checks if the stored metadata exists - pub fn contains_sync_metadata(&self, hash: &Hash) -> Result { + pub fn contains_sync_metadata(&self, block_hash: &Hash) -> Result { Ok(self .db .lock() .unwrap() - .prepare_cached("SELECT block_number FROM sync_data WHERE block_hash = ?1")? - .query_row([hash], |row| row.get::<_, u64>(0)) + .prepare_cached("SELECT parent_hash FROM sync_metadata WHERE block_hash = ?1")? + .query_row([block_hash], |row| row.get::<_, Hash>(0)) .optional()? .is_some()) } @@ -387,7 +387,7 @@ impl Db { let mut block_hash = hash; while let Some(parent_hash) = db - .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .prepare_cached("SELECT parent_hash FROM sync_metadata WHERE block_hash = ?1")? .query_row([block_hash], |row| row.get::<_, Hash>(0)) .optional()? { @@ -400,7 +400,7 @@ impl Db { /// Peeks into the top of the segment stack. pub fn last_sync_segment(&self) -> Result> { let db = self.db.lock().unwrap(); - let r = db.prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, gas_used, version, peer FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + let r = db.prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, gas_used, version, peer FROM sync_metadata WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? .query_row([], |row| Ok(( BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?), PeerInfo { @@ -416,7 +416,7 @@ impl Db { pub fn push_sync_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> { let db = self.db.lock().unwrap(); db.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used, version, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used, :version, :peer)")? + "INSERT OR REPLACE INTO sync_metadata (parent_hash, block_hash, block_number, view_number, gas_used, version, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used, :version, :peer)")? .execute( named_params! { ":parent_hash": meta.qc.block_hash, @@ -438,7 +438,7 @@ impl Db { for meta in metas { tx.prepare_cached( - "INSERT OR REPLACE INTO sync_data (parent_hash, block_hash, block_number, view_number, gas_used) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used)")? 
+ "INSERT OR REPLACE INTO sync_metadata (parent_hash, block_hash, block_number, view_number, gas_used) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used)")? .execute( named_params! { ":parent_hash": meta.qc.block_hash, @@ -457,7 +457,7 @@ impl Db { self.db .lock() .unwrap() - .execute("DELETE FROM sync_data", [])?; + .execute("DELETE FROM sync_metadata", [])?; Ok(()) } @@ -466,14 +466,14 @@ impl Db { let mut db = self.db.lock().unwrap(); let c = db.transaction()?; - if let Some(block_hash) = c.prepare_cached("SELECT block_hash FROM sync_data WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? + if let Some(block_hash) = c.prepare_cached("SELECT block_hash FROM sync_metadata WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")? .query_row([], |row| row.get::<_,Hash>(0)).optional()? { - if let Some(parent_hash) = c.prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + if let Some(parent_hash) = c.prepare_cached("SELECT parent_hash FROM sync_metadata WHERE block_hash = ?1")? .query_row([block_hash], |row| row.get(0)).optional()? { // update marker c.prepare_cached( - "UPDATE sync_data SET peer = NULL WHERE block_hash = ?1")? + "UPDATE sync_metadata SET peer = NULL WHERE block_hash = ?1")? .execute( [block_hash] )?; @@ -482,7 +482,7 @@ impl Db { let mut hashes = Vec::new(); let mut block_hash = parent_hash; while let Some(parent_hash) = c - .prepare_cached("SELECT parent_hash FROM sync_data WHERE block_hash = ?1")? + .prepare_cached("SELECT parent_hash FROM sync_metadata WHERE block_hash = ?1")? .query_row([block_hash], |row| row.get::<_, Hash>(0)) .optional()? { @@ -491,7 +491,7 @@ impl Db { } for hash in hashes { - c.prepare_cached("DELETE FROM sync_data WHERE block_hash = ?1")? + c.prepare_cached("DELETE FROM sync_metadata WHERE block_hash = ?1")? .execute([hash])?; } } diff --git a/zilliqa/src/message.rs b/zilliqa/src/message.rs index 8f4a9b28c..9e1088d57 100644 --- a/zilliqa/src/message.rs +++ b/zilliqa/src/message.rs @@ -274,8 +274,7 @@ pub enum ExternalMessage { /// An acknowledgement of the receipt of a message. Note this is only used as a response when the caller doesn't /// require any data in the response. Acknowledgement, - AddPeer, - RemovePeer, + /// The following are used for the new sync protocol InjectedProposal(InjectedProposal), MetaDataRequest(RequestBlocksByHeight), MetaDataResponse(Vec), @@ -315,8 +314,6 @@ impl Display for ExternalMessage { ExternalMessage::InjectedProposal(p) => { write!(f, "InjectedProposal {}", p.block.number()) } - ExternalMessage::AddPeer => write!(f, "AddPeer"), - ExternalMessage::RemovePeer => write!(f, "RemovePeer"), ExternalMessage::Proposal(p) => write!(f, "Proposal({})", p.view()), ExternalMessage::Vote(v) => write!(f, "Vote({})", v.view), ExternalMessage::NewView(n) => write!(f, "NewView({})", n.view), diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index b5b829063..fe6e59c33 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -245,10 +245,7 @@ impl Sync { } } _ => { - tracing::debug!( - "sync::SyncProposal : syncing {} blocks in pipeline", - self.in_pipeline - ); + tracing::debug!("sync::SyncProposal : syncing {} blocks", self.in_pipeline); } } @@ -442,7 +439,7 @@ impl Sync { // Early exit if there's a request in-flight; and if it has not expired. 
if self.in_flight.is_some() || self.in_pipeline > self.max_blocks_in_flight { tracing::warn!( - "sync::RequestMissingBlocks : syncing {}/{} blocks in pipeline", + "sync::RequestMissingBlocks : syncing {}/{} blocks", self.in_pipeline, self.max_blocks_in_flight ); @@ -776,7 +773,7 @@ impl Sync { if self.in_flight.is_some() || self.in_pipeline > self.max_batch_size { // anything more than this and we cannot be sure whether the segment hits history tracing::warn!( - "sync::RequestMissingMetadata : syncing {}/{} blocks in pipeline", + "sync::RequestMissingMetadata : syncing {}/{} blocks", self.in_pipeline, self.max_batch_size ); From 9b28c959236d51bf56ded9e1b4ce87fede6210c1 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 11:01:37 +0800 Subject: [PATCH 108/119] feat: added place-holder for active/passive sync. --- zilliqa/src/consensus.rs | 4 ++-- zilliqa/src/node.rs | 2 +- zilliqa/src/sync.rs | 24 ++++++++++++++++++++---- zilliqa/tests/it/staking.rs | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 9487ae98b..9b80e9510 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -3130,8 +3130,8 @@ impl Consensus { trace!("request_missing_blocks from timer"); // TODO: Drive passive-sync from Timeouts - if self.sync.am_syncing()? { - self.sync.sync_internal()?; + if !self.sync.am_syncing()? { + self.sync.sync_to_genesis()?; } else { trace!("not syncing ..."); } diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index c1dccc01f..5e50e14b5 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -909,7 +909,7 @@ impl Node { self.message_sender.broadcast_proposal(message)?; } } else { - self.consensus.sync.sync_proposal(proposal)?; // proposal is already verified + self.consensus.sync.sync_from_proposal(proposal)?; // proposal is already verified } Ok(()) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index fe6e59c33..f284e26e7 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -133,16 +133,27 @@ impl Sync { }) } + /// P2P Failure + /// + /// This gets called for any libp2p request failure. pub fn handle_request_failure(&mut self, failure: OutgoingMessageFailure) -> Result<()> { // chekc if the request is a sync messages if let Some((peer, req_id)) = self.in_flight.as_ref() { // downgrade peer due to timeout if peer.peer_id == failure.peer && *req_id == failure.request_id { tracing::warn!(to = %peer.peer_id, err = %failure.error, - "sync::RequestFailure : in-flight request failed" + "sync::RequestFailure : in-flight failed" ); self.peers .done_with_peer(self.in_flight.take(), DownGrade::Timeout); + // Retry if failed in Phase 2 for whatever reason + match self.state { + SyncState::Phase1(_) if Self::DO_SPECULATIVE => { + self.request_missing_metadata(None)? + } + SyncState::Phase2(_) => self.state = SyncState::Retry1, + _ => {} + } } } Ok(()) @@ -156,17 +167,22 @@ impl Sync { /// If we find its parent in history, we inject the entire queue. Otherwise, we start syncing. /// /// We do not perform checks on the Proposal here. This is done in the consensus layer. - pub fn sync_proposal(&mut self, proposal: Proposal) -> Result<()> { + pub fn sync_from_proposal(&mut self, proposal: Proposal) -> Result<()> { // just stuff the latest proposal into the fixed-size queue. 
while self.recent_proposals.len() >= self.max_batch_size { self.recent_proposals.pop_front(); } self.recent_proposals.push_back(proposal); - self.sync_internal() + self.internal_sync() + } + + // TODO: Passive-sync place-holder + pub fn sync_to_genesis(&mut self) -> Result<()> { + Ok(()) } - pub fn sync_internal(&mut self) -> Result<()> { + fn internal_sync(&mut self) -> Result<()> { if self.recent_proposals.is_empty() { // Do nothing if there's no recent proposals. tracing::debug!("sync::Internal : missing recent proposals"); diff --git a/zilliqa/tests/it/staking.rs b/zilliqa/tests/it/staking.rs index b423ffcde..966662f21 100644 --- a/zilliqa/tests/it/staking.rs +++ b/zilliqa/tests/it/staking.rs @@ -532,7 +532,7 @@ async fn validators_can_join_and_become_proposer(mut network: Network) { // Give new node time to catch up to block including deposit_v3 deployment network - .run_until_block(&staker_wallet, 24.into(), 200) + .run_until_block(&staker_wallet, 24.into(), 424) .await; let deposit_hash = deposit_v3_stake( From c5d9b92c0e00b982e6e8dfa0923b03ad4a29d3b6 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 11:44:11 +0800 Subject: [PATCH 109/119] fix #2227; and remove txn.verify() during Phase 2 - it is checked during Injection. --- zilliqa/src/sync.rs | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index f284e26e7..a4083a066 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -321,23 +321,6 @@ impl Sync { &mut self, from: PeerId, response: Vec, - ) -> Result<()> { - // Verify transactions on the client-side - let proposals = response - .into_iter() - .map(|p| { - let (b, t) = p.into_parts(); - let txns = t.into_iter().map(|t| t.verify().unwrap()).collect_vec(); - Proposal::from_parts(b, txns) - }) - .collect_vec(); - self.inner_handle_multiblock_response(from, proposals) - } - - pub fn inner_handle_multiblock_response( - &mut self, - from: PeerId, - response: Vec, ) -> Result<()> { if let Some((peer, _)) = self.in_flight.as_ref() { if peer.peer_id != from { @@ -399,7 +382,7 @@ impl Sync { .collect_vec(); self.db.pop_sync_segment()?; - self.inject_proposals(proposals)?; + self.inject_proposals(proposals)?; // txns are verified when processing InjectedProposal. // Done with phase 2 if self.db.count_sync_segments()? == 0 { @@ -614,7 +597,7 @@ impl Sync { .sorted_by(|a, b| b.number().cmp(&a.number())) .collect_vec(); - self.inner_handle_multiblock_response(from, multi_blocks)?; + self.handle_multiblock_response(from, multi_blocks)?; } _ => { tracing::error!( @@ -929,7 +912,6 @@ impl Sync { pub fn am_syncing(&self) -> Result { Ok(self.in_pipeline != 0 || !matches!(self.state, SyncState::Phase0) - || !self.recent_proposals.is_empty() || self.db.count_sync_segments()? != 0) } From dae52d57f1655618ba79a0d3b84c3bcbdf0ce97b Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 12:24:12 +0800 Subject: [PATCH 110/119] feat: place-holder to store old ZIL txn blocks. --- zilliqa/src/sync.rs | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index a4083a066..e0545304b 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -22,7 +22,7 @@ use crate::{ }, node::{MessageSender, OutgoingMessageFailure, RequestId}, time::SystemTime, - transaction::EvmGas, + transaction::{EvmGas, SignedTransaction}, }; // Syncing Algorithm @@ -874,19 +874,27 @@ impl Sync { // Just pump the Proposals back to ourselves. 
for p in proposals { - tracing::trace!( - "sync::InjectProposals : injecting number: {} hash: {}", - p.number(), - p.hash(), - ); - - self.message_sender.send_external_message( - self.peer_id, - ExternalMessage::InjectedProposal(InjectedProposal { - from: self.peer_id, - block: p, - }), - )?; + if !p + .transactions + .iter() + .any(|t| matches!(t, SignedTransaction::Zilliqa { .. })) + { + tracing::trace!( + number = %p.number(), hash = %p.hash(), + "sync::InjectProposals : applying", + ); + self.message_sender.send_external_message( + self.peer_id, + ExternalMessage::InjectedProposal(InjectedProposal { + from: self.peer_id, + block: p, + }), + )?; + } else { + tracing::warn!(number = %p.number(), hash = %p.hash(), "sync::InjectProposals : storing"); + // TODO: just store old ZIL blocks + todo!("store ZIL block"); + } } self.inject_at = Some((std::time::Instant::now(), self.in_pipeline)); From 0c3614a09ca2e7d7580c9daacd5e34448673a273 Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 12:30:14 +0800 Subject: [PATCH 111/119] nit: simplify run_until_synced(); --- zilliqa/src/sync.rs | 22 ++++++++++++---------- zilliqa/tests/it/main.rs | 9 ++------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index e0545304b..47a22262e 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -177,7 +177,7 @@ impl Sync { self.internal_sync() } - // TODO: Passive-sync place-holder + // TODO: Passive-sync place-holder - https://github.com/Zilliqa/zq2/issues/2232 pub fn sync_to_genesis(&mut self) -> Result<()> { Ok(()) } @@ -764,6 +764,8 @@ impl Sync { /// This constructs a chain history by requesting blocks from a peer, going backwards from a given block. /// If Phase 1 is in progress, it continues requesting blocks from the last known Phase 1 block. /// Otherwise, it requests blocks from the given starting metadata. 
+ /// + /// TODO: speed it up - https://github.com/Zilliqa/zq2/issues/2158 pub fn request_missing_metadata(&mut self, meta: Option) -> Result<()> { if !matches!(self.state, SyncState::Phase1(_)) && !matches!(self.state, SyncState::Phase0) { anyhow::bail!("sync::RequestMissingMetadata : invalid state"); @@ -883,18 +885,18 @@ impl Sync { number = %p.number(), hash = %p.hash(), "sync::InjectProposals : applying", ); - self.message_sender.send_external_message( - self.peer_id, - ExternalMessage::InjectedProposal(InjectedProposal { - from: self.peer_id, - block: p, - }), - )?; } else { tracing::warn!(number = %p.number(), hash = %p.hash(), "sync::InjectProposals : storing"); - // TODO: just store old ZIL blocks - todo!("store ZIL block"); + // TODO: just store old ZIL blocks - https://github.com/Zilliqa/zq2/issues/2232 } + + self.message_sender.send_external_message( + self.peer_id, + ExternalMessage::InjectedProposal(InjectedProposal { + from: self.peer_id, + block: p, + }), + )?; } self.inject_at = Some((std::time::Instant::now(), self.in_pipeline)); diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index c90064ef7..47bd10434 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -1061,17 +1061,12 @@ impl Network { break i; } }; - let mut debounce = 0; - let mut old_height = 0; self.run_until( |net| { + let syncing = net.get_node(index).consensus.sync.am_syncing().unwrap(); let height_i = net.get_node(index).get_finalized_height().unwrap(); let height_c = net.get_node(check).get_finalized_height().unwrap(); - if height_c == height_i && height_i > old_height { - debounce += 1; - old_height = height_i; - } - debounce == 3 + height_c == height_i && height_i > 0 && !syncing }, 2000, ) From d0d3f00203c6b885fec79bdd0b3f58b081579e3d Mon Sep 17 00:00:00 2001 From: Shawn Date: Tue, 28 Jan 2025 15:27:40 +0800 Subject: [PATCH 112/119] fix: flaw in get_next_peer(). --- zilliqa/src/sync.rs | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs index 47a22262e..a72cd3f5d 100644 --- a/zilliqa/src/sync.rs +++ b/zilliqa/src/sync.rs @@ -504,10 +504,7 @@ impl Sync { self.in_flight = Some((peer_info, request_id)); } } else { - tracing::warn!( - "sync::RequestMissingBlocks : {} insufficient peers to handle request", - self.peers.len() - ); + tracing::warn!("sync::RequestMissingBlocks : insufficient peers to handle request"); } Ok(()) } @@ -842,10 +839,7 @@ impl Sync { .send_external_message(peer_info.peer_id, message)?; self.in_flight = Some((peer_info, request_id)); } else { - tracing::warn!( - "sync::RequestMissingBlocks : {} insufficient peers to handle request", - self.peers.len() - ); + tracing::warn!("sync::RequestMissingBlocks : insufficient peers to handle request",); } Ok(()) } @@ -996,11 +990,10 @@ impl SyncPeers { /// Add bulk peers pub fn add_peers(&self, peers: Vec) { tracing::debug!("sync::AddPeers {:?}", peers); - for peer in peers { - if peer != self.peer_id { - self.add_peer(peer); - } - } + peers + .into_iter() + .filter(|p| *p != self.peer_id) + .for_each(|p| self.add_peer(p)); } /// Add a peer to the list of peers. 
@@ -1014,7 +1007,7 @@ impl SyncPeers {
             peer_id: peer,
             last_used: Instant::now(),
         };
-        // ensure that it is unique - avoids single source of truth
+        // ensure that it is unique
         peers.retain(|p: &PeerInfo| p.peer_id != peer);
         peers.push(new_peer);
 
@@ -1029,17 +1022,17 @@ impl SyncPeers {
     }
 
     /// Get the next best peer to use
-    pub fn get_next_peer(&self) -> Option {
-        let mut peer = self.peers.lock().unwrap().pop()?;
-        peer.last_used = std::time::Instant::now();
-        // dynamic sizing should not be needed, if we're syncing recent blocks.
-        // self.max_batch_size = self.dynamic_batch_sizing(&peer);
-        tracing::trace!("sync::GetNextPeer {} ({})", peer.peer_id, peer.score);
-        Some(peer)
+    fn get_next_peer(&self) -> Option {
+        if let Some(mut peer) = self.peers.lock().unwrap().pop() {
+            peer.last_used = std::time::Instant::now();
+            tracing::trace!(peer = % peer.peer_id, score= %peer.score, "sync::GetNextPeer");
+            return Some(peer);
+        }
+        None
     }
 
     /// Reinserts the peer such that it is at the front of the queue.
-    pub fn reinsert_peer(&self, peer: PeerInfo) -> Result<()> {
+    fn reinsert_peer(&self, peer: PeerInfo) -> Result<()> {
         if peer.score == u32::MAX {
             return Ok(());
         }
@@ -1057,14 +1050,6 @@ impl SyncPeers {
         peers.push(peer);
         Ok(())
     }
-
-    pub fn len(&self) -> usize {
-        self.peers.lock().unwrap().len()
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.peers.lock().unwrap().is_empty()
-    }
 }
 
 #[derive(Debug, Clone, Eq, PartialEq)]

From dba0235a927e34ed1d3b8462890e08bb4653ee11 Mon Sep 17 00:00:00 2001
From: Shawn
Date: Tue, 28 Jan 2025 15:37:54 +0800
Subject: [PATCH 113/119] feat: early prototype for issue #1878.

---
 zilliqa/src/node.rs |  4 +++-
 zilliqa/src/sync.rs | 42 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs
index 5e50e14b5..f02d7fa46 100644
--- a/zilliqa/src/node.rs
+++ b/zilliqa/src/node.rs
@@ -350,7 +350,9 @@ impl Node {
             ExternalMessage::BlockResponse(response) => {
                 self.consensus.sync.handle_block_response(from, response)?
             }
-            ExternalMessage::Acknowledgement => {}
+            ExternalMessage::Acknowledgement => {
+                self.consensus.sync.handle_acknowledgement(from)?;
+            }
             msg => {
                 warn!(%msg, "unexpected message type");
             }
diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs
index a72cd3f5d..19bca2ede 100644
--- a/zilliqa/src/sync.rs
+++ b/zilliqa/src/sync.rs
@@ -133,13 +133,42 @@ impl Sync {
         })
     }
 
+    /// Skip Failure
+    ///
+    /// We get a plain ACK in certain cases - treated as an empty response.
+    pub fn handle_acknowledgement(&mut self, from: PeerId) -> Result<()> {
+        if let Some((peer, _)) = self.in_flight.as_ref() {
+            // downgrade peer due to empty response
+            if peer.peer_id == from {
+                tracing::warn!(to = %peer.peer_id,
+                    "sync::Acknowledgement : empty response"
+                );
+                self.peers
+                    .done_with_peer(self.in_flight.take(), DownGrade::Empty);
+                // Retry if failed in Phase 2 for whatever reason
+                match self.state {
+                    SyncState::Phase1(_) if Self::DO_SPECULATIVE => {
+                        self.request_missing_metadata(None)?
+                    }
+                    SyncState::Phase2(_) => self.state = SyncState::Retry1,
+                    _ => {}
+                }
+            } else {
+                tracing::warn!(to = %peer.peer_id,
+                    "sync::Acknowledgement : spurious"
+                );
+            }
+        }
+        Ok(())
+    }
+
     /// P2P Failure
     ///
-    /// This gets called for any libp2p request failure.
+    /// This gets called for any libp2p request failure - treated as a network failure
     pub fn handle_request_failure(&mut self, failure: OutgoingMessageFailure) -> Result<()> {
-        // chekc if the request is a sync messages
+        // check if the request is a sync messages
         if let Some((peer, req_id)) = self.in_flight.as_ref() {
-            // downgrade peer due to timeout
+            // downgrade peer due to network failure
             if peer.peer_id == failure.peer && *req_id == failure.request_id {
                 tracing::warn!(to = %peer.peer_id, err = %failure.error,
                     "sync::RequestFailure : in-flight failed"
@@ -154,6 +183,10 @@ impl Sync {
                     SyncState::Phase2(_) => self.state = SyncState::Retry1,
                     _ => {}
                 }
+            } else {
+                tracing::warn!(to = %peer.peer_id,
+                    "sync::RequestFailure : spurious"
+                );
             }
         }
         Ok(())
@@ -726,8 +759,7 @@ impl Sync {
             return Ok(ExternalMessage::Acknowledgement);
         }
 
-        // TODO: Check if we should service this request
-        // Validators could respond to this request if there is nothing else to do.
+        // TODO: Check if we should service this request - https://github.com/Zilliqa/zq2/issues/1878
 
         let batch_size: usize = self
             .max_batch_size

From 5dce6ae16fde4b3c0bae05c50bf27fe4c13d994c Mon Sep 17 00:00:00 2001
From: James Hinshelwood
Date: Wed, 29 Jan 2025 12:06:14 +0000
Subject: [PATCH 114/119] Delete all non-finalized blocks from database at
 startup

Previously we only deleted 'canonical' blocks.
---
 zilliqa/src/consensus.rs | 12 +++++-------
 zilliqa/src/db.rs        | 25 ++++++++++---------------
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs
index 9b80e9510..9718ad13e 100644
--- a/zilliqa/src/consensus.rs
+++ b/zilliqa/src/consensus.rs
@@ -277,21 +277,19 @@ impl Consensus {
             // If we have newer blocks, erase them
             // @todo .. more elegantly :-)
             loop {
-                let highest_block_number = db
-                    .get_highest_canonical_block_number()?
-                    .ok_or_else(|| anyhow!("can't find highest block num in database!"))?;
                 let head_block = db
-                    .get_canonical_block_by_number(highest_block_number)?
-                    .ok_or_else(|| anyhow!("missing head block!"))?;
+                    .get_highest_recorded_block()?
+                    .ok_or_else(|| anyhow!("can't find highest block in database!"))?;
 
                 trace!(
-                    "recovery: highest_block_number {highest_block_number} view {0}",
+                    "recovery: highest_block_number {} view {}",
+                    head_block.number(),
                     head_block.view()
                 );
 
                 if head_block.view() > high_block.view() && head_block.view() > finalized_number {
-                    trace!("recovery: stored block {0} reverted", highest_block_number);
+                    trace!("recovery: stored block {0} reverted", head_block.number());
                     db.remove_transactions_executed_in_block(&head_block.hash())?;
                     db.remove_block(&head_block)?;
                 } else {
diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs
index b4d1b8e77..3bb0a30eb 100644
--- a/zilliqa/src/db.rs
+++ b/zilliqa/src/db.rs
@@ -177,6 +177,7 @@ enum BlockFilter {
     Hash(Hash),
     View(u64),
     Height(u64),
+    MaxHeight,
 }
 
 const CHECKPOINT_HEADER_BYTES: [u8; 8] = *b"ZILCHKPT";
@@ -781,19 +782,6 @@ impl Db {
             .unwrap_or(None))
     }
 
-    // Deliberately not named get_highest_block_number() because there used to be one
-    // of those with unclear semantics, so changing name to force the compiler to error
-    // if it was used.
-    pub fn get_highest_recorded_block_number(&self) -> Result> {
-        Ok(self
-            .db
-            .lock()
-            .unwrap()
-            .prepare_cached("SELECT height FROM blocks ORDER BY height DESC LIMIT 1")?
-            .query_row((), |row| row.get(0))
-            .optional()?)
-    }
-
     pub fn get_highest_canonical_block_number(&self) -> Result> {
         Ok(self
             .db
@@ -1028,8 +1016,8 @@ impl Db {
             })
         }
         macro_rules! query_block {
-            ($cond: tt, $key: tt) => {
-                self.db.lock().unwrap().prepare_cached(concat!("SELECT block_hash, view, height, qc, signature, state_root_hash, transactions_root_hash, receipts_root_hash, timestamp, gas_used, gas_limit, agg FROM blocks WHERE ", $cond),)?.query_row([$key], make_block).optional()?
+            ($cond: tt $(, $key:tt)*) => {
+                self.db.lock().unwrap().prepare_cached(concat!("SELECT block_hash, view, height, qc, signature, state_root_hash, transactions_root_hash, receipts_root_hash, timestamp, gas_used, gas_limit, agg FROM blocks WHERE ", $cond),)?.query_row([$($key),*], make_block).optional()?
             };
         }
         Ok(match filter {
@@ -1042,6 +1030,9 @@ impl Db {
             BlockFilter::Height(height) => {
                 query_block!("height = ?1 AND is_canonical = TRUE", height)
             }
+            BlockFilter::MaxHeight => {
+                query_block!("TRUE ORDER BY height DESC LIMIT 1")
+            }
         })
     }
 
@@ -1072,6 +1063,10 @@ impl Db {
         self.get_block(BlockFilter::Height(number))
     }
 
+    pub fn get_highest_recorded_block(&self) -> Result> {
+        self.get_block(BlockFilter::MaxHeight)
+    }
+
     pub fn contains_block(&self, block_hash: &Hash) -> Result {
         Ok(self
             .db

From 516fc63b6d7f2e20bc8d0ad0f7dbade1a998fd62 Mon Sep 17 00:00:00 2001
From: James Hinshelwood
Date: Wed, 29 Jan 2025 17:29:43 +0000
Subject: [PATCH 115/119] Don't fail benchmark workflows on alert

---
 .github/workflows/base_benchmarks.yaml | 1 -
 .github/workflows/pr_benchmarks.yaml   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/.github/workflows/base_benchmarks.yaml b/.github/workflows/base_benchmarks.yaml
index 69c20fff5..5d0912ae1 100644
--- a/.github/workflows/base_benchmarks.yaml
+++ b/.github/workflows/base_benchmarks.yaml
@@ -33,7 +33,6 @@ jobs:
             --threshold-max-sample-size 64 \
             --threshold-upper-boundary 0.99 \
             --thresholds-reset \
-            --err \
             --adapter rust_criterion \
             --github-actions '${{ secrets.GITHUB_TOKEN }}' \
             cargo bench
diff --git a/.github/workflows/pr_benchmarks.yaml b/.github/workflows/pr_benchmarks.yaml
index 305e99d16..de51b72e5 100644
--- a/.github/workflows/pr_benchmarks.yaml
+++ b/.github/workflows/pr_benchmarks.yaml
@@ -34,7 +34,6 @@ jobs:
             --start-point-clone-thresholds \
             --start-point-reset \
             --testbed self-hosted \
-            --err \
             --adapter rust_criterion \
             --github-actions '${{ secrets.GITHUB_TOKEN }}' \
             cargo bench

From 770465f5540e2229ccd03f78f72d8b8b73ebc41c Mon Sep 17 00:00:00 2001
From: James Hinshelwood
Date: Wed, 29 Jan 2025 17:45:28 +0000
Subject: [PATCH 116/119] Remove redundant config

---
 zilliqa/src/p2p_node.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs
index 21737468e..24cc271ad 100644
--- a/zilliqa/src/p2p_node.rs
+++ b/zilliqa/src/p2p_node.rs
@@ -112,8 +112,7 @@ impl P2pNode {
             Ok(Behaviour {
                 request_response: request_response::cbor::Behaviour::new(
                     iter::once((StreamProtocol::new("/zq2-message/1"), ProtocolSupport::Full)),
-                    request_response::Config::default()
-                        .with_request_timeout(Duration::from_secs(10)),
+                    Default::default(),
                 ),
                 gossipsub: gossipsub::Behaviour::new(
                     MessageAuthenticity::Signed(key_pair.clone()),

From eb42fbb4d45f45f42cc4c0af703bacf8e8b19ff4 Mon Sep 17 00:00:00 2001
From: James Hinshelwood
Date: Thu, 30 Jan 2025 17:07:57 +0000
Subject: [PATCH 117/119] Hide listen addrs

---
 zilliqa/src/p2p_node.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs
index 24cc271ad..05a130181 100644
--- a/zilliqa/src/p2p_node.rs
+++ b/zilliqa/src/p2p_node.rs
@@ -135,8 +135,7 @@ impl P2pNode {
                 // So, the nodes are unable to see each other directly and remain isolated, defeating kademlia and autonat.
                 identify: identify::Behaviour::new(
                     identify::Config::new("zilliqa/1.0.0".into(), key_pair.public())
-                        .with_hide_listen_addrs(false)
-                        .with_push_listen_addr_updates(true),
+                        .with_hide_listen_addrs(true),
                 ),
             })
         })?

From b10602e9c99b6ef80c9c1e2a77b9f5ecba735b2d Mon Sep 17 00:00:00 2001
From: Shawn
Date: Fri, 31 Jan 2025 13:06:07 +0800
Subject: [PATCH 118/119] feat: store raw header blob in sync_metadata().

---
 zilliqa/src/db.rs   | 23 ++++++++++-------------
 zilliqa/src/sync.rs | 46 ++++++++------------------------------------
 2 files changed, 18 insertions(+), 51 deletions(-)

diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs
index b52d4ece2..f8fa197da 100644
--- a/zilliqa/src/db.rs
+++ b/zilliqa/src/db.rs
@@ -341,10 +341,9 @@ impl Db {
                 block_hash BLOB NOT NULL UNIQUE,
                 parent_hash BLOB NOT NULL,
                 block_number INTEGER NOT NULL PRIMARY KEY,
-                view_number INTEGER NOT NULL,
-                gas_used INTEGER NOT NULL,
                 version INTEGER DEFAULT 0,
-                peer BLOB DEFAULT NULL
+                peer BLOB DEFAULT NULL,
+                rawdata BLOB NOT NULL
             );
             CREATE INDEX IF NOT EXISTS idx_sync_metadata ON sync_metadata(block_number) WHERE peer IS NOT NULL;",
         )?;
@@ -408,14 +407,14 @@ impl Db {
     /// Peeks into the top of the segment stack.
     pub fn last_sync_segment(&self) -> Result> {
         let db = self.db.lock().unwrap();
-        let r = db.prepare_cached("SELECT parent_hash, block_hash, block_number, view_number, gas_used, version, peer FROM sync_metadata WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")?
+        let r = db.prepare_cached("SELECT rawdata, version, peer FROM sync_metadata WHERE peer IS NOT NULL ORDER BY block_number ASC LIMIT 1")?
            .query_row([], |row| Ok((
-                BlockHeader::from_meta_data(row.get(0)?,row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?),
+                serde_json::from_slice(row.get::<_,Vec>(0)?.as_slice()).unwrap(),
                PeerInfo {
                    last_used: Instant::now(),
                    score: u32::MAX,
-                    version: row.get(5)?,
-                    peer_id: PeerId::from_bytes(row.get::<_,Vec>(6)?.as_slice()).unwrap(),
+                    version: row.get(1)?,
+                    peer_id: PeerId::from_bytes(row.get::<_,Vec>(2)?.as_slice()).unwrap(),
                }))).optional()?;
        Ok(r)
    }
@@ -424,16 +423,15 @@ impl Db {
     pub fn push_sync_segment(&self, peer: PeerInfo, meta: BlockHeader) -> Result<()> {
         let db = self.db.lock().unwrap();
         db.prepare_cached(
-            "INSERT OR REPLACE INTO sync_metadata (parent_hash, block_hash, block_number, view_number, gas_used, version, peer) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used, :version, :peer)")?
+            "INSERT OR REPLACE INTO sync_metadata (parent_hash, block_hash, block_number, version, peer, rawdata) VALUES (:parent_hash, :block_hash, :block_number, :version, :peer, :rawdata)")?
             .execute(
                 named_params! {
                     ":parent_hash": meta.qc.block_hash,
                     ":block_hash": meta.hash,
                     ":block_number": meta.number,
-                    ":view_number": meta.view,
-                    ":gas_used": meta.gas_used,
                     ":peer": peer.peer_id.to_bytes(),
                     ":version": peer.version,
+                    ":rawdata": serde_json::to_vec(&meta).unwrap(),
                 },
             )?;
         Ok(())
     }
@@ -446,14 +444,13 @@ impl Db {
 
         for meta in metas {
             tx.prepare_cached(
-                "INSERT OR REPLACE INTO sync_metadata (parent_hash, block_hash, block_number, view_number, gas_used) VALUES (:parent_hash, :block_hash, :block_number, :view_number, :gas_used)")?
+                "INSERT OR REPLACE INTO sync_metadata (parent_hash, block_hash, block_number, rawdata) VALUES (:parent_hash, :block_hash, :block_number, :rawdata)")?
                 .execute(
                     named_params! {
                         ":parent_hash": meta.qc.block_hash,
                         ":block_hash": meta.hash,
                         ":block_number": meta.number,
-                        ":view_number": meta.view,
-                        ":gas_used": meta.gas_used,
+                        ":rawdata": serde_json::to_vec(meta).unwrap(),
                    })?;
         }
         tx.commit()?;
diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs
index 19bca2ede..b62746cd9 100644
--- a/zilliqa/src/sync.rs
+++ b/zilliqa/src/sync.rs
@@ -22,7 +22,7 @@ use crate::{
     },
     node::{MessageSender, OutgoingMessageFailure, RequestId},
     time::SystemTime,
-    transaction::{EvmGas, SignedTransaction},
+    transaction::SignedTransaction,
 };
 
 // Syncing Algorithm
@@ -229,17 +229,7 @@ impl Sync {
         if !self.db.contains_block(&parent_hash)? {
             // No parent block, trigger sync
             tracing::warn!("sync::SyncProposal : syncing from {parent_hash}",);
-            let block_hash = self.recent_proposals.back().unwrap().hash();
-            let block_number = self.recent_proposals.back().unwrap().number();
-            let view_number = self.recent_proposals.back().unwrap().view();
-            let gas_used = self.recent_proposals.back().unwrap().header.gas_used;
-            let meta = BlockHeader::from_meta_data(
-                parent_hash,
-                block_hash,
-                block_number,
-                view_number,
-                gas_used,
-            );
+            let meta = self.recent_proposals.back().unwrap().header;
             self.request_missing_metadata(Some(meta))?;
 
             let highest_block = self
@@ -707,6 +697,12 @@ impl Sync {
         // Chain segment is sane
         let segment = response;
 
+        // Record the constructed chain metadata
+        self.db.insert_sync_metadata(&segment)?;
+
+        // Record landmark(s), including peer that has this set of blocks
+        self.db.push_sync_segment(segment_peer, *meta)?;
+
         tracing::info!(
             "sync::MetadataResponse : received {} metadata segment #{} from {}",
             segment.len(),
@@ -714,12 +710,6 @@ impl Sync {
             from
         );
 
-        // Record the constructed chain metadata
-        self.db.insert_sync_metadata(&segment)?;
-
-        // Record landmark(s), including peer that has this set of blocks
-        self.db.push_sync_segment(segment_peer, *meta)?;
-
         // Record the oldest block in the chain's parent
         self.state = SyncState::Phase1(segment.last().cloned().unwrap());
         let last_block_hash = segment.last().as_ref().unwrap().hash;
@@ -1161,23 +1151,3 @@ impl ToSql for PeerVer {
         Ok((self.clone() as u32).into())
     }
 }
-
-impl BlockHeader {
-    pub fn from_meta_data(
-        parent_hash: Hash,
-        block_hash: Hash,
-        block_number: u64,
-        view_number: u64,
-        gas_used: EvmGas,
-    ) -> BlockHeader {
-        let mut meta = BlockHeader {
-            gas_used,
-            view: view_number,
-            number: block_number,
-            hash: block_hash,
-            ..Default::default()
-        };
-        meta.qc.block_hash = parent_hash;
-        meta
-    }
-}

From 071d40bef688e0ba02d12a4c05ce14d51716e43e Mon Sep 17 00:00:00 2001
From: Shawn
Date: Mon, 3 Feb 2025 14:43:29 +0800
Subject: [PATCH 119/119] feat: minor log changes; remove redundant check in
 handle_metadata_response().
---
 zilliqa/src/sync.rs | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/zilliqa/src/sync.rs b/zilliqa/src/sync.rs
index b62746cd9..0d761a883 100644
--- a/zilliqa/src/sync.rs
+++ b/zilliqa/src/sync.rs
@@ -111,10 +111,9 @@ impl Sync {
             SyncState::Retry1 // continue sync
         };
 
-        let latest_block_number = latest_block
+        let (latest_block_number, latest_block_hash) = latest_block
             .as_ref()
-            .expect("Some(block) expected")
-            .number();
+            .map_or_else(|| (u64::MIN, Hash::ZERO), |b| (b.number(), b.hash()));
 
         Ok(Self {
             db,
@@ -129,7 +128,7 @@ impl Sync {
             recent_proposals: VecDeque::with_capacity(max_batch_size),
             inject_at: None,
             started_at_block_number: latest_block_number,
-            checkpoint_hash: Hash::ZERO,
+            checkpoint_hash: latest_block_hash,
         })
     }
 
@@ -171,7 +170,7 @@ impl Sync {
             // downgrade peer due to network failure
             if peer.peer_id == failure.peer && *req_id == failure.request_id {
                 tracing::warn!(to = %peer.peer_id, err = %failure.error,
-                    "sync::RequestFailure : in-flight failed"
+                    "sync::RequestFailure : network error"
                 );
                 self.peers
                     .done_with_peer(self.in_flight.take(), DownGrade::Timeout);
@@ -228,7 +227,7 @@ impl Sync {
         let parent_hash = self.recent_proposals.back().unwrap().header.qc.block_hash;
         if !self.db.contains_block(&parent_hash)? {
             // No parent block, trigger sync
-            tracing::warn!("sync::SyncProposal : syncing from {parent_hash}",);
+            tracing::info!("sync::SyncProposal : syncing from {parent_hash}",);
             let meta = self.recent_proposals.back().unwrap().header;
             self.request_missing_metadata(Some(meta))?;
 
@@ -371,6 +370,10 @@ impl Sync {
                 .done_with_peer(self.in_flight.take(), DownGrade::None);
         }
 
+        let SyncState::Phase2(check_sum) = self.state else {
+            anyhow::bail!("sync::MultiBlockResponse : invalid state");
+        };
+
         tracing::info!(
             "sync::MultiBlockResponse : received {} blocks for segment #{} from {}",
             response.len(),
@@ -379,10 +382,6 @@ impl Sync {
         );
 
         // If the checksum does not match, retry phase 1. Maybe the node has pruned the segment.
-        let SyncState::Phase2(check_sum) = self.state else {
-            anyhow::bail!("sync::MultiBlockResponse : invalid state");
-        };
-
         let checksum = response
             .iter()
             .fold(Hash::builder().with(Hash::ZERO.as_bytes()), |sum, p| {
@@ -460,7 +459,7 @@ impl Sync {
         }
         // Early exit if there's a request in-flight; and if it has not expired.
         if self.in_flight.is_some() || self.in_pipeline > self.max_blocks_in_flight {
-            tracing::warn!(
+            tracing::debug!(
                 "sync::RequestMissingBlocks : syncing {}/{} blocks",
                 self.in_pipeline,
                 self.max_blocks_in_flight
@@ -667,11 +666,11 @@ impl Sync {
                 .done_with_peer(self.in_flight.take(), DownGrade::None);
         }
 
-        // Check the linkage of the returned chain
         let SyncState::Phase1(meta) = &self.state else {
             anyhow::bail!("sync::MetadataResponse : invalid state");
         };
 
+        // Check the linkage of the returned chain
         let mut block_hash = meta.qc.block_hash;
         let mut block_num = meta.number;
         for meta in response.iter() {
@@ -712,14 +711,13 @@ impl Sync {
 
         // Record the oldest block in the chain's parent
         self.state = SyncState::Phase1(segment.last().cloned().unwrap());
-        let last_block_hash = segment.last().as_ref().unwrap().hash;
 
         // If the checkpoint is in this segment
         let checkpointed = segment.iter().any(|b| b.hash == self.checkpoint_hash);
         let started = self.started_at_block_number <= segment.first().as_ref().unwrap().number
             && self.started_at_block_number >= segment.last().as_ref().unwrap().number;
 
         // If the segment hits our history, start Phase 2.
-        if started || checkpointed || self.db.contains_block(&last_block_hash)? {
+        if started || checkpointed {
             self.state = SyncState::Phase2(Hash::ZERO);
         } else if Self::DO_SPECULATIVE {
             self.request_missing_metadata(None)?;
@@ -792,7 +790,7 @@ impl Sync {
         // Early exit if there's a request in-flight; and if it has not expired.
         if self.in_flight.is_some() || self.in_pipeline > self.max_batch_size {
             // anything more than this and we cannot be sure whether the segment hits history
-            tracing::warn!(
+            tracing::debug!(
                 "sync::RequestMissingMetadata : syncing {}/{} blocks",
                 self.in_pipeline,
                 self.max_batch_size
@@ -905,7 +903,6 @@ impl Sync {
                 tracing::warn!(number = %p.number(), hash = %p.hash(), "sync::InjectProposals : storing");
                 // TODO: just store old ZIL blocks - https://github.com/Zilliqa/zq2/issues/2232
             }
-
             self.message_sender.send_external_message(
                 self.peer_id,
                 ExternalMessage::InjectedProposal(InjectedProposal {