Skip to content

Commit ac96be4

Browse files
authoredApr 23, 2024
feat: add tracing for remaining consensus messages, abci calls, and peer updates (celestiaorg#1292)
## Description adds tracing for remaining consensus messages, abci calls (prepare, process, and commit), along with adding and removing peers.
1 parent c89f6f9 commit ac96be4

18 files changed

+482
-62
lines changed
 

‎consensus/reactor.go

+175-19
Original file line numberDiff line numberDiff line change
@@ -272,21 +272,55 @@ func (conR *Reactor) ReceiveEnvelope(e p2p.Envelope) {
272272
conR.conS.mtx.Lock()
273273
initialHeight := conR.conS.state.InitialHeight
274274
conR.conS.mtx.Unlock()
275+
schema.WriteConsensusState(
276+
conR.traceClient,
277+
msg.Height,
278+
msg.Round,
279+
string(e.Src.ID()),
280+
schema.ConsensusNewRoundStep,
281+
schema.Download,
282+
fmt.Sprintf("%d", msg.Step),
283+
)
275284
if err = msg.ValidateHeight(initialHeight); err != nil {
276285
conR.Logger.Error("Peer sent us invalid msg", "peer", e.Src, "msg", msg, "err", err)
277286
conR.Switch.StopPeerForError(e.Src, err)
278287
return
279288
}
280289
ps.ApplyNewRoundStepMessage(msg)
281290
case *NewValidBlockMessage:
291+
schema.WriteConsensusState(
292+
conR.traceClient,
293+
msg.Height,
294+
msg.Round,
295+
string(e.Src.ID()),
296+
schema.ConsensusNewValidBlock,
297+
schema.Download,
298+
)
282299
ps.ApplyNewValidBlockMessage(msg)
283300
case *HasVoteMessage:
284301
ps.ApplyHasVoteMessage(msg)
302+
schema.WriteConsensusState(
303+
conR.traceClient,
304+
msg.Height,
305+
msg.Round,
306+
string(e.Src.ID()),
307+
schema.ConsensusHasVote,
308+
schema.Download,
309+
msg.Type.String(),
310+
)
285311
case *VoteSetMaj23Message:
286312
cs := conR.conS
287313
cs.mtx.Lock()
288314
height, votes := cs.Height, cs.Votes
289315
cs.mtx.Unlock()
316+
schema.WriteConsensusState(
317+
conR.traceClient,
318+
msg.Height,
319+
msg.Round,
320+
string(e.Src.ID()),
321+
schema.ConsensusVoteSet23Precommit,
322+
schema.Download,
323+
)
290324
if height != msg.Height {
291325
return
292326
}
@@ -316,10 +350,20 @@ func (conR *Reactor) ReceiveEnvelope(e p2p.Envelope) {
316350
if votes := ourVotes.ToProto(); votes != nil {
317351
eMsg.Votes = *votes
318352
}
319-
p2p.TrySendEnvelopeShim(e.Src, p2p.Envelope{ //nolint: staticcheck
353+
if p2p.TrySendEnvelopeShim(e.Src, p2p.Envelope{ //nolint: staticcheck
320354
ChannelID: VoteSetBitsChannel,
321355
Message: eMsg,
322-
}, conR.Logger)
356+
}, conR.Logger) {
357+
schema.WriteConsensusState(
358+
conR.traceClient,
359+
msg.Height,
360+
msg.Round,
361+
string(e.Src.ID()),
362+
schema.ConsensusVoteSetBits,
363+
schema.Upload,
364+
msg.Type.String(),
365+
)
366+
}
323367
default:
324368
conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
325369
}
@@ -333,12 +377,27 @@ func (conR *Reactor) ReceiveEnvelope(e p2p.Envelope) {
333377
case *ProposalMessage:
334378
ps.SetHasProposal(msg.Proposal)
335379
conR.conS.peerMsgQueue <- msgInfo{msg, e.Src.ID()}
380+
schema.WriteProposal(
381+
conR.traceClient,
382+
msg.Proposal.Height,
383+
msg.Proposal.Round,
384+
string(e.Src.ID()),
385+
schema.Download,
386+
)
336387
case *ProposalPOLMessage:
337388
ps.ApplyProposalPOLMessage(msg)
389+
schema.WriteConsensusState(
390+
conR.traceClient,
391+
msg.Height,
392+
msg.ProposalPOLRound,
393+
string(e.Src.ID()),
394+
schema.ConsensusPOL,
395+
schema.Download,
396+
)
338397
case *BlockPartMessage:
339398
ps.SetHasProposalBlockPart(msg.Height, msg.Round, int(msg.Part.Index))
340399
conR.Metrics.BlockParts.With("peer_id", string(e.Src.ID())).Add(1)
341-
schema.WriteBlockPart(conR.traceClient, msg.Height, msg.Round, e.Src.ID(), msg.Part.Index, schema.Download)
400+
schema.WriteBlockPart(conR.traceClient, msg.Height, msg.Round, msg.Part.Index, false, string(e.Src.ID()), schema.Download)
342401
conR.conS.peerMsgQueue <- msgInfo{msg, e.Src.ID()}
343402
default:
344403
conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
@@ -357,7 +416,7 @@ func (conR *Reactor) ReceiveEnvelope(e p2p.Envelope) {
357416
cs.Validators.Size(), cs.LastCommit.Size()
358417
cs.mtx.RUnlock()
359418

360-
schema.WriteVote(conR.traceClient, height, round, msg.Vote, e.Src.ID(), schema.Download)
419+
schema.WriteVote(conR.traceClient, height, round, msg.Vote, string(e.Src.ID()), schema.Download)
361420

362421
ps.EnsureVoteBitArrays(height, valSize)
363422
ps.EnsureVoteBitArrays(height-1, lastCommitSize)
@@ -477,6 +536,15 @@ func (conR *Reactor) broadcastNewRoundStepMessage(rs *cstypes.RoundState) {
477536
ChannelID: StateChannel,
478537
Message: nrsMsg,
479538
})
539+
schema.WriteConsensusState(
540+
conR.traceClient,
541+
nrsMsg.Height,
542+
nrsMsg.Round,
543+
schema.Broadcast,
544+
schema.ConsensusNewRoundStep,
545+
schema.Upload,
546+
fmt.Sprintf("%d", nrsMsg.Step),
547+
)
480548
}
481549

482550
func (conR *Reactor) broadcastNewValidBlockMessage(rs *cstypes.RoundState) {
@@ -492,6 +560,14 @@ func (conR *Reactor) broadcastNewValidBlockMessage(rs *cstypes.RoundState) {
492560
ChannelID: StateChannel,
493561
Message: csMsg,
494562
})
563+
schema.WriteConsensusState(
564+
conR.traceClient,
565+
rs.Height,
566+
rs.Round,
567+
schema.Broadcast,
568+
schema.ConsensusNewValidBlock,
569+
schema.Upload,
570+
)
495571
}
496572

497573
// Broadcasts HasVoteMessage to peers that care.
@@ -506,6 +582,15 @@ func (conR *Reactor) broadcastHasVoteMessage(vote *types.Vote) {
506582
ChannelID: StateChannel,
507583
Message: msg,
508584
})
585+
schema.WriteConsensusState(
586+
conR.traceClient,
587+
vote.Height,
588+
vote.Round,
589+
schema.Broadcast,
590+
schema.ConsensusHasVote,
591+
schema.Upload,
592+
vote.Type.String(),
593+
)
509594
/*
510595
// TODO: Make this broadcast more selective.
511596
for _, peer := range conR.Switch.Peers().List() {
@@ -544,10 +629,20 @@ func makeRoundStepMessage(rs *cstypes.RoundState) (nrsMsg *cmtcons.NewRoundStep)
544629
func (conR *Reactor) sendNewRoundStepMessage(peer p2p.Peer) {
545630
rs := conR.getRoundState()
546631
nrsMsg := makeRoundStepMessage(rs)
547-
p2p.SendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
632+
if p2p.SendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
548633
ChannelID: StateChannel,
549634
Message: nrsMsg,
550-
}, conR.Logger)
635+
}, conR.Logger) {
636+
schema.WriteConsensusState(
637+
conR.traceClient,
638+
nrsMsg.Height,
639+
nrsMsg.Round,
640+
string(peer.ID()),
641+
schema.ConsensusNewRoundStep,
642+
schema.Upload,
643+
fmt.Sprintf("%d", nrsMsg.Step),
644+
)
645+
}
551646
}
552647

553648
func (conR *Reactor) updateRoundStateRoutine() {
@@ -599,7 +694,7 @@ OUTER_LOOP:
599694
Part: *parts,
600695
},
601696
}, logger) {
602-
schema.WriteBlockPart(conR.traceClient, rs.Height, rs.Round, peer.ID(), part.Index, schema.Upload)
697+
schema.WriteBlockPart(conR.traceClient, rs.Height, rs.Round, part.Index, false, string(peer.ID()), schema.Upload)
603698
ps.SetHasProposalBlockPart(prs.Height, prs.Round, index)
604699
}
605700
continue OUTER_LOOP
@@ -653,6 +748,13 @@ OUTER_LOOP:
653748
// NOTE[ZM]: A peer might have received different proposal msg so this Proposal msg will be rejected!
654749
// TODO But yet we send block parts of this proposal to the peer (in the first if statement of the current function) while the proposal is rejected. This part of the protocol could be improved by sending the proposal block parts only if the proposal is accepted.
655750
ps.SetHasProposal(rs.Proposal)
751+
schema.WriteProposal(
752+
conR.traceClient,
753+
rs.Height,
754+
rs.Round,
755+
string(peer.ID()),
756+
schema.Upload,
757+
)
656758
}
657759
}
658760
// ProposalPOL: lets peer know which POL votes we have so far.
@@ -661,14 +763,23 @@ OUTER_LOOP:
661763
// so we definitely have rs.Votes.Prevotes(rs.Proposal.POLRound).
662764
if 0 <= rs.Proposal.POLRound {
663765
logger.Debug("Sending POL", "height", prs.Height, "round", prs.Round)
664-
p2p.SendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
766+
if p2p.SendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
665767
ChannelID: DataChannel,
666768
Message: &cmtcons.ProposalPOL{
667769
Height: rs.Height,
668770
ProposalPolRound: rs.Proposal.POLRound,
669771
ProposalPol: *rs.Votes.Prevotes(rs.Proposal.POLRound).BitArray().ToProto(),
670772
},
671-
}, logger)
773+
}, logger) {
774+
schema.WriteConsensusState(
775+
conR.traceClient,
776+
rs.Height,
777+
rs.Round,
778+
string(peer.ID()),
779+
schema.ConsensusPOL,
780+
schema.Upload,
781+
)
782+
}
672783
}
673784
continue OUTER_LOOP
674785
}
@@ -720,6 +831,15 @@ func (conR *Reactor) gossipDataForCatchup(logger log.Logger, rs *cstypes.RoundSt
720831
},
721832
}, logger) {
722833
ps.SetHasProposalBlockPart(prs.Height, prs.Round, index)
834+
schema.WriteBlockPart(
835+
conR.traceClient,
836+
prs.Height,
837+
prs.Round,
838+
uint32(index),
839+
true,
840+
string(peer.ID()),
841+
schema.Upload,
842+
)
723843
} else {
724844
logger.Debug("Sending block part for catchup failed")
725845
// sleep to avoid retrying too fast
@@ -784,7 +904,7 @@ OUTER_LOOP:
784904
if vote != nil {
785905
logger.Debug("Picked Catchup commit to send", "height", prs.Height)
786906
schema.WriteVote(conR.traceClient, rs.Height, rs.Round, vote,
787-
ps.peer.ID(), schema.Upload)
907+
string(ps.peer.ID()), schema.Upload)
788908
continue OUTER_LOOP
789909
}
790910
}
@@ -813,7 +933,7 @@ func (conR *Reactor) pickSendVoteAndTrace(votes types.VoteSetReader, rs *cstypes
813933
vote := ps.PickSendVote(votes)
814934
if vote != nil { // if a vote is sent, trace it
815935
schema.WriteVote(conR.traceClient, rs.Height, rs.Round, vote,
816-
ps.peer.ID(), schema.Upload)
936+
string(ps.peer.ID()), schema.Upload)
817937
return true
818938
}
819939
return false
@@ -895,15 +1015,24 @@ OUTER_LOOP:
8951015
if rs.Height == prs.Height {
8961016
if maj23, ok := rs.Votes.Prevotes(prs.Round).TwoThirdsMajority(); ok {
8971017

898-
p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
1018+
if p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
8991019
ChannelID: StateChannel,
9001020
Message: &cmtcons.VoteSetMaj23{
9011021
Height: prs.Height,
9021022
Round: prs.Round,
9031023
Type: cmtproto.PrevoteType,
9041024
BlockID: maj23.ToProto(),
9051025
},
906-
}, ps.logger)
1026+
}, ps.logger) {
1027+
schema.WriteConsensusState(
1028+
conR.traceClient,
1029+
rs.Height,
1030+
rs.Round,
1031+
string(peer.ID()),
1032+
schema.ConsensusVoteSet23Prevote,
1033+
schema.Upload,
1034+
)
1035+
}
9071036
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
9081037
}
9091038
}
@@ -915,15 +1044,24 @@ OUTER_LOOP:
9151044
prs := ps.GetRoundState()
9161045
if rs.Height == prs.Height {
9171046
if maj23, ok := rs.Votes.Precommits(prs.Round).TwoThirdsMajority(); ok {
918-
p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
1047+
if p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
9191048
ChannelID: StateChannel,
9201049
Message: &cmtcons.VoteSetMaj23{
9211050
Height: prs.Height,
9221051
Round: prs.Round,
9231052
Type: cmtproto.PrecommitType,
9241053
BlockID: maj23.ToProto(),
9251054
},
926-
}, ps.logger)
1055+
}, ps.logger) {
1056+
schema.WriteConsensusState(
1057+
conR.traceClient,
1058+
rs.Height,
1059+
rs.Round,
1060+
string(peer.ID()),
1061+
schema.ConsensusVoteSet23Precommit,
1062+
schema.Upload,
1063+
)
1064+
}
9271065
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
9281066
}
9291067
}
@@ -936,15 +1074,24 @@ OUTER_LOOP:
9361074
if rs.Height == prs.Height && prs.ProposalPOLRound >= 0 {
9371075
if maj23, ok := rs.Votes.Prevotes(prs.ProposalPOLRound).TwoThirdsMajority(); ok {
9381076

939-
p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
1077+
if p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
9401078
ChannelID: StateChannel,
9411079
Message: &cmtcons.VoteSetMaj23{
9421080
Height: prs.Height,
9431081
Round: prs.ProposalPOLRound,
9441082
Type: cmtproto.PrevoteType,
9451083
BlockID: maj23.ToProto(),
9461084
},
947-
}, ps.logger)
1085+
}, ps.logger) {
1086+
schema.WriteConsensusState(
1087+
conR.traceClient,
1088+
rs.Height,
1089+
rs.Round,
1090+
string(peer.ID()),
1091+
schema.ConsensusPOL,
1092+
schema.Upload,
1093+
)
1094+
}
9481095
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
9491096
}
9501097
}
@@ -959,15 +1106,24 @@ OUTER_LOOP:
9591106
if prs.CatchupCommitRound != -1 && prs.Height > 0 && prs.Height <= conR.conS.blockStore.Height() &&
9601107
prs.Height >= conR.conS.blockStore.Base() {
9611108
if commit := conR.conS.LoadCommit(prs.Height); commit != nil {
962-
p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
1109+
if p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
9631110
ChannelID: StateChannel,
9641111
Message: &cmtcons.VoteSetMaj23{
9651112
Height: prs.Height,
9661113
Round: commit.Round,
9671114
Type: cmtproto.PrecommitType,
9681115
BlockID: commit.BlockID.ToProto(),
9691116
},
970-
}, ps.logger)
1117+
}, ps.logger) {
1118+
schema.WriteConsensusState(
1119+
conR.traceClient,
1120+
prs.Height,
1121+
prs.Round,
1122+
string(peer.ID()),
1123+
schema.ConsensusVoteSet23Precommit,
1124+
schema.Upload,
1125+
)
1126+
}
9711127
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
9721128
}
9731129
}

0 commit comments

Comments
 (0)
Please sign in to comment.