From 58f23ccbb872c96f508a6369cbb52ada68a92324 Mon Sep 17 00:00:00 2001
From: awskii
Date: Thu, 27 Feb 2025 10:20:59 +0000
Subject: [PATCH] save

---
 cmd/commitment-prefix/main.go          |  10 ++-
 erigon-lib/commitment/commitment.go    |  45 +++++++++--
 erigon-lib/state/aggregator.go         |   1 +
 eth/integrity/commitment.go            | 130 +++++++++++++++++++++++++
 eth/integrity/integrity_action_type.go |   3 +-
 turbo/app/snapshots_cmd.go             |   4 +
 6 files changed, 181 insertions(+), 12 deletions(-)
 create mode 100644 eth/integrity/commitment.go

diff --git a/cmd/commitment-prefix/main.go b/cmd/commitment-prefix/main.go
index 464b78e8a26..15255d47cd3 100644
--- a/cmd/commitment-prefix/main.go
+++ b/cmd/commitment-prefix/main.go
@@ -220,11 +220,13 @@ func extractKVPairFromCompressed(filename string, keysSink chan commitment.Branc
 		if depth > len(key) {
 			continue
 		}
-		stat := commitment.DecodeBranchAndCollectStat(key, val, tv)
-		if stat == nil {
-			fmt.Printf("failed to decode branch: %x %x\n", key, val)
+		stat, err := commitment.DecodeBranchAndCollectStat(key, val, tv)
+		if err != nil {
+			// report and keep going; a nil stat without an error (empty key) is skipped as well
+			fmt.Printf("\n\n%v: failed to decode branch: %x %x\n", err, key, val)
+		} else if stat != nil {
+			keysSink <- *stat
 		}
-		keysSink <- *stat
 	}
 	return nil
 }
diff --git a/erigon-lib/commitment/commitment.go b/erigon-lib/commitment/commitment.go
index a6a6d61d2af..ca940f7704e 100644
--- a/erigon-lib/commitment/commitment.go
+++ b/erigon-lib/commitment/commitment.go
@@ -559,6 +559,25 @@ func (branchData BranchData) IsComplete() bool {
 	return ^touchMap&afterMap == 0
 }
 
+// Verify decodes branchData stored under itsPrefix and reports structural corruption; the "state" key is skipped.
+func (branchData BranchData) Verify(itsPrefix []byte) error {
+	if bytes.Equal(itsPrefix, []byte("state")) {
+		return nil
+	}
+
+	// most checks happen during decoding
+	_, am, _, err := branchData.decodeCells()
+	if err != nil {
+		return fmt.Errorf("branch %x: %w", itsPrefix, err)
+	}
+	// a tombstone (empty afterMap) must consist of touchMap and afterMap only
+	if am == 0 && len(branchData) > 4 {
+		return fmt.Errorf("tombstone %x contains %d trailing bytes", itsPrefix, len(branchData)-4)
+	}
+
+	return nil
+}
+
 // MergeHexBranches combines two branchData, number 2 coming after (and potentially shadowing) number 1
 func (branchData BranchData) MergeHexBranches(branchData2 BranchData, newData []byte) (BranchData, error) {
 	if branchData2 == nil {
@@ -640,23 +659,35 @@ func (branchData BranchData) MergeHexBranches(branchData2 BranchData, newData []
 }
 
 func (branchData BranchData) decodeCells() (touchMap, afterMap uint16, row [16]*cell, err error) {
+	pos, decCount := 4, 0
+	if len(branchData) < pos {
+		return 0, 0, row, fmt.Errorf("branch is shorter than 4 bytes")
+	}
 	touchMap = binary.BigEndian.Uint16(branchData[0:])
 	afterMap = binary.BigEndian.Uint16(branchData[2:])
-	pos := 4
 	for bitset, j := touchMap, 0; bitset != 0; j++ {
 		bit := bitset & -bitset
 		nibble := bits.TrailingZeros16(bit)
 		if afterMap&bit != 0 {
+			if pos >= len(branchData) {
+				return 0, 0, row, fmt.Errorf("branch ended at %d without nibble %x", pos, nibble)
+			}
 			fields := cellFields(branchData[pos])
 			pos++
 			row[nibble] = new(cell)
 			if pos, err = row[nibble].fillFromFields(branchData, pos, fields); err != nil {
-				err = fmt.Errorf("failed to fill cell at nibble %x: %w", nibble, err)
-				return
+				return 0, 0, row, fmt.Errorf("failed to read cell at nibble %x: %w", nibble, err)
 			}
+			decCount++
 		}
 		bitset ^= bit
 	}
+	if exp := bits.OnesCount16(afterMap); exp != decCount {
+		if afterMap != touchMap && decCount == bits.OnesCount16(touchMap) {
+			return 0, 0, row, fmt.Errorf("decoded partial update for %d cells, expected %d", decCount, exp)
+		}
+		return 0, 0, row, fmt.Errorf("decoded %d cells, expected %d", decCount, exp)
+	}
 	return
 }
 
@@ -839,10 +870,10 @@ func (bs *BranchStat) Collect(other *BranchStat) {
 	bs.LeafHashCount += other.LeafHashCount
 }
 
-func DecodeBranchAndCollectStat(key, branch []byte, tv TrieVariant) *BranchStat {
+func DecodeBranchAndCollectStat(key, branch []byte, tv TrieVariant) (*BranchStat, error) {
 	stat := &BranchStat{}
 	if len(key) == 0 {
-		return nil
+		return nil, nil
 	}
 
 	stat.KeySize = uint64(len(key))
@@ -855,7 +886,7 @@ func DecodeBranchAndCollectStat(key, branch []byte, tv TrieVariant) *BranchStat
 
 	tm, am, cells, err := BranchData(branch).decodeCells()
 	if err != nil {
-		return nil
+		return nil, err
 	}
 	stat.TAMapsSize = uint64(2 + 2) // touchMap + afterMap
 	stat.CellCount = uint64(bits.OnesCount16(tm & am))
@@ -919,7 +950,7 @@ func DecodeBranchAndCollectStat(key, branch []byte, tv TrieVariant) *BranchStat
 			}
 		}
 	}
-	return stat
+	return stat, nil
 }
 
 // Defines how to evaluate commitments
diff --git a/erigon-lib/state/aggregator.go b/erigon-lib/state/aggregator.go
index 1b9cfeeccc3..c0db22b79c6 100644
--- a/erigon-lib/state/aggregator.go
+++ b/erigon-lib/state/aggregator.go
@@ -790,6 +790,7 @@ func (ac *AggregatorRoTx) StepsInFiles(entitySet ...kv.Domain) uint64 {
 	return txNumInFiles / ac.a.StepSize()
 }
 
+// TxNumsInFiles - returns minimal TxNum in files across domains in entitySet
 func (ac *AggregatorRoTx) TxNumsInFiles(entitySet ...kv.Domain) (minTxNum uint64) {
 	if len(entitySet) == 0 {
 		panic("assert: missed arguments")
diff --git a/eth/integrity/commitment.go b/eth/integrity/commitment.go
new file mode 100644
index 00000000000..73723989a9e
--- /dev/null
+++ b/eth/integrity/commitment.go
@@ -0,0 +1,130 @@
+// Copyright 2025 The Erigon Authors
+// This file is part of Erigon.
+//
+// Erigon is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Erigon is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
+
+package integrity
+
+import (
+	"context"
+	"fmt"
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"golang.org/x/sync/errgroup"
+
+	"github.com/erigontech/erigon-lib/commitment"
+	"github.com/erigontech/erigon-lib/common"
+	"github.com/erigontech/erigon-lib/kv"
+	"github.com/erigontech/erigon-lib/log/v3"
+	"github.com/erigontech/erigon-lib/state"
+	"github.com/erigontech/erigon/turbo/services"
+)
+
+// commitmentShardRange returns the half-open key range [from, to) scanned by shard (hi, lo).
+// The 256*256 shards are contiguous and cover every non-empty key: shard (hi, 0) starts at the
+// one-byte key {hi}, which sorts before {hi, 0x00}, and shard (hi, 0xff) ends at {hi+1}.
+// The last shard (0xff, 0xff) has a nil upper bound.
+// NOTE(review): assumes RangeLatest treats a nil upper bound as "until the last key" - confirm.
+func commitmentShardRange(hi, lo int) (from, to []byte) {
+	from = []byte{byte(hi), byte(lo)}
+	if lo == 0 {
+		from = from[:1]
+	}
+	switch {
+	case lo < 0xff:
+		to = []byte{byte(hi), byte(lo + 1)}
+	case hi < 0xff:
+		to = []byte{byte(hi + 1)}
+	}
+	return from, to
+}
+
+// CommitmentFilesSanity decodes every commitment branch returned by RangeLatest, once as stored and once
+// as returned by GetLatest, and stops at the first branch that fails BranchData.Verify.
+func CommitmentFilesSanity(ctx context.Context, db kv.TemporalRwDB, blockReader services.FullBlockReader, agg *state.Aggregator) error {
+	logEvery := time.NewTicker(20 * time.Second)
+	defer logEvery.Stop()
+	start := time.Now()
+	var count atomic.Uint64
+	g, ctx := errgroup.WithContext(ctx)
+	// every shard opens its own read transaction, so keep the number of concurrent shards bounded
+	g.SetLimit(runtime.GOMAXPROCS(0))
+	info := sync.Once{}
+
+	for j := 0; j < 256; j++ {
+		for jj := 0; jj < 256; jj++ {
+			from, to := commitmentShardRange(j, jj)
+			g.Go(func() error {
+				if err := ctx.Err(); err != nil {
+					return err // another shard already failed or the caller cancelled
+				}
+				tx, err := db.BeginTemporalRo(ctx)
+				if err != nil {
+					return err
+				}
+				defer tx.Rollback()
+
+				aggTx := tx.(state.HasAggTx).AggTx().(*state.AggregatorRoTx)
+				info.Do(func() {
+					log.Info("Checking commitment files", "domain", kv.CommitmentDomain, "txn", aggTx.TxNumsInFiles(kv.CommitmentDomain))
+				})
+				keys, err := aggTx.RangeLatest(tx, kv.CommitmentDomain, from, to, -1)
+				if err != nil {
+					return err
+				}
+				defer keys.Close()
+
+				for keys.HasNext() {
+					prefix, branchData, err := keys.Next()
+					if err != nil {
+						return err
+					}
+
+					if err = commitment.BranchData(branchData).Verify(prefix); err != nil {
+						return err
+					}
+					// during fetching latest all branches are dereferenced so plain keys for nodes are available
+					bdDereferenced, _, _, err := aggTx.GetLatest(kv.CommitmentDomain, prefix, tx)
+					if err != nil {
+						return err
+					}
+					if len(branchData) > len(bdDereferenced) {
+						return fmt.Errorf("dereferenced branch %x is shorter than referenced", prefix)
+					}
+					if err = commitment.BranchData(bdDereferenced).Verify(prefix); err != nil {
+						return err
+					}
+
+					select {
+					case <-ctx.Done():
+						return ctx.Err()
+					case <-logEvery.C:
+						log.Warn(fmt.Sprintf("[dbg] checked=%s prefixes", common.PrettyCounter(count.Load())))
+					default:
+					}
+					count.Add(1)
+				}
+				return nil
+			})
+		}
+	}
+	if err := g.Wait(); err != nil {
+		return err
+	}
+	log.Info("finished checking commitment sanity", "prefixes", common.PrettyCounter(count.Load()), "time spent", time.Since(start))
+	return nil
+}
diff --git a/eth/integrity/integrity_action_type.go b/eth/integrity/integrity_action_type.go
index 51134c1e600..06424e21136 100644
--- a/eth/integrity/integrity_action_type.go
+++ b/eth/integrity/integrity_action_type.go
@@ -28,10 +28,11 @@ const (
 	BorSpans           Check = "BorSpans"
 	BorCheckpoints     Check = "BorCheckpoints"
 	BorMilestones      Check = "BorMilestones" // this check is informational, and we don't run it by default (e.g. gaps may exist but that is ok)
+	Commitment         Check = "Commitment"
 )
 
 var AllChecks = []Check{
-	Blocks, BlocksTxnID, InvertedIndex, HistoryNoSystemTxs, ReceiptsNoDups, BorEvents, BorSpans, BorCheckpoints,
+	Blocks, BlocksTxnID, InvertedIndex, HistoryNoSystemTxs, ReceiptsNoDups, BorEvents, BorSpans, BorCheckpoints, Commitment,
 }
 
 var NonDefaultChecks = []Check{
diff --git a/turbo/app/snapshots_cmd.go b/turbo/app/snapshots_cmd.go
index 243ea2481b6..58d8b028717 100644
--- a/turbo/app/snapshots_cmd.go
+++ b/turbo/app/snapshots_cmd.go
@@ -599,6 +599,10 @@ func doIntegrity(cliCtx *cli.Context) error {
 			if err := integrity.ReceiptsNoDuplicates(ctx, db, blockReader, failFast); err != nil {
 				return err
 			}
+		case integrity.Commitment:
+			if err := integrity.CommitmentFilesSanity(ctx, db, blockReader, agg); err != nil {
+				return err
+			}
 		default:
 			return fmt.Errorf("unknown check: %s", chk)
 		}