Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spec compliant merge shares #261

Merged
merged 35 commits into from
Apr 6, 2021
Merged
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
494db15
start spec compliant share merging
evan-forbes Mar 31, 2021
0d15df4
refactor and finish unit testing
evan-forbes Apr 1, 2021
3f0d808
whoops
evan-forbes Apr 1, 2021
8535122
linter gods
evan-forbes Apr 1, 2021
8bae88b
fix initial changes and use constants
evan-forbes Apr 1, 2021
cbb5d76
use constant
evan-forbes Apr 1, 2021
b523831
more polish
evan-forbes Apr 1, 2021
f0f38a6
docs fix
evan-forbes Apr 1, 2021
014a00e
review feedback: docs and out of range panic protection
evan-forbes Apr 1, 2021
0896a3b
review feedback: add panic protection from empty input
evan-forbes Apr 1, 2021
96eafc7
use constant instead of recalculating `ShareSize`
evan-forbes Apr 1, 2021
c3d897a
don't redeclare existing var
evan-forbes Apr 1, 2021
28f9769
be more explicit with returned nil
evan-forbes Apr 1, 2021
9ff16f5
use constant instead of recalculating `ShareSize`
evan-forbes Apr 1, 2021
39ce26a
review feedback: use consistent capitalization
evan-forbes Apr 1, 2021
8794411
stop accepting reserved namespaces as normal messages
evan-forbes Apr 1, 2021
2b28c59
use a descriptive var name for message length
evan-forbes Apr 1, 2021
d118716
linter and comparison fix
evan-forbes Apr 1, 2021
3ae1f79
reorg tests, add test for parse delimiter, DataFromBlock and fix evid…
evan-forbes Apr 2, 2021
dfe2a07
catch error for linter
evan-forbes Apr 3, 2021
e9f3a2e
update test MakeShares to include length delimiters for the SHARE_RES…
evan-forbes Apr 3, 2021
221b0fb
minor iteration change
evan-forbes Apr 3, 2021
37316e8
refactor share splitting to fix bug
evan-forbes Apr 3, 2021
e3736d6
fix all bugs with third and final refactor
evan-forbes Apr 4, 2021
632bb37
Merge branch 'master' into evan/merge-shares
evan-forbes Apr 4, 2021
3ca2afa
fix conflict
evan-forbes Apr 4, 2021
0f930fd
revert unnecessary changes
evan-forbes Apr 4, 2021
91c3989
review feedback: better docs
evan-forbes Apr 5, 2021
043812d
reivew feedback: add comment for safeLen
evan-forbes Apr 5, 2021
76d7a4b
review feedback: remove unnecessay comments
evan-forbes Apr 5, 2021
1cb4030
review feedback: split up share merging and splitting into their own …
evan-forbes Apr 5, 2021
a88db3b
review feedback: more descriptive var names
evan-forbes Apr 5, 2021
2aad8fd
fix accidental change
evan-forbes Apr 5, 2021
887aa08
add some constant docs
evan-forbes Apr 5, 2021
40a57c1
spelling error
evan-forbes Apr 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
refactor and finish unit testing
evan-forbes committed Apr 1, 2021

Verified

This commit was signed with the committer’s verified signature.
evan-forbes Evan Forbes
commit 0d15df4ec80d7decbe17193426004558c45c4332
247 changes: 106 additions & 141 deletions types/shares.go
Original file line number Diff line number Diff line change
@@ -46,9 +46,7 @@ func (tx Tx) MarshalDelimited() ([]byte, error) {
lenBuf := make([]byte, binary.MaxVarintLen64)
length := uint64(len(tx))
n := binary.PutUvarint(lenBuf, length)
out := append(lenBuf[:n], tx...)

return out, nil
return append(lenBuf[:n], tx...), nil
}

// MarshalDelimited marshals the raw data (excluding the namespace) of this
@@ -88,10 +86,12 @@ func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
innerIndex := 0
for outerIndex < len(rawDatas) {
var rawData []byte
rawData, outerIndex, innerIndex, _ = getNextChunk(rawDatas, outerIndex, innerIndex, MsgShareSize)
rawShare := append(append(
make([]byte, 0, MsgShareSize),
startIndex := 0
rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, MsgShareSize)
rawShare := append(append(append(
make([]byte, 0, len(nid)+1+len(rawData)),
nid...),
byte(startIndex)),
rawData...)
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
@@ -211,16 +211,16 @@ func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) {
share := eds.Cell(x, y)
nid := share[:NamespaceSize]
switch {
case bytes.Compare(TxNamespaceID, nid) == 0:
case bytes.Equal(TxNamespaceID, nid):
txsShares = append(txsShares, share)

case bytes.Compare(IntermediateStateRootsNamespaceID, nid) == 0:
case bytes.Equal(IntermediateStateRootsNamespaceID, nid):
isrShares = append(isrShares, share)

case bytes.Compare(EvidenceNamespaceID, nid) == 0:
case bytes.Equal(EvidenceNamespaceID, nid):
evdShares = append(evdShares, share)

case bytes.Compare(TailPaddingNamespaceID, nid) == 0:
case bytes.Equal(TailPaddingNamespaceID, nid):
continue

// every other namespaceID should be a message
@@ -231,9 +231,15 @@ func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) {
}

// pass the raw share data to their respective parsers
txs := parseTxs(txsShares)
txs, err := parseTxs(txsShares)
if err != nil {
return Data{}, err
}

isrs := parseIsrs(isrShares)
isrs, err := parseIsrs(isrShares)
if err != nil {
return Data{}, err
}

evd, err := parseEvd(evdShares)
if err != nil {
@@ -253,33 +259,44 @@ func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) {
}, nil
}

func parseTxs(shares [][]byte) Txs {
// parseTxs collects all of the transactions from the shares provided
func parseTxs(shares [][]byte) (Txs, error) {
// parse the sharse
rawTxs := parseShares(shares)
rawTxs, err := processContiguousShares(shares)
if err != nil {
return nil, err
}

// convert to the Tx type
txs := make(Txs, len(rawTxs))
for i := 0; i < len(txs); i++ {
txs[i] = Tx(rawTxs[i])
}

return txs
return txs, nil
}

func parseIsrs(shares [][]byte) IntermediateStateRoots {
rawISRs := parseShares(shares)
// parseIsrs collects all the intermediate state roots from the shares provided
func parseIsrs(shares [][]byte) (IntermediateStateRoots, error) {
rawISRs, err := processContiguousShares(shares)
if err != nil {
return IntermediateStateRoots{}, err
}

ISRs := make([]tmbytes.HexBytes, len(rawISRs))
for i := 0; i < len(ISRs); i++ {
ISRs[i] = rawISRs[i]
}

return IntermediateStateRoots{RawRootsList: ISRs}
return IntermediateStateRoots{RawRootsList: ISRs}, nil
}

// parseMsgs collects all messages from the shares provided
// parseMsgs collects all evidence from the shares provided
func parseEvd(shares [][]byte) (EvidenceData, error) {
rawEvd := parseShares(shares)
rawEvd, err := processContiguousShares(shares)
if err != nil {
return EvidenceData{}, err
}

evdList := make(EvidenceList, len(rawEvd))

@@ -315,90 +332,59 @@ func parseMsgs(shares [][]byte) (Messages, error) {
}, nil
}

// parseShares iterates through raw shares and separates the contiguous chunks
// of data. we use this for transactions, evidence, and intermediate state roots
func parseShares(shares [][]byte) [][]byte {
currentShare := shares[0][NamespaceSize+ShareReservedBytes:]
txLen := uint8(shares[0][NamespaceSize])
var parsed [][]byte
for cursor := 0; cursor < len(shares); {
var p []byte

currentShare, cursor, txLen, p = next(shares, currentShare, cursor, txLen)
if p != nil {
parsed = append(parsed, p)
}
func processContiguousShares(shares [][]byte) (txs [][]byte, err error) {
share := shares[0][NamespaceSize+ShareReservedBytes:]
share, txLen, err := parseDelimiter(share)
if err != nil {
return nil, err
}

return parsed
}

// next returns the next chunk of a contiguous share. Used for parsing
// transaction, evidence, and intermediate state root block data.
func next(shares [][]byte, current []byte, cursor int, l uint8) ([]byte, int, uint8, []byte) {
switch {
// the rest of the shares should be tail padding
case l == 0:

cursor++
if len(shares) != cursor {
panic("contiguous share of length zero")
for i := 0; i < len(shares); i++ {
var newTxs [][]byte
newTxs, share, txLen, err = collectTxFromShare(share, txLen)
if err != nil {
return nil, err
}
return nil, cursor, 0, nil

// the tx is contained in the current share
case int(l) < len(current):

tx := append(make([]byte, 0, l), current[:l]...)
txs = append(txs, newTxs...)

// set the next txLen and update the next share
txLen := current[l]

// make sure that nothing panics if txLen is at the end of the share
if len(current) < int(l)+ShareReservedBytes+1 {
cursor++
return shares[cursor][NamespaceSize:], cursor, txLen, tx
// if there is no next share
if len(shares) <= i+1 {
break
}

current := current[l+ShareReservedBytes:]
// try printing current everytime instead

return current, cursor, txLen, tx
nextShare := shares[i+1][NamespaceSize+ShareReservedBytes:]

// the tx requires some portion of the following share
case int(l) > len(current):

cursor++

// merge the current and the next share

current := append(current, shares[cursor][NamespaceSize:]...)

// try again using the next share
return next(shares, current, cursor, l)

// the tx is exactly the same size of the current share
case int(l) == len(current):
// if there is no current share, process the next share
if len(share) == 0 {
share, txLen, err = parseDelimiter(nextShare)
continue
}

tx := make([]byte, l)
copy(tx, current)
// create the next share by merging the next share with the extra
share = append(share, shares[i+1][NamespaceSize+ShareReservedBytes:]...)
}

cursor++
return txs, nil
}

// if this is the end of shares only return the tx
if cursor == len(shares) {
return []byte{}, cursor, 0, tx
func collectTxFromShare(share []byte, txLen uint64) (txs [][]byte, extra []byte, l uint64, err error) {
for uint64(len(share)) >= txLen {
tx := share[:txLen]
if len(tx) == 0 {
share = nil
break
}

// set the next txLen and next share
next := shares[cursor][NamespaceSize+ShareReservedBytes:]
nextTxLen := shares[cursor][NamespaceSize]
txs = append(txs, tx)
share = share[txLen:]

return next, cursor, nextTxLen, tx
share, txLen, err = parseDelimiter(share)
if txLen == 0 {
break
}
}

// this code is unreachable but the compiler doesn't know that
return nil, 0, 0, nil
return txs, share, txLen, nil
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, this can serve as a kind of blueprint for the pseudo-code that could end up in the spec.


// parseMessages iterates through raw shares and separates the contiguous chunks
@@ -409,15 +395,15 @@ func parseMsgShares(shares [][]byte) ([]Message, error) {
currentShare := shares[0][NamespaceSize:]

// find and remove the msg len delimiter
currentShare, msgLen, err := tailorMsg(currentShare)
currentShare, msgLen, err := parseDelimiter(currentShare)
if err != nil {
return nil, err
}

var msgs []Message
for cursor := 0; cursor < len(shares); {
for cursor := uint64(0); cursor < uint64(len(shares)); {
var msg Message
currentShare, cursor, msgLen, msg, err = nextMsg(
currentShare, nid, cursor, msgLen, msg, err = nextMsg(
shares,
currentShare,
nid,
@@ -435,69 +421,48 @@ func parseMsgShares(shares [][]byte) ([]Message, error) {
return msgs, nil
}

// next returns the next chunk of a contiguous share. Used for parsing
// transaction, evidence, and intermediate state root block data.
func nextMsg(shares [][]byte, current, nid []byte, cursor int, l uint64) ([]byte, int, uint64, Message, error) {
func nextMsg(shares [][]byte, current, nid []byte, cursor, l uint64) ([]byte, []byte, uint64, uint64, Message, error) {
switch {
// the rest of the share should be tail padding
case l == 0:
// the message uses all of the current share data and at least some of the
// next share
case l > uint64(len(current)):
// add the next share to the current one and try again
cursor++
if len(shares) != cursor {
panic("message of length zero")
}
return nil, cursor, 0, MessageEmpty, nil

// the msg is contained in the current share
case int(l) < len(current):
msg := Message{NamespaceID: nid, Data: current[:l]}

// set the next msgLen and update the next share
next, msgLen, err := tailorMsg(current[l:])
if err != nil {
return nil, 0, 0, MessageEmpty, err
}

return next, cursor, msgLen, msg, nil

// the msg requires some portion of the following share
case int(l) > len(current):
cursor++

// merge the current and the next share
current := append(current, shares[cursor][NamespaceSize:]...)

// try again using the next share
return nextMsg(shares, current, nid, cursor, l)

// the msg is exactly the same size of the current share
case l == uint64(len(current)):
msg := Message{NamespaceID: nid, Data: current}

// the msg we're looking for is contained in the current share
case l <= uint64(len(current)):
msg := Message{nid, current[:l]}
cursor++

// if this is the end of shares only return the msg
if cursor == len(shares) {
return []byte{}, cursor, 0, msg, nil
// call it a day if the work is done
if cursor >= uint64(len(shares)) {
return nil, nil, cursor, 0, msg, nil
}

// set the next msgLen and next share
next, nextMsgLen, err := tailorMsg(shares[cursor][NamespaceSize:])
if err != nil {
return nil, 0, 0, MessageEmpty, err
}

return next, cursor, nextMsgLen, msg, nil
nextNid := shares[cursor][:NamespaceSize]
next, msgLen, err := parseDelimiter(shares[cursor][NamespaceSize:])
return next, nextNid, cursor, msgLen, msg, err
}

// this code is unreachable but the compiler doesn't know that
return nil, 0, 0, MessageEmpty, nil
return nil, nil, 0, 0, MessageEmpty, nil
}

// tailorMsg finds and returns the length delimiter of the message provided
// parseDelimiter finds and returns the length delimiter of the message provided
// while also removing the delimiter bytes from the input
func tailorMsg(input []byte) ([]byte, uint64, error) {
func parseDelimiter(input []byte) ([]byte, uint64, error) {
if len(input) == 0 {
return input, 0, nil
}

l := binary.MaxVarintLen64
if len(input) < binary.MaxVarintLen64 {
l = len(input)
}

// read the length of the message
r := bytes.NewBuffer(input[:binary.MaxVarintLen64])
r := bytes.NewBuffer(input[:l])
msgLen, err := binary.ReadUvarint(r)
if err != nil {
return nil, 0, err
@@ -508,5 +473,5 @@ func tailorMsg(input []byte) ([]byte, uint64, error) {
n := binary.PutUvarint(lenBuf, msgLen)

// return the input without the length delimiter
return input[n+1:], msgLen, nil
return input[n:], msgLen, nil
}
190 changes: 183 additions & 7 deletions types/shares_test.go
Original file line number Diff line number Diff line change
@@ -2,6 +2,8 @@ package types

import (
"bytes"
"fmt"
"math/rand"
"reflect"
"testing"

@@ -194,14 +196,188 @@ func Test_appendToSharesOverwrite(t *testing.T) {
assert.Equal(t, extraCopy, []byte(newShare.Share[:MsgShareSize]))
}

func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare {
shares := generateRandNamespacedRawData(count, NamespaceSize, leafSize)
nsShares := make(NamespacedShares, count)
func Test_processContiguousShares(t *testing.T) {
// exactTxShareSize is the length of tx that will fit exactly into a single
// share, accounting for namespace id and the length delimiter prepended to
// each tx
const exactTxShareSize = TxShareSize - 1

type test struct {
name string
txSize int
txCount int
}

// each test is ran twice, once using txSize as an exact size, and again
// using it as a cap for randomly sized txs
tests := []test{
{"single small tx", 10, 1},
{"many small txs", 80, 10},
{"single big tx", 1000, 1},
{"many big txs", 1000, 10},
{"single exact size tx", exactTxShareSize, 1},
{"many exact size txs", exactTxShareSize, 2},
}

for _, tc := range tests {

// run the tests with identically sized txs
t.Run(fmt.Sprintf("idendically sized %s", tc.name), func(t *testing.T) {
txs := generateRandomContiguousShares(tc.txCount, tc.txSize)

shares := txs.splitIntoShares()

parsedTxs, err := processContiguousShares(shares.RawShares())
if err != nil {
t.Error(err)
}

// check that the data parsed is identical
for i := 0; i < len(txs); i++ {
assert.Equal(t, []byte(txs[i]), parsedTxs[i])
}
})

// run the same tests using randomly sized txs with caps of tc.msgSize
tc := tc
t.Run(fmt.Sprintf("randomly sized %s", tc.name), func(t *testing.T) {
txs := generateRandomlySizedContiguousShares(tc.txCount, tc.txSize)

shares := txs.splitIntoShares()

parsedTxs, err := processContiguousShares(shares.RawShares())
if err != nil {
t.Error(err)
}

// check that the data parsed is identical
for i := 0; i < len(txs); i++ {
assert.Equal(t, []byte(txs[i]), parsedTxs[i])
}
})
}
}

func generateRandomlySizedContiguousShares(count, max int) Txs {
txs := make(Txs, count)
for i := 0; i < count; i++ {
size := rand.Intn(max)
// TODO: find out why
// txs smaller than two bytes that get mixed in with other randomly
// sized txs *sometime* cause processContiguousShares to end early
if size <= 2 {
size = max
}
txs[i] = generateRandomContiguousShares(1, size)[0]
}
return txs
}

func generateRandomContiguousShares(count, size int) Txs {
txs := make(Txs, count)
for i := 0; i < count; i++ {
tx := make([]byte, size)
_, err := rand.Read(tx)
if err != nil {
panic(err)
}
txs[i] = Tx(tx)
}
return txs
}

func Test_parseMsgShares(t *testing.T) {
// exactMsgShareSize is the length of message that will fit exactly into a single
// share, accounting for namespace id and the length delimiter prepended to
// each message
const exactMsgShareSize = MsgShareSize - 2

type test struct {
name string
msgSize int
msgCount int
}

// each test is ran twice, once using msgSize as an exact size, and again
// using it as a cap for randomly sized leaves
tests := []test{
{"single small msg", 1, 1},
{"many small msgs", 4, 10},
{"single big msg", 1000, 1},
{"many big msgs", 1000, 10},
{"single exact size msg", exactMsgShareSize, 1},
{"many exact size msgs", exactMsgShareSize, 10},
}

for _, tc := range tests {

// run the tests with identically sized messagses
t.Run(fmt.Sprintf("idendically sized %s", tc.name), func(t *testing.T) {
rawmsgs := make([]Message, tc.msgCount)
for i := 0; i < tc.msgCount; i++ {
rawmsgs[i] = generateRandomMessage(tc.msgSize)
}
msgs := Messages{MessagesList: rawmsgs}

shares := msgs.splitIntoShares()

parsedMsgs, err := parseMsgShares(shares.RawShares())
if err != nil {
t.Error(err)
}

// check that the namesapces and data are the same
for i := 0; i < len(msgs.MessagesList); i++ {
assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
}
})

// run the same tests using randomly sized messages with caps of tc.msgSize
tc := tc
t.Run(fmt.Sprintf("randomly sized %s", tc.name), func(t *testing.T) {
msgs := generateRandomlySizedMessages(tc.msgCount, tc.msgSize)
shares := msgs.splitIntoShares()

parsedMsgs, err := parseMsgShares(shares.RawShares())
if err != nil {
t.Error(err)
}

// check that the namesapces and data are the same
for i := 0; i < len(msgs.MessagesList); i++ {
assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
}
})
}
}

func generateRandomlySizedMessages(count, maxMsgSize int) Messages {
msgs := make([]Message, count)
for i := 0; i < count; i++ {
msgs[i] = generateRandomMessage(rand.Intn(maxMsgSize))
}
return Messages{MessagesList: msgs}
}

func generateRandomMessage(size int) Message {
share := generateRandomNamespacedShares(1, size)[0]
msg := Message{
NamespaceID: share.NamespaceID(),
Data: share.Data(),
}
return msg
}

func generateRandomNamespacedShares(count, msgSize int) NamespacedShares {
shares := generateRandNamespacedRawData(count, NamespaceSize, msgSize)
msgs := make([]Message, count)
for i, s := range shares {
nsShares[i] = NamespacedShare{
Share: s[NamespaceSize:],
ID: s[:NamespaceSize],
msgs[i] = Message{
Data: s[NamespaceSize:],
NamespaceID: s[:NamespaceSize],
}
}
return nsShares
return Messages{MessagesList: msgs}.splitIntoShares()
}