
Commit d110b6f

Authored by evan-forbes, liamsi, adlerjohn, Hlib Kanunnikov, and renaynay on Sep 22, 2021
Refactor DAH creation to better accommodate celestia-node use case (#539)
* Basic DA functionality (#83)
* move Messages field to the end of Block.Data
* Add some constants for share computation and the NMT:
  - also a bunch of todos regarding shares computation
* First (compiling) stab on creating shares
* Test with Evidence and fix bug discovered by test
* remove resolved todos
* introduce split method
* Introduce LenDelimitedMarshaler interface and some reformatting
* Introduce TxLenDelimitedMarshaler
* add some test cases
* fix some comments
* fix some comments & linter
* Add reserved namespaces to params
* Move ll-specific consts into a separate file (consts.go)
* Add MarshalDelimited to HexBytes
* Add tail-padding shares
* Add ComputeShares method on Data to compute all shares
* Fix: compute the next square num and not the next power of two
* lints
* Unexport MakeShares function:
  - it's likely to change and it doesn't have to be part of the public API
* lints 2
* First stab on computing row/column roots
* fix rebase glitches:
  - move DA related constants out of params.go
* refactor MakeBlock to take in interm. state roots and messages
* refactor state.MakeBlock too
* Add todos to LenDelimitedMarshaler and extract appendShares logic
* Simplify shares computation: remove LenDelimitedMarshaler abstraction
* actually use DA header to compute the DataRoot everywhere (will lead to failing tests for sure)
* WIP: Update block related core data structures in protobuf too
* WIP: fix zero shares edge-case and get rid of Block.Data.hash (use dataAvailabilityHeader.Hash() instead)
* Fixed tests, only 3 failing tests to go: TestReapMaxBytesMaxGas, TestTxFilter, TestMempoolFilters
* Fix TestTxFilter:
  - the size of the wrapping Data{} proto message increased a few bytes
* Fix Message proto and `DataFromProto`
* Fix last 2 remaining tests related to the increased block/block.Data size
* Use infectious lib instead of leopard
* proto-lint: snake_case
* some lints and minor changes
* linter
* panic if pushing to tree fails, extend Data.ToProto()
* revert renaming in comment
* add todo about refactoring as soon as rsmt2d allows the user to choose the merkle tree
* clean up some unused test helper functions
* linter
* still debugging the exact right number of bytes for max data...

* Implement spec-compliant share splitting (#246)
  - Export block data compute shares.
  - Refactor to use ShareSize constant directly.
  - Change message splitting to prefix namespace ID.
  - Implement chunking for contiguous.
  - Add termination condition.
  - Rename append contiguous to split contiguous.
  - Update test for small tx.
  - Add test for two contiguous.
  - Make tx and msg adjusted share sizes exported constants.
  - Panic on hopefully-unreachable condition instead of silently skipping.
  - Update hardcoded response for block format.

* fix overwrite bug (#251)
  - fix overwrite bug and stop splitting shares of size MsgShareSize
  - remove ineffectual code
  - review feedback: better docs
  - remove unneeded copy and only fix the source of the bug
  - fix overwrite bug while also being consistent with using NamespacedShares
  - update to the latest rsmt2d for the nmt wrapper

* Spec-compliant merge shares (#261)
  - start spec-compliant share merging
  - refactor and finish unit testing
  - linter fixes
  - fix initial changes and use constants
  - more polish
  - docs fix
  - review feedback: docs and out-of-range panic protection
  - review feedback: add panic protection from empty input
  - use constant instead of recalculating `ShareSize`
  - don't redeclare existing var
  - be more explicit with returned nil
  - review feedback: use consistent capitalization
  - stop accepting reserved namespaces as normal messages
  - use a descriptive var name for message length
  - linter and comparison fix
  - reorg tests, add test for parse delimiter and DataFromBlock, and fix evidence marshal bug
  - catch error for linter
  - update test MakeShares to include length delimiters for the SHARE_RESERVED_BYTE
  - minor iteration change
  - refactor share splitting to fix bug
  - fix all bugs with third and final refactor
  - fix conflict and revert unnecessary changes
  - review feedback: better docs, add comment for safeLen, remove unnecessary comments
  - review feedback: split up share merging and splitting into their own files
  - review feedback: more descriptive var names
  - fix accidental change
  - add some constant docs
  - fix spelling error

* refactor to better accommodate real-world use cases (celestia-node)
* thank you, linter

Co-authored-by: Ismail Khoffi <[email protected]>
Co-authored-by: John Adler <[email protected]>
Co-authored-by: Hlib Kanunnikov <[email protected]>
Co-authored-by: rene <[email protected]>
1 parent 6e592de · commit d110b6f

10 files changed: +1089 −168 lines changed

pkg/consts/consts.go (+6)

@@ -4,6 +4,7 @@ import (
 	"crypto/sha256"
 
 	"github.com/celestiaorg/nmt/namespace"
+	"github.com/celestiaorg/rsmt2d"
 )
 
 // This contains all constants of:
@@ -61,4 +62,9 @@ var (
 
 	// NewBaseHashFunc change accordingly if another hash.Hash should be used as a base hasher in the NMT:
 	NewBaseHashFunc = sha256.New
+
+	// DefaultCodec is the default codec creator used for data erasure
+	// TODO(ismail): for better efficiency and a larger number shares
+	// we should switch to the rsmt2d.LeopardFF16 codec:
+	DefaultCodec = rsmt2d.NewRSGF8Codec
 )

pkg/da/data_availability_header.go (+25 −32)

@@ -14,8 +14,8 @@ import (
 )
 
 const (
-	maxDAHSize = consts.MaxSquareSize * 2
-	minDAHSize = consts.MinSquareSize * 2
+	maxExtendedSquareWidth = consts.MaxSquareSize * 2
+	minExtendedSquareWidth = consts.MinSquareSize * 2
 )
 
 // DataAvailabilityHeader (DAHeader) contains the row and column roots of the erasure
@@ -38,10 +38,23 @@ type DataAvailabilityHeader struct {
 }
 
 // NewDataAvailabilityHeader generates a DataAvailability header using the provided square size and shares
-func NewDataAvailabilityHeader(squareSize uint64, shares [][]byte) (DataAvailabilityHeader, error) {
+func NewDataAvailabilityHeader(eds *rsmt2d.ExtendedDataSquare) DataAvailabilityHeader {
+	// generate the row and col roots using the EDS
+	dah := DataAvailabilityHeader{
+		RowsRoots:   eds.RowRoots(),
+		ColumnRoots: eds.ColRoots(),
+	}
+
+	// generate the hash of the data using the new roots
+	dah.Hash()
+
+	return dah
+}
+
+func ExtendShares(squareSize uint64, shares [][]byte) (*rsmt2d.ExtendedDataSquare, error) {
 	// Check that square size is with range
 	if squareSize < consts.MinSquareSize || squareSize > consts.MaxSquareSize {
-		return DataAvailabilityHeader{}, fmt.Errorf(
+		return nil, fmt.Errorf(
 			"invalid square size: min %d max %d provided %d",
 			consts.MinSquareSize,
 			consts.MaxSquareSize,
@@ -50,32 +63,14 @@ func NewDataAvailabilityHeader(squareSize uint64, shares [][]byte) (DataAvailabi
 	}
 	// check that valid number of shares have been provided
 	if squareSize*squareSize != uint64(len(shares)) {
-		return DataAvailabilityHeader{}, fmt.Errorf(
+		return nil, fmt.Errorf(
 			"must provide valid number of shares for square size: got %d wanted %d",
 			len(shares),
 			squareSize*squareSize,
 		)
 	}
-
 	tree := wrapper.NewErasuredNamespacedMerkleTree(squareSize)
-
-	// TODO(ismail): for better efficiency and a larger number shares
-	// we should switch to the rsmt2d.LeopardFF16 codec:
-	extendedDataSquare, err := rsmt2d.ComputeExtendedDataSquare(shares, rsmt2d.NewRSGF8Codec(), tree.Constructor)
-	if err != nil {
-		return DataAvailabilityHeader{}, err
-	}
-
-	// generate the row and col roots using the EDS
-	dah := DataAvailabilityHeader{
-		RowsRoots:   extendedDataSquare.RowRoots(),
-		ColumnRoots: extendedDataSquare.ColRoots(),
-	}
-
-	// generate the hash of the data using the new roots
-	dah.Hash()
-
-	return dah, nil
+	return rsmt2d.ComputeExtendedDataSquare(shares, consts.DefaultCodec(), tree.Constructor)
 }
 
 // String returns hex representation of merkle hash of the DAHeader.
@@ -143,16 +138,16 @@ func (dah *DataAvailabilityHeader) ValidateBasic() error {
 	if dah == nil {
 		return errors.New("nil data availability header is not valid")
 	}
-	if len(dah.ColumnRoots) < minDAHSize || len(dah.RowsRoots) < minDAHSize {
+	if len(dah.ColumnRoots) < minExtendedSquareWidth || len(dah.RowsRoots) < minExtendedSquareWidth {
 		return fmt.Errorf(
 			"minimum valid DataAvailabilityHeader has at least %d row and column roots",
-			minDAHSize,
+			minExtendedSquareWidth,
 		)
 	}
-	if len(dah.ColumnRoots) > maxDAHSize || len(dah.RowsRoots) > maxDAHSize {
+	if len(dah.ColumnRoots) > maxExtendedSquareWidth || len(dah.RowsRoots) > maxExtendedSquareWidth {
 		return fmt.Errorf(
 			"maximum valid DataAvailabilityHeader has at most %d row and column roots",
-			maxDAHSize,
+			maxExtendedSquareWidth,
 		)
 	}
 	if len(dah.ColumnRoots) != len(dah.RowsRoots) {
@@ -190,13 +185,11 @@ func MinDataAvailabilityHeader() DataAvailabilityHeader {
 	for i := 0; i < consts.MinSharecount; i++ {
 		shares[i] = tailPaddingShare
 	}
-	dah, err := NewDataAvailabilityHeader(
-		consts.MinSquareSize,
-		shares,
-	)
+	eds, err := ExtendShares(consts.MinSquareSize, shares)
 	if err != nil {
 		panic(err)
 	}
+	dah := NewDataAvailabilityHeader(eds)
 	return dah
 }

pkg/da/data_availability_header_test.go (+31 −11)

@@ -37,15 +37,13 @@ func TestNewDataAvailabilityHeader(t *testing.T) {
 	type test struct {
 		name         string
 		expectedHash []byte
-		expectedErr  bool
 		squareSize   uint64
 		shares       [][]byte
 	}
 
 	tests := []test{
 		{
-			name:        "typical",
-			expectedErr: false,
+			name: "typical",
 			expectedHash: []byte{
 				0xfe, 0x9c, 0x6b, 0xd8, 0xe5, 0x7c, 0xd1, 0x5d, 0x1f, 0xd6, 0x55, 0x7e, 0x87, 0x7d, 0xd9, 0x7d,
 				0xdb, 0xf2, 0x66, 0xfa, 0x60, 0x24, 0x2d, 0xb3, 0xa0, 0x9c, 0x4f, 0x4e, 0x5b, 0x2a, 0x2c, 0x2a,
@@ -54,15 +52,36 @@ func TestNewDataAvailabilityHeader(t *testing.T) {
 			shares: generateShares(4, 1),
 		},
 		{
-			name:        "max square size",
-			expectedErr: false,
+			name: "max square size",
 			expectedHash: []byte{
 				0xe2, 0x87, 0x23, 0xd0, 0x2d, 0x54, 0x25, 0x5f, 0x79, 0x43, 0x8e, 0xfb, 0xb7, 0xe8, 0xfa, 0xf5,
 				0xbf, 0x93, 0x50, 0xb3, 0x64, 0xd0, 0x4f, 0xa7, 0x7b, 0xb1, 0x83, 0x3b, 0x8, 0xba, 0xd3, 0xa4,
 			},
 			squareSize: consts.MaxSquareSize,
 			shares:     generateShares(consts.MaxSquareSize*consts.MaxSquareSize, 99),
 		},
+	}
+
+	for _, tt := range tests {
+		tt := tt
+		eds, err := ExtendShares(tt.squareSize, tt.shares)
+		require.NoError(t, err)
+		resdah := NewDataAvailabilityHeader(eds)
+		require.Equal(t, tt.squareSize*2, uint64(len(resdah.ColumnRoots)), tt.name)
+		require.Equal(t, tt.squareSize*2, uint64(len(resdah.RowsRoots)), tt.name)
+		require.Equal(t, tt.expectedHash, resdah.hash, tt.name)
+	}
+}
+
+func TestExtendShares(t *testing.T) {
+	type test struct {
+		name        string
+		expectedErr bool
+		squareSize  uint64
+		shares      [][]byte
+	}
+
+	tests := []test{
 		{
 			name:        "too large square size",
 			expectedErr: true,
@@ -79,15 +98,13 @@
 
 	for _, tt := range tests {
 		tt := tt
-		resdah, err := NewDataAvailabilityHeader(tt.squareSize, tt.shares)
+		eds, err := ExtendShares(tt.squareSize, tt.shares)
 		if tt.expectedErr {
 			require.NotNil(t, err)
 			continue
 		}
 		require.NoError(t, err)
-		require.Equal(t, tt.squareSize*2, uint64(len(resdah.ColumnRoots)), tt.name)
-		require.Equal(t, tt.squareSize*2, uint64(len(resdah.RowsRoots)), tt.name)
-		require.Equal(t, tt.expectedHash, resdah.hash, tt.name)
+		require.Equal(t, tt.squareSize*2, eds.Width(), tt.name)
 	}
 }
 
@@ -98,8 +115,9 @@ func TestDataAvailabilityHeaderProtoConversion(t *testing.T) {
 	}
 
 	shares := generateShares(consts.MaxSquareSize*consts.MaxSquareSize, 1)
-	bigdah, err := NewDataAvailabilityHeader(consts.MaxSquareSize, shares)
+	eds, err := ExtendShares(consts.MaxSquareSize, shares)
 	require.NoError(t, err)
+	bigdah := NewDataAvailabilityHeader(eds)
 
 	tests := []test{
 		{
@@ -133,8 +151,10 @@ func Test_DAHValidateBasic(t *testing.T) {
 	}
 
 	shares := generateShares(consts.MaxSquareSize*consts.MaxSquareSize, 1)
-	bigdah, err := NewDataAvailabilityHeader(consts.MaxSquareSize, shares)
+	eds, err := ExtendShares(consts.MaxSquareSize, shares)
 	require.NoError(t, err)
+	bigdah := NewDataAvailabilityHeader(eds)
+
 	// make a mutant dah that has too many roots
 	var tooBigDah DataAvailabilityHeader
 	tooBigDah.ColumnRoots = make([][]byte, consts.MaxSquareSize*consts.MaxSquareSize)

pkg/wrapper/nmt_wrapper_test.go (+4 −4)

@@ -27,7 +27,7 @@ func TestPushErasuredNamespacedMerkleTree(t *testing.T) {
 		tree := n.Constructor()
 
 		// push test data to the tree
-		for i, d := range generateErasuredData(t, tc.squareSize, rsmt2d.NewRSGF8Codec()) {
+		for i, d := range generateErasuredData(t, tc.squareSize, consts.DefaultCodec()) {
 			// push will panic if there's an error
 			tree.Push(d, rsmt2d.SquareIndex{Axis: uint(0), Cell: uint(i)})
 		}
@@ -64,7 +64,7 @@ func TestErasureNamespacedMerkleTreePanics(t *testing.T) {
 			"push over square size",
 			assert.PanicTestFunc(
 				func() {
-					data := generateErasuredData(t, 16, rsmt2d.NewRSGF8Codec())
+					data := generateErasuredData(t, 16, consts.DefaultCodec())
 					n := NewErasuredNamespacedMerkleTree(uint64(15))
 					tree := n.Constructor()
 					for i, d := range data {
@@ -76,7 +76,7 @@ func TestErasureNamespacedMerkleTreePanics(t *testing.T) {
 			"push in incorrect lexigraphic order",
 			assert.PanicTestFunc(
 				func() {
-					data := generateErasuredData(t, 16, rsmt2d.NewRSGF8Codec())
+					data := generateErasuredData(t, 16, consts.DefaultCodec())
 					n := NewErasuredNamespacedMerkleTree(uint64(16))
 					tree := n.Constructor()
 					for i := len(data) - 1; i > 0; i-- {
@@ -104,7 +104,7 @@ func TestExtendedDataSquare(t *testing.T) {
 
 	tree := NewErasuredNamespacedMerkleTree(uint64(squareSize))
 
-	_, err := rsmt2d.ComputeExtendedDataSquare(raw, rsmt2d.NewRSGF8Codec(), tree.Constructor)
+	_, err := rsmt2d.ComputeExtendedDataSquare(raw, consts.DefaultCodec(), tree.Constructor)
 	assert.NoError(t, err)
 }

types/block.go (+79 −23)

@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
+	"math"
 	"strings"
 	"time"
 
@@ -1112,6 +1113,69 @@ func (data *Data) Hash() tmbytes.HexBytes {
 	return data.hash
 }
 
+// ComputeShares splits block data into shares of an original data square and
+// returns them along with an amount of non-redundant shares. The shares
+// returned are padded to complete a square size that is a power of two
+func (data *Data) ComputeShares() (NamespacedShares, int) {
+	// TODO(ismail): splitting into shares should depend on the block size and layout
+	// see: https://github.com/celestiaorg/celestia-specs/blob/master/specs/block_proposer.md#laying-out-transactions-and-messages
+
+	// reserved shares:
+	txShares := data.Txs.SplitIntoShares()
+	intermRootsShares := data.IntermediateStateRoots.SplitIntoShares()
+	evidenceShares := data.Evidence.SplitIntoShares()
+
+	// application data shares from messages:
+	msgShares := data.Messages.SplitIntoShares()
+	curLen := len(txShares) + len(intermRootsShares) + len(evidenceShares) + len(msgShares)
+
+	// find the number of shares needed to create a square that has a power of
+	// two width
+	wantLen := paddedLen(curLen)
+
+	// ensure that the min square size is used
+	if wantLen < consts.MinSharecount {
+		wantLen = consts.MinSharecount
+	}
+
+	tailShares := TailPaddingShares(wantLen - curLen)
+
+	return append(append(append(append(
+		txShares,
+		intermRootsShares...),
+		evidenceShares...),
+		msgShares...),
+		tailShares...), curLen
+}
+
+// paddedLen calculates the number of shares needed to make a power of 2 square
+// given the current number of shares
+func paddedLen(length int) int {
+	width := uint32(math.Ceil(math.Sqrt(float64(length))))
+	width = nextHighestPowerOf2(width)
+	return int(width * width)
+}
+
+// nextHighestPowerOf2 returns the next highest power of 2 unless the input is a power
+// of two, in which case it returns the input
+func nextHighestPowerOf2(v uint32) uint32 {
+	if v == 0 {
+		return 0
+	}
+
+	// find the next highest power using bit mashing
+	v--
+	v |= v >> 1
+	v |= v >> 2
+	v |= v >> 4
+	v |= v >> 8
+	v |= v >> 16
+	v++
+
+	// return the next highest power
+	return v
+}
+
 type Messages struct {
 	MessagesList []Message `json:"msgs"`
 }
@@ -1120,26 +1184,27 @@ type IntermediateStateRoots struct {
 	RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"`
 }
 
-func (roots IntermediateStateRoots) splitIntoShares(shareSize int) NamespacedShares {
-	shares := make([]NamespacedShare, 0)
+func (roots IntermediateStateRoots) SplitIntoShares() NamespacedShares {
+	rawDatas := make([][]byte, 0, len(roots.RawRootsList))
 	for _, root := range roots.RawRootsList {
 		rawData, err := root.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root))
 		}
-		shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData, shareSize)
+		rawDatas = append(rawDatas, rawData)
 	}
+	shares := splitContiguous(consts.IntermediateStateRootsNamespaceID, rawDatas)
 	return shares
 }
 
-func (msgs Messages) splitIntoShares(shareSize int) NamespacedShares {
+func (msgs Messages) SplitIntoShares() NamespacedShares {
 	shares := make([]NamespacedShare, 0)
 	for _, m := range msgs.MessagesList {
 		rawData, err := m.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("app accepted a Message that can not be encoded %#v", m))
 		}
-		shares = appendToShares(shares, m.NamespaceID, rawData, shareSize)
+		shares = appendToShares(shares, m.NamespaceID, rawData)
 	}
 	return shares
 }
@@ -1346,29 +1411,20 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error {
 	return nil
 }
 
-func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares {
-	shares := make([]NamespacedShare, 0)
+func (data *EvidenceData) SplitIntoShares() NamespacedShares {
+	rawDatas := make([][]byte, 0, len(data.Evidence))
 	for _, ev := range data.Evidence {
-		var rawData []byte
-		var err error
-		switch cev := ev.(type) {
-		case *DuplicateVoteEvidence:
-			rawData, err = protoio.MarshalDelimited(cev.ToProto())
-		case *LightClientAttackEvidence:
-			pcev, iErr := cev.ToProto()
-			if iErr != nil {
-				err = iErr
-				break
-			}
-			rawData, err = protoio.MarshalDelimited(pcev)
-		default:
-			panic(fmt.Sprintf("unknown evidence included in evidence pool (don't know how to encode this) %#v", ev))
+		pev, err := EvidenceToProto(ev)
+		if err != nil {
+			panic("failure to convert evidence to equivalent proto type")
 		}
+		rawData, err := protoio.MarshalDelimited(pev)
 		if err != nil {
-			panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err))
+			panic(err)
 		}
-		shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData, shareSize)
+		rawDatas = append(rawDatas, rawData)
 	}
+	shares := splitContiguous(consts.EvidenceNamespaceID, rawDatas)
 	return shares
 }

types/share_merging.go (new file, +333 lines)

package types

import (
	"bytes"
	"encoding/binary"
	"errors"

	"github.com/celestiaorg/rsmt2d"
	"github.com/gogo/protobuf/proto"
	tmbytes "github.com/tendermint/tendermint/libs/bytes"
	"github.com/tendermint/tendermint/pkg/consts"
	tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
)

// DataFromSquare extracts block data from an extended data square.
func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) {
	originalWidth := eds.Width() / 2

	// sort block data shares by namespace
	var (
		sortedTxShares  [][]byte
		sortedISRShares [][]byte
		sortedEvdShares [][]byte
		sortedMsgShares [][]byte
	)

	// iterate over each row index
	for x := uint(0); x < originalWidth; x++ {
		// iterate over each share in the original data square
		row := eds.Row(x)

		for _, share := range row[:originalWidth] {
			// sort the data of that share types via namespace
			nid := share[:consts.NamespaceSize]
			switch {
			case bytes.Equal(consts.TxNamespaceID, nid):
				sortedTxShares = append(sortedTxShares, share)

			case bytes.Equal(consts.IntermediateStateRootsNamespaceID, nid):
				sortedISRShares = append(sortedISRShares, share)

			case bytes.Equal(consts.EvidenceNamespaceID, nid):
				sortedEvdShares = append(sortedEvdShares, share)

			case bytes.Equal(consts.TailPaddingNamespaceID, nid):
				continue

			// ignore unused but reserved namespaces
			case bytes.Compare(nid, consts.MaxReservedNamespace) < 1:
				continue

			// every other namespaceID should be a message
			default:
				sortedMsgShares = append(sortedMsgShares, share)
			}
		}
	}

	// pass the raw share data to their respective parsers
	txs, err := parseTxs(sortedTxShares)
	if err != nil {
		return Data{}, err
	}

	isrs, err := parseISRs(sortedISRShares)
	if err != nil {
		return Data{}, err
	}

	evd, err := parseEvd(sortedEvdShares)
	if err != nil {
		return Data{}, err
	}

	msgs, err := parseMsgs(sortedMsgShares)
	if err != nil {
		return Data{}, err
	}

	return Data{
		Txs:                    txs,
		IntermediateStateRoots: isrs,
		Evidence:               evd,
		Messages:               msgs,
	}, nil
}

// parseTxs collects all of the transactions from the shares provided
func parseTxs(shares [][]byte) (Txs, error) {
	// parse the shares
	rawTxs, err := processContiguousShares(shares)
	if err != nil {
		return nil, err
	}

	// convert to the Tx type
	txs := make(Txs, len(rawTxs))
	for i := 0; i < len(txs); i++ {
		txs[i] = Tx(rawTxs[i])
	}

	return txs, nil
}

// parseISRs collects all the intermediate state roots from the shares provided
func parseISRs(shares [][]byte) (IntermediateStateRoots, error) {
	rawISRs, err := processContiguousShares(shares)
	if err != nil {
		return IntermediateStateRoots{}, err
	}

	ISRs := make([]tmbytes.HexBytes, len(rawISRs))
	for i := 0; i < len(ISRs); i++ {
		ISRs[i] = rawISRs[i]
	}

	return IntermediateStateRoots{RawRootsList: ISRs}, nil
}

// parseEvd collects all evidence from the shares provided.
func parseEvd(shares [][]byte) (EvidenceData, error) {
	// the raw data returned does not have length delimiters or namespaces and
	// is ready to be unmarshaled
	rawEvd, err := processContiguousShares(shares)
	if err != nil {
		return EvidenceData{}, err
	}

	evdList := make(EvidenceList, len(rawEvd))

	// parse into protobuf bytes
	for i := 0; i < len(rawEvd); i++ {
		// unmarshal the evidence
		var protoEvd tmproto.Evidence
		err := proto.Unmarshal(rawEvd[i], &protoEvd)
		if err != nil {
			return EvidenceData{}, err
		}
		evd, err := EvidenceFromProto(&protoEvd)
		if err != nil {
			return EvidenceData{}, err
		}

		evdList[i] = evd
	}

	return EvidenceData{Evidence: evdList}, nil
}

// parseMsgs collects all messages from the shares provided
func parseMsgs(shares [][]byte) (Messages, error) {
	msgList, err := parseMsgShares(shares)
	if err != nil {
		return Messages{}, err
	}

	return Messages{
		MessagesList: msgList,
	}, nil
}

// processContiguousShares takes raw shares and extracts out transactions,
// intermediate state roots, or evidence. The returned [][]byte do not have
// namespaces or length delimiters and are ready to be unmarshalled
func processContiguousShares(shares [][]byte) (txs [][]byte, err error) {
	if len(shares) == 0 {
		return nil, nil
	}

	ss := newShareStack(shares)
	return ss.resolve()
}

// shareStack holds variables for peel
type shareStack struct {
	shares [][]byte
	txLen  uint64
	txs    [][]byte
	cursor int
}

func newShareStack(shares [][]byte) *shareStack {
	return &shareStack{shares: shares}
}

func (ss *shareStack) resolve() ([][]byte, error) {
	if len(ss.shares) == 0 {
		return nil, nil
	}
	err := ss.peel(ss.shares[0][consts.NamespaceSize+consts.ShareReservedBytes:], true)
	return ss.txs, err
}

// peel recursively parses each chunk of data (either a transaction,
// intermediate state root, or evidence) and adds it to the underlying slice of data.
func (ss *shareStack) peel(share []byte, delimited bool) (err error) {
	if delimited {
		var txLen uint64
		share, txLen, err = parseDelimiter(share)
		if err != nil {
			return err
		}
		if txLen == 0 {
			return nil
		}
		ss.txLen = txLen
	}
	// safeLen describes the point in the share where it can be safely split. If
	// split beyond this point, it is possible to break apart a length
	// delimiter, which will result in incorrect share merging
	safeLen := len(share) - binary.MaxVarintLen64
	if safeLen < 0 {
		safeLen = 0
	}
	if ss.txLen <= uint64(safeLen) {
		ss.txs = append(ss.txs, share[:ss.txLen])
		share = share[ss.txLen:]
		return ss.peel(share, true)
	}
	// add the next share to the current share to continue merging if possible
	if len(ss.shares) > ss.cursor+1 {
		ss.cursor++
		share := append(share, ss.shares[ss.cursor][consts.NamespaceSize+consts.ShareReservedBytes:]...)
		return ss.peel(share, false)
	}
	// collect any remaining data
	if ss.txLen <= uint64(len(share)) {
		ss.txs = append(ss.txs, share[:ss.txLen])
		share = share[ss.txLen:]
		return ss.peel(share, true)
	}
	return errors.New("failure to parse block data: transaction length exceeded data length")
}

// parseMsgShares iterates through raw shares and separates the contiguous chunks
// of data. It is only used for Messages, i.e. shares with a non-reserved namespace.
func parseMsgShares(shares [][]byte) ([]Message, error) {
	if len(shares) == 0 {
		return nil, nil
	}

	// set the first nid and current share
	nid := shares[0][:consts.NamespaceSize]
	currentShare := shares[0][consts.NamespaceSize:]
	// find and remove the msg len delimiter
	currentShare, msgLen, err := parseDelimiter(currentShare)
	if err != nil {
		return nil, err
	}

	var msgs []Message
	for cursor := uint64(0); cursor < uint64(len(shares)); {
		var msg Message
		currentShare, nid, cursor, msgLen, msg, err = nextMsg(
			shares,
			currentShare,
			nid,
			cursor,
			msgLen,
		)
		if err != nil {
			return nil, err
		}
		if msg.Data != nil {
			msgs = append(msgs, msg)
		}
	}

	return msgs, nil
}

func nextMsg(
	shares [][]byte,
	current,
	nid []byte,
	cursor,
	msgLen uint64,
) ([]byte, []byte, uint64, uint64, Message, error) {
	switch {
	// the message uses all of the current share data and at least some of the
	// next share
	case msgLen > uint64(len(current)):
		// add the next share to the current one and try again
		cursor++
		current = append(current, shares[cursor][consts.NamespaceSize:]...)
		return nextMsg(shares, current, nid, cursor, msgLen)

	// the msg we're looking for is contained in the current share
	case msgLen <= uint64(len(current)):
		msg := Message{nid, current[:msgLen]}
		cursor++

		// call it a day if the work is done
		if cursor >= uint64(len(shares)) {
			return nil, nil, cursor, 0, msg, nil
		}

		nextNid := shares[cursor][:consts.NamespaceSize]
		next, msgLen, err := parseDelimiter(shares[cursor][consts.NamespaceSize:])
		return next, nextNid, cursor, msgLen, msg, err
	}
	// this code is unreachable but the compiler doesn't know that
	return nil, nil, 0, 0, Message{}, nil
}

// parseDelimiter finds and returns the length delimiter of the message provided
// while also removing the delimiter bytes from the input
func parseDelimiter(input []byte) ([]byte, uint64, error) {
	if len(input) == 0 {
		return input, 0, nil
	}

	l := binary.MaxVarintLen64
	if len(input) < binary.MaxVarintLen64 {
		l = len(input)
	}

	delimiter := zeroPadIfNecessary(input[:l], binary.MaxVarintLen64)

	// read the length of the message
	r := bytes.NewBuffer(delimiter)
	msgLen, err := binary.ReadUvarint(r)
	if err != nil {
		return nil, 0, err
	}

	// calculate the number of bytes used by the delimiter
	lenBuf := make([]byte, binary.MaxVarintLen64)
	n := binary.PutUvarint(lenBuf, msgLen)

	// return the input without the length delimiter
	return input[n:], msgLen, nil
}

‎types/share_splitting.go

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
package types

import (
	"bytes"

	"github.com/celestiaorg/nmt/namespace"
	"github.com/tendermint/tendermint/pkg/consts"
)

// appendToShares appends raw data as shares.
// Used for messages.
func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare {
	if len(rawData) <= consts.MsgShareSize {
		rawShare := append(append(
			make([]byte, 0, len(nid)+len(rawData)),
			nid...),
			rawData...,
		)
		paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize)
		share := NamespacedShare{paddedShare, nid}
		shares = append(shares, share)
	} else { // len(rawData) > MsgShareSize
		shares = append(shares, splitMessage(rawData, nid)...)
	}
	return shares
}

// splitMessage breaks the data in a message into the minimum number of
// namespaced shares.
func splitMessage(rawData []byte, nid namespace.ID) []NamespacedShare {
	shares := make([]NamespacedShare, 0)
	firstRawShare := append(append(
		make([]byte, 0, consts.ShareSize),
		nid...),
		rawData[:consts.MsgShareSize]...,
	)
	shares = append(shares, NamespacedShare{firstRawShare, nid})
	rawData = rawData[consts.MsgShareSize:]
	for len(rawData) > 0 {
		shareSizeOrLen := min(consts.MsgShareSize, len(rawData))
		rawShare := append(append(
			make([]byte, 0, consts.ShareSize),
			nid...),
			rawData[:shareSizeOrLen]...,
		)
		paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize)
		share := NamespacedShare{paddedShare, nid}
		shares = append(shares, share)
		rawData = rawData[shareSizeOrLen:]
	}
	return shares
}

// splitContiguous splits multiple raw data contiguously as shares.
// Used for transactions, intermediate state roots, and evidence.
func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
	shares := make([]NamespacedShare, 0)
	// Index into the outer slice of rawDatas
	outerIndex := 0
	// Index into the inner slice of rawDatas
	innerIndex := 0
	for outerIndex < len(rawDatas) {
		var rawData []byte
		startIndex := 0
		rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize)
		rawShare := append(append(append(
			make([]byte, 0, len(nid)+1+len(rawData)),
			nid...),
			byte(startIndex)),
			rawData...)
		paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize)
		share := NamespacedShare{paddedShare, nid}
		shares = append(shares, share)
	}
	return shares
}

// getNextChunk gets the next chunk for contiguous shares.
// Precondition: none of the slices in rawDatas is zero-length.
// This precondition should always hold at this point since zero-length txs are simply invalid.
func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) {
	rawData := make([]byte, 0, width)
	startIndex := 0
	firstBytesToFetch := 0

	curIndex := 0
	for curIndex < width && outerIndex < len(rawDatas) {
		bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex)
		if bytesToFetch == 0 {
			panic("zero-length contiguous share data is invalid")
		}
		if curIndex == 0 {
			firstBytesToFetch = bytesToFetch
		}
		// If we've already placed some data in this chunk, that means
		// a new data segment begins.
		if curIndex != 0 {
			// Offset by the fixed reserved bytes at the beginning of the share.
			startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes
		}
		rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...)
		innerIndex += bytesToFetch
		if innerIndex >= len(rawDatas[outerIndex]) {
			innerIndex = 0
			outerIndex++
		}
		curIndex += bytesToFetch
	}

	return rawData, outerIndex, innerIndex, startIndex
}

// tailPaddingShare is filler for all tail-padded shares;
// it is allocated once and reused everywhere.
var tailPaddingShare = append(
	append(make([]byte, 0, consts.ShareSize), consts.TailPaddingNamespaceID...),
	bytes.Repeat([]byte{0}, consts.ShareSize-consts.NamespaceSize)...,
)

// TailPaddingShares returns n tail-padding shares.
func TailPaddingShares(n int) NamespacedShares {
	shares := make([]NamespacedShare, n)
	for i := 0; i < n; i++ {
		shares[i] = NamespacedShare{
			Share: tailPaddingShare,
			ID:    consts.TailPaddingNamespaceID,
		}
	}
	return shares
}

func min(a, b int) int {
	if a <= b {
		return a
	}
	return b
}

func zeroPadIfNecessary(share []byte, width int) []byte {
	oldLen := len(share)
	if oldLen < width {
		missingBytes := width - oldLen
		padByte := []byte{0}
		padding := bytes.Repeat(padByte, missingBytes)
		share = append(share, padding...)
		return share
	}
	return share
}
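To make the chunk traversal in `getNextChunk` concrete, here is a self-contained sketch of the same loop with toy sizes: a width of 6 instead of `consts.TxShareSize`, and the namespace/reserved-byte offset omitted from the start index. `nextChunk` is a simplified stand-in for illustration, not the committed function:

```go
package main

import "fmt"

// nextChunk mirrors getNextChunk: it fills up to width bytes from
// rawDatas starting at (outer, inner) and reports where, within the
// chunk, the first new datum begins (0 if none begins in this chunk).
func nextChunk(rawDatas [][]byte, outer, inner, width int) ([]byte, int, int, int) {
	var chunk []byte
	cur, first, start := 0, 0, 0
	for cur < width && outer < len(rawDatas) {
		n := len(rawDatas[outer]) - inner
		if n > width-cur {
			n = width - cur
		}
		if cur == 0 {
			first = n
		} else {
			// a new datum begins after the carried-over tail
			start = first
		}
		chunk = append(chunk, rawDatas[outer][inner:inner+n]...)
		inner += n
		if inner >= len(rawDatas[outer]) {
			inner, outer = 0, outer+1
		}
		cur += n
	}
	return chunk, outer, inner, start
}

func main() {
	txs := [][]byte{[]byte("aaaa"), []byte("bbbbbb")}
	outer, inner := 0, 0
	for outer < len(txs) {
		var chunk []byte
		var start int
		chunk, outer, inner, start = nextChunk(txs, outer, inner, 6)
		fmt.Printf("chunk=%q start=%d\n", chunk, start)
	}
	// prints:
	// chunk="aaaabb" start=4
	// chunk="bbbb" start=0
}
```

The second transaction straddles two chunks, which is exactly why contiguous shares carry a reserved start-index byte: a parser landing on the second share needs to know where the carried-over tail ends.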

‎types/shares.go

-62
@@ -1,19 +1,15 @@
 package types

 import (
-	"bytes"
 	"encoding/binary"

 	"github.com/celestiaorg/nmt/namespace"
-	"github.com/tendermint/tendermint/pkg/consts"
 )

 // Share contains the raw share data without the corresponding namespace.
 type Share []byte

 // NamespacedShare extends a Share with the corresponding namespace.
-// It implements the namespace.Data interface and hence can be used
-// for pushing the shares to the namespaced Merkle tree.
 type NamespacedShare struct {
 	Share
 	ID namespace.ID
@@ -45,7 +41,6 @@ func (tx Tx) MarshalDelimited() ([]byte, error) {
 	lenBuf := make([]byte, binary.MaxVarintLen64)
 	length := uint64(len(tx))
 	n := binary.PutUvarint(lenBuf, length)
-
 	return append(lenBuf[:n], tx...), nil
 }

@@ -55,62 +50,5 @@ func (m Message) MarshalDelimited() ([]byte, error) {
 	lenBuf := make([]byte, binary.MaxVarintLen64)
 	length := uint64(len(m.Data))
 	n := binary.PutUvarint(lenBuf, length)
-
 	return append(lenBuf[:n], m.Data...), nil
 }
-
-func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte, shareSize int) []NamespacedShare {
-	if len(rawData) < shareSize {
-		rawShare := rawData
-		paddedShare := zeroPadIfNecessary(rawShare, shareSize)
-		share := NamespacedShare{paddedShare, nid}
-		shares = append(shares, share)
-	} else { // len(rawData) >= shareSize
-		shares = append(shares, split(rawData, shareSize, nid)...)
-	}
-	return shares
-}
-
-// TODO(ismail): implement corresponding merge method for clients requesting
-// shares for a particular namespace
-func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare {
-	shares := make([]NamespacedShare, 0)
-	firstRawShare := rawData[:shareSize]
-	shares = append(shares, NamespacedShare{firstRawShare, nid})
-	rawData = rawData[shareSize:]
-	for len(rawData) > 0 {
-		shareSizeOrLen := min(shareSize, len(rawData))
-		paddedShare := zeroPadIfNecessary(rawData[:shareSizeOrLen], shareSize)
-		share := NamespacedShare{paddedShare, nid}
-		shares = append(shares, share)
-		rawData = rawData[shareSizeOrLen:]
-	}
-	return shares
-}
-
-func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares {
-	shares := make([]NamespacedShare, n)
-	for i := 0; i < n; i++ {
-		shares[i] = NamespacedShare{bytes.Repeat([]byte{0}, shareWidth), consts.TailPaddingNamespaceID}
-	}
-	return shares
-}
-
-func min(a, b int) int {
-	if a <= b {
-		return a
-	}
-	return b
-}
-
-func zeroPadIfNecessary(share []byte, width int) []byte {
-	oldLen := len(share)
-	if oldLen < width {
-		missingBytes := width - oldLen
-		padByte := []byte{0}
-		padding := bytes.Repeat(padByte, missingBytes)
-		share = append(share, padding...)
-		return share
-	}
-	return share
-}

‎types/shares_test.go

+458-32
Large diffs are not rendered by default.

‎types/tx.go

+5-4
@@ -80,15 +80,16 @@ func (txs Txs) Proof(i int) TxProof {
 	}
 }

-func (txs Txs) splitIntoShares(shareSize int) NamespacedShares {
-	shares := make([]NamespacedShare, 0)
-	for _, tx := range txs {
+func (txs Txs) SplitIntoShares() NamespacedShares {
+	rawDatas := make([][]byte, len(txs))
+	for i, tx := range txs {
 		rawData, err := tx.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx))
 		}
-		shares = appendToShares(shares, consts.TxNamespaceID, rawData, shareSize)
+		rawDatas[i] = rawData
 	}
+	shares := splitContiguous(consts.TxNamespaceID, rawDatas)
 	return shares
 }
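Since `SplitIntoShares` now packs delimited transactions contiguously rather than one per share, the number of shares a batch occupies is simply the total delimited length divided by the per-share capacity, rounded up. A rough sketch of that arithmetic with a toy capacity (the real value is `consts.TxShareSize`; `shareCount` is an illustrative helper, not part of this commit):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

const txShareSize = 10 // toy per-share capacity, stand-in for consts.TxShareSize

// delimitedLen returns the length of a tx after the uvarint
// length prefix added by MarshalDelimited.
func delimitedLen(tx []byte) int {
	buf := make([]byte, binary.MaxVarintLen64)
	return binary.PutUvarint(buf, uint64(len(tx))) + len(tx)
}

// shareCount is the number of contiguous shares the batch occupies.
func shareCount(txs [][]byte) int {
	total := 0
	for _, tx := range txs {
		total += delimitedLen(tx)
	}
	return (total + txShareSize - 1) / txShareSize // ceiling division
}

func main() {
	txs := [][]byte{make([]byte, 7), make([]byte, 12)}
	// (7+1) + (12+1) = 21 delimited bytes -> 3 shares of capacity 10
	fmt.Println(shareCount(txs)) // prints: 3
}
```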
