Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spec compliant merge shares #261

Merged
merged 35 commits into from
Apr 6, 2021
Merged
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
494db15
start spec compliant share merging
evan-forbes Mar 31, 2021
0d15df4
refactor and finish unit testing
evan-forbes Apr 1, 2021
3f0d808
whoops
evan-forbes Apr 1, 2021
8535122
linter gods
evan-forbes Apr 1, 2021
8bae88b
fix initial changes and use constants
evan-forbes Apr 1, 2021
cbb5d76
use constant
evan-forbes Apr 1, 2021
b523831
more polish
evan-forbes Apr 1, 2021
f0f38a6
docs fix
evan-forbes Apr 1, 2021
014a00e
review feedback: docs and out of range panic protection
evan-forbes Apr 1, 2021
0896a3b
review feedback: add panic protection from empty input
evan-forbes Apr 1, 2021
96eafc7
use constant instead of recalculating `ShareSize`
evan-forbes Apr 1, 2021
c3d897a
don't redeclare existing var
evan-forbes Apr 1, 2021
28f9769
be more explicit with returned nil
evan-forbes Apr 1, 2021
9ff16f5
use constant instead of recalculating `ShareSize`
evan-forbes Apr 1, 2021
39ce26a
review feedback: use consistent capitalization
evan-forbes Apr 1, 2021
8794411
stop accepting reserved namespaces as normal messages
evan-forbes Apr 1, 2021
2b28c59
use a descriptive var name for message length
evan-forbes Apr 1, 2021
d118716
linter and comparison fix
evan-forbes Apr 1, 2021
3ae1f79
reorg tests, add test for parse delimiter, DataFromBlock and fix evid…
evan-forbes Apr 2, 2021
dfe2a07
catch error for linter
evan-forbes Apr 3, 2021
e9f3a2e
update test MakeShares to include length delimiters for the SHARE_RES…
evan-forbes Apr 3, 2021
221b0fb
minor iteration change
evan-forbes Apr 3, 2021
37316e8
refactor share splitting to fix bug
evan-forbes Apr 3, 2021
e3736d6
fix all bugs with third and final refactor
evan-forbes Apr 4, 2021
632bb37
Merge branch 'master' into evan/merge-shares
evan-forbes Apr 4, 2021
3ca2afa
fix conflict
evan-forbes Apr 4, 2021
0f930fd
revert unnecessary changes
evan-forbes Apr 4, 2021
91c3989
review feedback: better docs
evan-forbes Apr 5, 2021
043812d
review feedback: add comment for safeLen
evan-forbes Apr 5, 2021
76d7a4b
review feedback: remove unnecessary comments
evan-forbes Apr 5, 2021
1cb4030
review feedback: split up share merging and splitting into their own …
evan-forbes Apr 5, 2021
a88db3b
review feedback: more descriptive var names
evan-forbes Apr 5, 2021
2aad8fd
fix accidental change
evan-forbes Apr 5, 2021
887aa08
add some constant docs
evan-forbes Apr 5, 2021
40a57c1
spelling error
evan-forbes Apr 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 5 additions & 15 deletions types/block.go
Original file line number Diff line number Diff line change
@@ -1626,23 +1626,13 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error {
func (data *EvidenceData) splitIntoShares() NamespacedShares {
rawDatas := make([][]byte, 0, len(data.Evidence))
for _, ev := range data.Evidence {
var rawData []byte
var err error
switch cev := ev.(type) {
case *DuplicateVoteEvidence:
rawData, err = protoio.MarshalDelimited(cev.ToProto())
case *LightClientAttackEvidence:
pcev, iErr := cev.ToProto()
if iErr != nil {
err = iErr
break
}
rawData, err = protoio.MarshalDelimited(pcev)
default:
panic(fmt.Sprintf("unknown evidence included in evidence pool (don't know how to encode this) %#v", ev))
pev, err := EvidenceToProto(ev)
if err != nil {
panic("failure to convert evidence to equivalent proto type")
}
rawData, err := protoio.MarshalDelimited(pev)
if err != nil {
panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err))
panic(err)
}
rawDatas = append(rawDatas, rawData)
}
19 changes: 16 additions & 3 deletions types/consts.go
Original file line number Diff line number Diff line change
@@ -33,10 +33,23 @@ const (
)

var (
TxNamespaceID = namespace.ID{0, 0, 0, 0, 0, 0, 0, 1}
// See spec for further details on the types of available data
// https://github.com/lazyledger/lazyledger-specs/blob/master/specs/consensus.md#reserved-namespace-ids
// https://github.com/lazyledger/lazyledger-specs/blob/de5f4f74f56922e9fa735ef79d9e6e6492a2bad1/specs/data_structures.md#availabledata

// TxNamespaceID is the namespace reserved for transaction data
TxNamespaceID = namespace.ID{0, 0, 0, 0, 0, 0, 0, 1}
// IntermediateStateRootsNamespaceID is the namespace reserved for
// intermediate state root data
IntermediateStateRootsNamespaceID = namespace.ID{0, 0, 0, 0, 0, 0, 0, 2}
EvidenceNamespaceID = namespace.ID{0, 0, 0, 0, 0, 0, 0, 3}

// EvidenceNamespaceID is the namespace reserved for evidence
EvidenceNamespaceID = namespace.ID{0, 0, 0, 0, 0, 0, 0, 3}
// MaxReservedNamespace is the lexicographically largest namespace that is
// reserved for protocol use. It is derived from NAMESPACE_ID_MAX_RESERVED
// https://github.com/lazyledger/lazyledger-specs/blob/master/specs/consensus.md#constants
MaxReservedNamespace = namespace.ID{0, 0, 0, 0, 0, 0, 0, 255}
// TailPaddingNamespaceID is the namespace ID for tail padding. All data
// with this namespace is ignored when parsing shares, since tail padding
// carries no block data.
TailPaddingNamespaceID = namespace.ID{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE}
ParitySharesNamespaceID = namespace.ID{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}

332 changes: 332 additions & 0 deletions types/share_merging.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
package types

import (
"bytes"
"encoding/binary"
"errors"

"github.com/gogo/protobuf/proto"
tmbytes "github.com/lazyledger/lazyledger-core/libs/bytes"
tmproto "github.com/lazyledger/lazyledger-core/proto/tendermint/types"
"github.com/lazyledger/rsmt2d"
)

// DataFromSquare extracts block data from an extended data square.
func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) {
	width := eds.Width() / 2

	// bucket the raw shares of each data type by inspecting their namespaces
	var (
		txShares  [][]byte
		isrShares [][]byte
		evdShares [][]byte
		msgShares [][]byte
	)

	// walk every cell of the original (un-extended) square
	for row := uint(0); row < width; row++ {
		for col := uint(0); col < width; col++ {
			share := eds.Cell(row, col)
			nid := share[:NamespaceSize]
			switch {
			case bytes.Equal(TxNamespaceID, nid):
				txShares = append(txShares, share)

			case bytes.Equal(IntermediateStateRootsNamespaceID, nid):
				isrShares = append(isrShares, share)

			case bytes.Equal(EvidenceNamespaceID, nid):
				evdShares = append(evdShares, share)

			case bytes.Equal(TailPaddingNamespaceID, nid):
				// tail padding carries no block data
				continue

			// ignore unused but reserved namespaces
			case bytes.Compare(nid, MaxReservedNamespace) < 1:
				continue

			// every other namespaceID should be a message
			default:
				msgShares = append(msgShares, share)
			}
		}
	}

	// hand each group of shares to its respective parser
	txs, err := parseTxs(txShares)
	if err != nil {
		return Data{}, err
	}

	isrs, err := parseISRs(isrShares)
	if err != nil {
		return Data{}, err
	}

	evd, err := parseEvd(evdShares)
	if err != nil {
		return Data{}, err
	}

	msgs, err := parseMsgs(msgShares)
	if err != nil {
		return Data{}, err
	}

	return Data{
		Txs:                    txs,
		IntermediateStateRoots: isrs,
		Evidence:               evd,
		Messages:               msgs,
	}, nil
}

// parseTxs collects all of the transactions from the shares provided
func parseTxs(shares [][]byte) (Txs, error) {
	// merge the contiguous shares back into raw transaction data
	rawTxs, err := processContiguousShares(shares)
	if err != nil {
		return nil, err
	}

	// wrap each raw transaction in the Tx type
	txs := make(Txs, len(rawTxs))
	for i, rawTx := range rawTxs {
		txs[i] = Tx(rawTx)
	}

	return txs, nil
}

// parseISRs collects all the intermediate state roots from the shares provided
func parseISRs(shares [][]byte) (IntermediateStateRoots, error) {
	rawISRs, err := processContiguousShares(shares)
	if err != nil {
		return IntermediateStateRoots{}, err
	}

	// convert each raw root into the HexBytes wrapper
	roots := make([]tmbytes.HexBytes, len(rawISRs))
	for i, rawISR := range rawISRs {
		roots[i] = rawISR
	}

	return IntermediateStateRoots{RawRootsList: roots}, nil
}

// parseEvd collects all evidence from the shares provided.
func parseEvd(shares [][]byte) (EvidenceData, error) {
	// processContiguousShares strips the namespaces and length delimiters, so
	// each returned byte slice can be unmarshalled directly
	rawEvd, err := processContiguousShares(shares)
	if err != nil {
		return EvidenceData{}, err
	}

	evdList := make(EvidenceList, len(rawEvd))
	for i, rawEv := range rawEvd {
		// decode the protobuf representation of the evidence
		var protoEvd tmproto.Evidence
		if err := proto.Unmarshal(rawEv, &protoEvd); err != nil {
			return EvidenceData{}, err
		}
		// convert to the concrete evidence type
		evd, err := EvidenceFromProto(&protoEvd)
		if err != nil {
			return EvidenceData{}, err
		}
		evdList[i] = evd
	}

	return EvidenceData{Evidence: evdList}, nil
}

// parseMsgs collects all messages from the shares provided
func parseMsgs(shares [][]byte) (Messages, error) {
	msgList, err := parseMsgShares(shares)
	if err != nil {
		return MessagesEmpty, err
	}
	return Messages{MessagesList: msgList}, nil
}

// processContiguousShares takes raw shares and extracts out transactions,
// intermediate state roots, or evidence. The returned [][]byte do not have
// namespaces or length delimiters and are ready to be unmarshalled
func processContiguousShares(shares [][]byte) (txs [][]byte, err error) {
	// no shares means no data: return explicitly instead of recursing
	if len(shares) == 0 {
		return nil, nil
	}

	ss := newShareStack(shares)
	return ss.resolve()
}

// shareStack holds the working state used by peel while merging contiguous
// shares back into their original data units.
type shareStack struct {
	// shares is the raw input being merged
	shares [][]byte
	// txLen is the declared length of the data unit currently being extracted
	txLen uint64
	// txs accumulates the fully extracted data units
	txs [][]byte
	// cursor indexes the share currently being consumed from shares
	cursor int
}

// newShareStack returns a shareStack ready to resolve the given shares.
func newShareStack(shares [][]byte) *shareStack {
	ss := shareStack{shares: shares}
	return &ss
}

// resolve strips the namespace and reserved byte from the first share and
// starts the recursive peel, returning every extracted data unit.
func (ss *shareStack) resolve() ([][]byte, error) {
	if len(ss.shares) == 0 {
		return nil, nil
	}
	// the first share begins with a length delimiter, hence delimited = true
	err := ss.peel(ss.shares[0][NamespaceSize+ShareReservedBytes:], true)
	return ss.txs, err
}

// peel recursively parses each chunk of data (either a transaction,
// intermediate state root, or evidence) and adds it to the underlying slice of data.
func (ss *shareStack) peel(share []byte, delimited bool) (err error) {
	if delimited {
		// read (and remove) the length delimiter of the next data unit
		var txLen uint64
		share, txLen, err = parseDelimiter(share)
		if err != nil {
			return err
		}
		// a zero length means there is no further data, only padding
		if txLen == 0 {
			return nil
		}
		ss.txLen = txLen
	}
	// safeLen describes the point in the share where it can be safely split. If
	// split beyond this point, it is possible to break apart a length
	// delimiter, which will result in incorrect share merging
	safeLen := len(share) - binary.MaxVarintLen64
	if safeLen < 0 {
		safeLen = 0
	}
	// the whole data unit (and its following delimiter) is within this share:
	// extract it and continue with the remainder, which starts with a new
	// length delimiter
	if ss.txLen <= uint64(safeLen) {
		ss.txs = append(ss.txs, share[:ss.txLen])
		share = share[ss.txLen:]
		return ss.peel(share, true)
	}
	// add the next share to the current share to continue merging if possible
	if len(ss.shares) > ss.cursor+1 {
		ss.cursor++
		share := append(share, ss.shares[ss.cursor][NamespaceSize+ShareReservedBytes:]...)
		return ss.peel(share, false)
	}
	// collect any remaining data
	if ss.txLen <= uint64(len(share)) {
		ss.txs = append(ss.txs, share[:ss.txLen])
		share = share[ss.txLen:]
		return ss.peel(share, true)
	}
	return errors.New("failure to parse block data: transaction length exceeded data length")
}

// parseMsgShares iterates through raw shares and separates the contiguous chunks
// of data. It is only used for Messages, i.e. shares with a non-reserved namespace.
func parseMsgShares(shares [][]byte) ([]Message, error) {
	if len(shares) == 0 {
		return nil, nil
	}

	// set the first nid and current share
	nid := shares[0][:NamespaceSize]
	currentShare := shares[0][NamespaceSize:]

	// find and remove the msg len delimiter
	currentShare, msgLen, err := parseDelimiter(currentShare)
	if err != nil {
		return nil, err
	}

	var msgs []Message
	// nextMsg advances cursor past every share consumed by each message, so
	// this loop terminates when all shares have been consumed
	for cursor := uint64(0); cursor < uint64(len(shares)); {
		var msg Message
		currentShare, nid, cursor, msgLen, msg, err = nextMsg(
			shares,
			currentShare,
			nid,
			cursor,
			msgLen,
		)
		if err != nil {
			return nil, err
		}
		// only keep messages that actually contain data
		if msg.Data != nil {
			msgs = append(msgs, msg)
		}
	}

	return msgs, nil
}

// nextMsg extracts the next message from the given shares, appending
// subsequent shares until msgLen bytes are available. It returns the unread
// remainder of the share it stopped in, the namespace and length delimiter of
// the following message, the updated cursor, and the extracted message.
func nextMsg(
	shares [][]byte,
	current,
	nid []byte,
	cursor,
	msgLen uint64,
) ([]byte, []byte, uint64, uint64, Message, error) {
	switch {
	// the message uses all of the current share data and at least some of the
	// next share
	case msgLen > uint64(len(current)):
		// add the next share to the current one and try again
		cursor++
		current = append(current, shares[cursor][NamespaceSize:]...)
		return nextMsg(shares, current, nid, cursor, msgLen)

	// the msg we're looking for is contained in the current share
	case msgLen <= uint64(len(current)):
		msg := Message{nid, current[:msgLen]}
		cursor++

		// call it a day if the work is done
		if cursor >= uint64(len(shares)) {
			return nil, nil, cursor, 0, msg, nil
		}

		// prime the next call with the namespace and length delimiter of the
		// share that follows
		nextNid := shares[cursor][:NamespaceSize]
		next, msgLen, err := parseDelimiter(shares[cursor][NamespaceSize:])
		return next, nextNid, cursor, msgLen, msg, err
	}
	// this code is unreachable but the compiler doesn't know that
	return nil, nil, 0, 0, MessageEmpty, nil
}

// parseDelimiter finds and returns the length delimiter of the message provided
// while also removing the delimiter bytes from the input
func parseDelimiter(input []byte) ([]byte, uint64, error) {
	if len(input) == 0 {
		return input, 0, nil
	}

	// copy up to MaxVarintLen64 bytes into a zero padded buffer so that the
	// varint reader always sees a full-width window
	delimiter := make([]byte, binary.MaxVarintLen64)
	copy(delimiter, input)

	// read the length of the message
	msgLen, err := binary.ReadUvarint(bytes.NewBuffer(delimiter))
	if err != nil {
		return nil, 0, err
	}

	// calculate the number of bytes used by the delimiter
	lenBuf := make([]byte, binary.MaxVarintLen64)
	delimiterLen := binary.PutUvarint(lenBuf, msgLen)

	// return the input without the length delimiter
	return input[delimiterLen:], msgLen, nil
}
137 changes: 137 additions & 0 deletions types/share_splitting.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package types

import (
"bytes"

"github.com/lazyledger/nmt/namespace"
)

// appendToShares appends raw data as shares.
// Used for messages.
func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare {
	// data larger than a single share is handled by splitMessage
	if len(rawData) > MsgShareSize {
		return append(shares, splitMessage(rawData, nid)...)
	}
	// the data fits in one share: namespace || data, zero padded to ShareSize
	rawShare := make([]byte, 0, len(nid)+len(rawData))
	rawShare = append(rawShare, nid...)
	rawShare = append(rawShare, rawData...)
	paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
	return append(shares, NamespacedShare{paddedShare, nid})
}

// splitMessage breaks the data in a message into the minimum number of
// namespaced shares
func splitMessage(rawData []byte, nid namespace.ID) []NamespacedShare {
	// the first share always carries a full MsgShareSize of data, so it needs
	// no padding
	firstRawShare := make([]byte, 0, ShareSize)
	firstRawShare = append(firstRawShare, nid...)
	firstRawShare = append(firstRawShare, rawData[:MsgShareSize]...)
	shares := []NamespacedShare{{firstRawShare, nid}}
	rawData = rawData[MsgShareSize:]

	// pack the remaining data into zero padded shares
	for len(rawData) > 0 {
		chunkSize := min(MsgShareSize, len(rawData))
		rawShare := make([]byte, 0, ShareSize)
		rawShare = append(rawShare, nid...)
		rawShare = append(rawShare, rawData[:chunkSize]...)
		paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
		shares = append(shares, NamespacedShare{paddedShare, nid})
		rawData = rawData[chunkSize:]
	}
	return shares
}

// splitContiguous splits multiple raw data contiguously as shares.
// Used for transactions, intermediate state roots, and evidence.
func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
	shares := make([]NamespacedShare, 0)
	// Index into the outer slice of rawDatas
	outerIndex := 0
	// Index into the inner slice of rawDatas
	innerIndex := 0
	for outerIndex < len(rawDatas) {
		var rawData []byte
		startIndex := 0
		rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, TxShareSize)
		// share layout: namespace || reserved start-index byte || chunk data
		rawShare := append(append(append(
			make([]byte, 0, len(nid)+1+len(rawData)),
			nid...),
			byte(startIndex)),
			rawData...)
		paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
		share := NamespacedShare{paddedShare, nid}
		shares = append(shares, share)
	}
	return shares
}

// getNextChunk gets the next chunk for contiguous shares
// Precondition: none of the slices in rawDatas is zero-length
// This precondition should always hold at this point since zero-length txs are simply invalid.
// It returns the chunk (at most width bytes), the updated outer and inner
// indexes, and the start index: the in-share offset of the first data segment
// that begins inside this chunk, or 0 when the chunk starts on a segment
// boundary.
func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) {
	rawData := make([]byte, 0, width)
	startIndex := 0
	firstBytesToFetch := 0

	curIndex := 0
	for curIndex < width && outerIndex < len(rawDatas) {
		// take as much of the current segment as still fits in the chunk
		bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex)
		if bytesToFetch == 0 {
			panic("zero-length contiguous share data is invalid")
		}
		if curIndex == 0 {
			firstBytesToFetch = bytesToFetch
		}
		// If we've already placed some data in this chunk, that means
		// a new data segment begins
		if curIndex != 0 {
			// Offset by the fixed reserved bytes at the beginning of the share
			startIndex = firstBytesToFetch + NamespaceSize + ShareReservedBytes
		}
		rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...)
		innerIndex += bytesToFetch
		// move to the next segment once the current one is exhausted
		if innerIndex >= len(rawDatas[outerIndex]) {
			innerIndex = 0
			outerIndex++
		}
		curIndex += bytesToFetch
	}

	return rawData, outerIndex, innerIndex, startIndex
}

// GenerateTailPaddingShares returns n shares of shareWidth zero bytes, each
// under the tail padding namespace.
func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares {
	shares := make([]NamespacedShare, n)
	for i := range shares {
		shares[i] = NamespacedShare{bytes.Repeat([]byte{0}, shareWidth), TailPaddingNamespaceID}
	}
	return shares
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

// zeroPadIfNecessary appends zero bytes to share until it is width bytes
// long. A share that is already at least width bytes is returned unchanged.
func zeroPadIfNecessary(share []byte, width int) []byte {
	if missing := width - len(share); missing > 0 {
		return append(share, bytes.Repeat([]byte{0}, missing)...)
	}
	return share
}
132 changes: 0 additions & 132 deletions types/shares.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package types

import (
"bytes"
"encoding/binary"

"github.com/lazyledger/nmt/namespace"
@@ -42,7 +41,6 @@ func (tx Tx) MarshalDelimited() ([]byte, error) {
lenBuf := make([]byte, binary.MaxVarintLen64)
length := uint64(len(tx))
n := binary.PutUvarint(lenBuf, length)

return append(lenBuf[:n], tx...), nil
}

@@ -54,133 +52,3 @@ func (m Message) MarshalDelimited() ([]byte, error) {
n := binary.PutUvarint(lenBuf, length)
return append(lenBuf[:n], m.Data...), nil
}

// appendToShares appends raw data as shares.
// Used for messages.
func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare {
if len(rawData) <= MsgShareSize {
rawShare := append(append(
make([]byte, 0, len(nid)+len(rawData)),
nid...),
rawData...,
)
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
} else { // len(rawData) > MsgShareSize
shares = append(shares, split(rawData, nid)...)
}
return shares
}

// splitContiguous splits multiple raw data contiguously as shares.
// Used for transactions, intermediate state roots, and evidence.
func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
shares := make([]NamespacedShare, 0)
// Index into the outer slice of rawDatas
outerIndex := 0
// Index into the inner slice of rawDatas
innerIndex := 0
for outerIndex < len(rawDatas) {
var rawData []byte
startIndex := 0
rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, TxShareSize)
rawShare := append(append(append(
make([]byte, 0, len(nid)+1+len(rawData)),
nid...),
byte(startIndex)),
rawData...)
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
}
return shares
}

// TODO(ismail): implement corresponding merge method for clients requesting
// shares for a particular namespace
func split(rawData []byte, nid namespace.ID) []NamespacedShare {
shares := make([]NamespacedShare, 0)
firstRawShare := append(append(
make([]byte, 0, len(nid)+len(rawData[:MsgShareSize])),
nid...),
rawData[:MsgShareSize]...,
)
shares = append(shares, NamespacedShare{firstRawShare, nid})
rawData = rawData[MsgShareSize:]
for len(rawData) > 0 {
shareSizeOrLen := min(MsgShareSize, len(rawData))
rawShare := append(append(
make([]byte, 0, len(nid)+1+len(rawData[:shareSizeOrLen])),
nid...),
rawData[:shareSizeOrLen]...,
)
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
rawData = rawData[shareSizeOrLen:]
}
return shares
}

// getNextChunk gets the next chunk for contiguous shares
// Precondition: none of the slices in rawDatas is zero-length
// This precondition should always hold at this point since zero-length txs are simply invalid.
func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) {
rawData := make([]byte, 0, width)
startIndex := 0
firstBytesToFetch := 0

curIndex := 0
for curIndex < width && outerIndex < len(rawDatas) {
bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex)
if bytesToFetch == 0 {
panic("zero-length contiguous share data is invalid")
}
if curIndex == 0 {
firstBytesToFetch = bytesToFetch
}
// If we've already placed some data in this chunk, that means
// a new data segment begins
if curIndex != 0 {
// Offset by the fixed reserved bytes at the beginning of the share
startIndex = firstBytesToFetch + NamespaceSize + ShareReservedBytes
}
rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...)
innerIndex += bytesToFetch
if innerIndex >= len(rawDatas[outerIndex]) {
innerIndex = 0
outerIndex++
}
curIndex += bytesToFetch
}

return rawData, outerIndex, innerIndex, startIndex
}

func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares {
shares := make([]NamespacedShare, n)
for i := 0; i < n; i++ {
shares[i] = NamespacedShare{bytes.Repeat([]byte{0}, shareWidth), TailPaddingNamespaceID}
}
return shares
}

func min(a, b int) int {
if a <= b {
return a
}
return b
}

func zeroPadIfNecessary(share []byte, width int) []byte {
oldLen := len(share)
if oldLen < width {
missingBytes := width - oldLen
padByte := []byte{0}
padding := bytes.Repeat(padByte, missingBytes)
share = append(share, padding...)
return share
}
return share
}
346 changes: 338 additions & 8 deletions types/shares_test.go
Original file line number Diff line number Diff line change
@@ -2,11 +2,18 @@ package types

import (
"bytes"
"context"
"fmt"
"math"
"math/rand"
"reflect"
"testing"
"time"

tmbytes "github.com/lazyledger/lazyledger-core/libs/bytes"
"github.com/lazyledger/lazyledger-core/libs/protoio"
"github.com/lazyledger/nmt/namespace"
"github.com/lazyledger/rsmt2d"
"github.com/stretchr/testify/assert"
)

@@ -26,7 +33,11 @@ func TestMakeShares(t *testing.T) {
VoteA: vote1,
VoteB: vote2,
}
testEvidenceBytes, err := protoio.MarshalDelimited(testEvidence.ToProto())
protoTestEvidence, err := EvidenceToProto(testEvidence)
if err != nil {
t.Error(err)
}
testEvidenceBytes, err := protoio.MarshalDelimited(protoTestEvidence)
largeTx := Tx(bytes.Repeat([]byte("large Tx"), 50))
largeTxLenDelimited, _ := largeTx.MarshalDelimited()
smolTx := Tx("small Tx")
@@ -194,14 +205,333 @@ func Test_appendToSharesOverwrite(t *testing.T) {
assert.Equal(t, extraCopy, []byte(newShare.Share[:MsgShareSize]))
}

func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare {
shares := generateRandNamespacedRawData(count, NamespaceSize, leafSize)
nsShares := make(NamespacedShares, count)
// TestDataFromSquare round trips randomly generated block data through
// ComputeShares, erasure coding, and DataFromSquare.
func TestDataFromSquare(t *testing.T) {
	type test struct {
		name     string
		txCount  int
		isrCount int
		evdCount int
		msgCount int
		maxSize  int // max size of each tx or msg
	}

	tests := []test{
		{"one of each random small size", 1, 1, 1, 1, 40},
		{"one of each random large size", 1, 1, 1, 1, 400},
		{"many of each random large size", 10, 10, 10, 10, 40},
		{"many of each random large size", 10, 10, 10, 10, 400},
		{"only transactions", 10, 0, 0, 0, 400},
		{"only intermediate state roots", 0, 10, 0, 0, 400},
		{"only evidence", 0, 0, 10, 0, 400},
		{"only messages", 0, 0, 0, 10, 400},
	}

	for _, tc := range tests {
		tc := tc

		t.Run(tc.name, func(t *testing.T) {
			// generate random data
			data := generateRandomBlockData(
				t,
				tc.txCount,
				tc.isrCount,
				tc.evdCount,
				tc.msgCount,
				tc.maxSize,
			)

			shares, _ := data.ComputeShares()
			rawShares := shares.RawShares()

			eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.RSGF8, rsmt2d.NewDefaultTree)
			if err != nil {
				t.Error(err)
			}

			res, err := DataFromSquare(eds)
			if err != nil {
				t.Fatal(err)
			}

			// we have to compare the evidence by string because the
			// timestamps differ not by actual time represented, but by
			// internals see https://github.com/stretchr/testify/issues/666
			for i := 0; i < len(data.Evidence.Evidence); i++ {
				inputEvidence := data.Evidence.Evidence[i].(*DuplicateVoteEvidence)
				resultEvidence := res.Evidence.Evidence[i].(*DuplicateVoteEvidence)
				assert.Equal(t, inputEvidence.String(), resultEvidence.String())
			}

			// compare the original to the result w/o the evidence
			data.Evidence = EvidenceData{}
			res.Evidence = EvidenceData{}

			assert.Equal(t, data, res)
		})
	}
}

// TestFuzz_DataFromSquare repeatedly runs the round-trip test above against
// fresh random data for one minute. Skipped by default.
func TestFuzz_DataFromSquare(t *testing.T) {
	t.Skip()
	// run random block data through DataFromSquare for a minute
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	for {
		select {
		case <-ctx.Done():
			return
		default:
			TestDataFromSquare(t)
		}
	}
}

// Test_processContiguousShares checks that transactions round trip through
// splitting into contiguous shares and merging back out.
func Test_processContiguousShares(t *testing.T) {
	// exactTxShareSize is the length of tx that will fit exactly into a single
	// share, accounting for namespace id and the length delimiter prepended to
	// each tx
	const exactTxShareSize = TxShareSize - 1

	type test struct {
		name    string
		txSize  int
		txCount int
	}

	// each test is run twice, once using txSize as an exact size, and again
	// using it as a cap for randomly sized txs
	tests := []test{
		{"single small tx", 10, 1},
		{"many small txs", 10, 10},
		{"single big tx", 1000, 1},
		{"many big txs", 1000, 10},
		{"single exact size tx", exactTxShareSize, 1},
		{"many exact size txs", exactTxShareSize, 10},
	}

	for _, tc := range tests {
		tc := tc

		// run the tests with identically sized txs
		t.Run(fmt.Sprintf("%s identically sized", tc.name), func(t *testing.T) {
			txs := generateRandomContiguousShares(tc.txCount, tc.txSize)

			shares := txs.splitIntoShares()

			parsedTxs, err := processContiguousShares(shares.RawShares())
			if err != nil {
				t.Error(err)
			}

			// check that the data parsed is identical
			for i := 0; i < len(txs); i++ {
				assert.Equal(t, []byte(txs[i]), parsedTxs[i])
			}
		})

		// run the same tests using randomly sized txs with caps of tc.txSize
		t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) {
			txs := generateRandomlySizedContiguousShares(tc.txCount, tc.txSize)

			shares := txs.splitIntoShares()

			parsedTxs, err := processContiguousShares(shares.RawShares())
			if err != nil {
				t.Error(err)
			}

			// check that the data parsed is identical to the original
			for i := 0; i < len(txs); i++ {
				assert.Equal(t, []byte(txs[i]), parsedTxs[i])
			}
		})
	}
}

// TestFuzz_processContiguousShares repeatedly runs the round-trip test above
// against fresh random data for one minute. Skipped by default.
func TestFuzz_processContiguousShares(t *testing.T) {
	t.Skip()
	// run random shares through processContiguousShares for a minute
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	for {
		select {
		case <-ctx.Done():
			return
		default:
			Test_processContiguousShares(t)
		}
	}
}

// Test_parseMsgShares checks that messages round trip through splitting into
// shares and parsing back out.
func Test_parseMsgShares(t *testing.T) {
	// exactMsgShareSize is the length of message that will fit exactly into a single
	// share, accounting for namespace id and the length delimiter prepended to
	// each message
	const exactMsgShareSize = MsgShareSize - 2

	type test struct {
		name     string
		msgSize  int
		msgCount int
	}

	// each test is run twice, once using msgSize as an exact size, and again
	// using it as a cap for randomly sized leaves
	tests := []test{
		{"single small msg", 1, 1},
		{"many small msgs", 4, 10},
		{"single big msg", 1000, 1},
		{"many big msgs", 1000, 10},
		{"single exact size msg", exactMsgShareSize, 1},
		{"many exact size msgs", exactMsgShareSize, 10},
	}

	for _, tc := range tests {
		tc := tc

		// run the tests with identically sized messages
		t.Run(fmt.Sprintf("%s identically sized", tc.name), func(t *testing.T) {
			rawmsgs := make([]Message, tc.msgCount)
			for i := 0; i < tc.msgCount; i++ {
				rawmsgs[i] = generateRandomMessage(tc.msgSize)
			}
			msgs := Messages{MessagesList: rawmsgs}

			shares := msgs.splitIntoShares()

			parsedMsgs, err := parseMsgShares(shares.RawShares())
			if err != nil {
				t.Error(err)
			}

			// check that the namespaces and data are the same
			for i := 0; i < len(msgs.MessagesList); i++ {
				assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
				assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
			}
		})

		// run the same tests using randomly sized messages with caps of tc.msgSize
		t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) {
			msgs := generateRandomlySizedMessages(tc.msgCount, tc.msgSize)
			shares := msgs.splitIntoShares()

			parsedMsgs, err := parseMsgShares(shares.RawShares())
			if err != nil {
				t.Error(err)
			}

			// check that the namespaces and data are the same
			for i := 0; i < len(msgs.MessagesList); i++ {
				assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
				assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
			}
		})
	}
}

// Test_parseDelimiter checks that length delimiters written by
// MarshalDelimited round trip through parseDelimiter for lengths 0-99.
func Test_parseDelimiter(t *testing.T) {
	for i := uint64(0); i < 100; i++ {
		tx := generateRandomContiguousShares(1, int(i))[0]
		input, err := tx.MarshalDelimited()
		if err != nil {
			// use t.Fatal rather than panic so the test fails cleanly
			t.Fatal(err)
		}
		res, txLen, err := parseDelimiter(input)
		if err != nil {
			t.Fatal(err)
		}
		assert.Equal(t, i, txLen)
		assert.Equal(t, []byte(tx), res)
	}
}

// ////////////////////////////
// Test data generation
// ////////////////////////////

// generateRandomBlockData returns block data populated with random
// transactions, intermediate state roots, evidence, and messages.
func generateRandomBlockData(t *testing.T, txCount, isrCount, evdCount, msgCount, maxSize int) Data {
	return Data{
		Txs:                    generateRandomlySizedContiguousShares(txCount, maxSize),
		IntermediateStateRoots: generateRandomISR(isrCount),
		Evidence:               generateIdenticalEvidence(t, evdCount),
		Messages:               generateRandomlySizedMessages(msgCount, maxSize),
	}
}

// generateRandomlySizedContiguousShares returns count random transactions
// whose sizes are random in [1, max).
func generateRandomlySizedContiguousShares(count, max int) Txs {
	txs := make(Txs, count)
	for i := range txs {
		size := rand.Intn(max)
		// zero-length txs are invalid, so bump to one byte
		if size == 0 {
			size = 1
		}
		txs[i] = generateRandomContiguousShares(1, size)[0]
	}
	return txs
}

// generateRandomContiguousShares returns count random transactions of exactly
// size bytes each.
func generateRandomContiguousShares(count, size int) Txs {
	txs := make(Txs, count)
	for i := range txs {
		data := make([]byte, size)
		if _, err := rand.Read(data); err != nil {
			panic(err)
		}
		txs[i] = Tx(data)
	}
	return txs
}

// generateRandomISR returns count random 32 byte intermediate state roots.
func generateRandomISR(count int) IntermediateStateRoots {
	roots := make([]tmbytes.HexBytes, count)
	for i := range roots {
		roots[i] = tmbytes.HexBytes(generateRandomContiguousShares(1, 32)[0])
	}
	return IntermediateStateRoots{RawRootsList: roots}
}

// generateIdenticalEvidence returns count copies of mock duplicate vote
// evidence.
func generateIdenticalEvidence(t *testing.T, count int) EvidenceData {
	evidence := make([]Evidence, count)
	for i := range evidence {
		evidence[i] = NewMockDuplicateVoteEvidence(math.MaxInt64, time.Now(), "chainID")
	}
	return EvidenceData{Evidence: EvidenceList(evidence)}
}

// generateRandomlySizedMessages returns count messages with random sizes
// capped at maxMsgSize.
func generateRandomlySizedMessages(count, maxMsgSize int) Messages {
	msgs := make([]Message, count)
	for i := range msgs {
		msgs[i] = generateRandomMessage(rand.Intn(maxMsgSize))
	}

	// this is just to let us use assert.Equal
	if count == 0 {
		msgs = nil
	}

	return Messages{MessagesList: msgs}
}

// generateRandomMessage returns a single message carrying size bytes of
// random data under a random namespace.
func generateRandomMessage(size int) Message {
	share := generateRandomNamespacedShares(1, size)[0]
	return Message{
		NamespaceID: share.NamespaceID(),
		Data:        share.Data(),
	}
}

func generateRandomNamespacedShares(count, msgSize int) NamespacedShares {
shares := generateRandNamespacedRawData(count, NamespaceSize, msgSize)
msgs := make([]Message, count)
for i, s := range shares {
nsShares[i] = NamespacedShare{
Share: s[NamespaceSize:],
ID: s[:NamespaceSize],
msgs[i] = Message{
Data: s[NamespaceSize:],
NamespaceID: s[:NamespaceSize],
}
}
return nsShares
return Messages{MessagesList: msgs}.splitIntoShares()
}
6 changes: 3 additions & 3 deletions types/tx.go
Original file line number Diff line number Diff line change
@@ -80,13 +80,13 @@ func (txs Txs) Proof(i int) TxProof {
}

func (txs Txs) splitIntoShares() NamespacedShares {
rawDatas := make([][]byte, 0, len(txs))
for _, tx := range txs {
rawDatas := make([][]byte, len(txs))
for i, tx := range txs {
rawData, err := tx.MarshalDelimited()
if err != nil {
panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx))
}
rawDatas = append(rawDatas, rawData)
rawDatas[i] = rawData
}
shares := splitContiguous(TxNamespaceID, rawDatas)
return shares