github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/executiondatasync/execution_data/store.go (about)

     1  package execution_data
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  
     9  	"github.com/ipfs/go-cid"
    10  
    11  	"github.com/onflow/flow-go/model/flow"
    12  	"github.com/onflow/flow-go/module/blobs"
    13  )
    14  
// ExecutionDataGetter handles getting execution data from a blobstore
type ExecutionDataGetter interface {
	// Get gets the BlockExecutionData for the given root ID from the blobstore.
	// Expected errors during normal operations:
	// - BlobNotFoundError if some CID in the blob tree could not be found from the blobstore
	// - MalformedDataError if some level of the blob tree cannot be properly deserialized
	// - BlobSizeLimitExceededError if some blob in the blob tree exceeds the maximum allowed size
	Get(ctx context.Context, rootID flow.Identifier) (*BlockExecutionData, error)
}
    24  
// ExecutionDataStore handles adding / getting execution data to / from a blobstore
type ExecutionDataStore interface {
	ExecutionDataGetter

	// Add constructs a blob tree for the given BlockExecutionData, adds it to the blobstore,
	// then returns the root CID.
	// No errors are expected during normal operation.
	Add(ctx context.Context, executionData *BlockExecutionData) (flow.Identifier, error)
}
    34  
// ExecutionDataStoreOption configures a store during construction.
type ExecutionDataStoreOption func(*store)

// WithMaxBlobSize configures the maximum blob size of the store.
// Blobs larger than this are split across multiple blobs in the tree.
func WithMaxBlobSize(size int) ExecutionDataStoreOption {
	return func(s *store) {
		s.maxBlobSize = size
	}
}
    43  
// compile-time check that store implements ExecutionDataStore
var _ ExecutionDataStore = (*store)(nil)

// store implements ExecutionDataStore backed by a blobstore.
type store struct {
	blobstore   blobs.Blobstore // backing blob storage
	serializer  Serializer      // encodes/decodes blob tree nodes
	maxBlobSize int             // maximum size in bytes of a single blob
}
    51  
    52  // NewExecutionDataStore creates a new Execution Data Store.
    53  func NewExecutionDataStore(blobstore blobs.Blobstore, serializer Serializer, opts ...ExecutionDataStoreOption) *store {
    54  	s := &store{
    55  		blobstore:   blobstore,
    56  		serializer:  serializer,
    57  		maxBlobSize: DefaultMaxBlobSize,
    58  	}
    59  
    60  	for _, opt := range opts {
    61  		opt(s)
    62  	}
    63  
    64  	return s
    65  }
    66  
    67  // Add constructs a blob tree for the given BlockExecutionData, adds it to the blobstore,
    68  // then returns the rootID.
    69  // No errors are expected during normal operation.
    70  func (s *store) Add(ctx context.Context, executionData *BlockExecutionData) (flow.Identifier, error) {
    71  	executionDataRoot := &flow.BlockExecutionDataRoot{
    72  		BlockID:               executionData.BlockID,
    73  		ChunkExecutionDataIDs: make([]cid.Cid, len(executionData.ChunkExecutionDatas)),
    74  	}
    75  
    76  	for i, chunkExecutionData := range executionData.ChunkExecutionDatas {
    77  		chunkExecutionDataID, err := s.addChunkExecutionData(ctx, chunkExecutionData)
    78  		if err != nil {
    79  			return flow.ZeroID, fmt.Errorf("could not add chunk execution data at index %d: %w", i, err)
    80  		}
    81  
    82  		executionDataRoot.ChunkExecutionDataIDs[i] = chunkExecutionDataID
    83  	}
    84  
    85  	buf := new(bytes.Buffer)
    86  	if err := s.serializer.Serialize(buf, executionDataRoot); err != nil {
    87  		return flow.ZeroID, fmt.Errorf("could not serialize execution data root: %w", err)
    88  	}
    89  
    90  	// this should never happen unless either:
    91  	// - maxBlobSize is set too low
    92  	// - an enormous number of chunks are included in the block
    93  	//   e.g. given a 1MB max size, 32 byte CID and 32 byte blockID:
    94  	//   1MB/32byes - 1 = 32767 chunk CIDs
    95  	// if the number of chunks in a block ever exceeds this, we will need to update the root blob
    96  	// generation to support splitting it up into a tree similar to addChunkExecutionData
    97  	if buf.Len() > s.maxBlobSize {
    98  		return flow.ZeroID, errors.New("root blob exceeds blob size limit")
    99  	}
   100  
   101  	rootBlob := blobs.NewBlob(buf.Bytes())
   102  	if err := s.blobstore.Put(ctx, rootBlob); err != nil {
   103  		return flow.ZeroID, fmt.Errorf("could not add execution data root: %w", err)
   104  	}
   105  
   106  	rootID, err := flow.CidToId(rootBlob.Cid())
   107  	if err != nil {
   108  		return flow.ZeroID, fmt.Errorf("could not get root ID: %w", err)
   109  	}
   110  
   111  	return rootID, nil
   112  }
   113  
   114  // addChunkExecutionData constructs a blob tree for the given ChunkExecutionData, adds it to the
   115  // blobstore, and returns the root CID.
   116  // No errors are expected during normal operation.
   117  func (s *store) addChunkExecutionData(ctx context.Context, chunkExecutionData *ChunkExecutionData) (cid.Cid, error) {
   118  	var v interface{} = chunkExecutionData
   119  
   120  	// given an arbitrarily large v, split it into blobs of size up to maxBlobSize, adding them to
   121  	// the blobstore. Then, combine the list of CIDs added into a second level of blobs, and repeat.
   122  	// This produces a tree of blobs, where the leaves are the actual data, and each internal node
   123  	// contains a list of CIDs for its children.
   124  	for i := 0; ; i++ {
   125  		// chunk and store the data, then get the list of CIDs added
   126  		cids, err := s.addBlobs(ctx, v)
   127  		if err != nil {
   128  			return cid.Undef, fmt.Errorf("failed to add blob tree level at height %d: %w", i, err)
   129  		}
   130  
   131  		// once a single CID is left, we have reached the root of the tree
   132  		if len(cids) == 1 {
   133  			return cids[0], nil
   134  		}
   135  
   136  		// the next level is the list of CIDs added in this level
   137  		v = cids
   138  	}
   139  }
   140  
   141  // addBlobs splits the given value into blobs of size up to maxBlobSize, adds them to the blobstore,
   142  // then returns the CIDs for each blob added.
   143  // No errors are expected during normal operation.
   144  func (s *store) addBlobs(ctx context.Context, v interface{}) ([]cid.Cid, error) {
   145  	// first, serialize the data into a large byte slice
   146  	buf := new(bytes.Buffer)
   147  	if err := s.serializer.Serialize(buf, v); err != nil {
   148  		return nil, fmt.Errorf("could not serialize execution data root: %w", err)
   149  	}
   150  
   151  	data := buf.Bytes()
   152  	var cids []cid.Cid
   153  	var blbs []blobs.Blob
   154  
   155  	// next, chunk the data into blobs of size up to maxBlobSize
   156  	for len(data) > 0 {
   157  		blobLen := s.maxBlobSize
   158  		if len(data) < blobLen {
   159  			blobLen = len(data)
   160  		}
   161  
   162  		blob := blobs.NewBlob(data[:blobLen])
   163  		data = data[blobLen:]
   164  		blbs = append(blbs, blob)
   165  		cids = append(cids, blob.Cid())
   166  	}
   167  
   168  	// finally, add the blobs to the blobstore and return the list of CIDs
   169  	if err := s.blobstore.PutMany(ctx, blbs); err != nil {
   170  		return nil, fmt.Errorf("could not add blobs: %w", err)
   171  	}
   172  
   173  	return cids, nil
   174  }
   175  
   176  // Get gets the BlockExecutionData for the given root ID from the blobstore.
   177  // Expected errors during normal operations:
   178  // - BlobNotFoundError if some CID in the blob tree could not be found from the blobstore
   179  // - MalformedDataError if some level of the blob tree cannot be properly deserialized
   180  func (s *store) Get(ctx context.Context, rootID flow.Identifier) (*BlockExecutionData, error) {
   181  	rootCid := flow.IdToCid(rootID)
   182  
   183  	// first, get the root blob. it will contain a list of blobs, one for each chunk
   184  	rootBlob, err := s.blobstore.Get(ctx, rootCid)
   185  	if err != nil {
   186  		if errors.Is(err, blobs.ErrNotFound) {
   187  			return nil, NewBlobNotFoundError(rootCid)
   188  		}
   189  
   190  		return nil, fmt.Errorf("failed to get root blob: %w", err)
   191  	}
   192  
   193  	rootData, err := s.serializer.Deserialize(bytes.NewBuffer(rootBlob.RawData()))
   194  	if err != nil {
   195  		return nil, NewMalformedDataError(err)
   196  	}
   197  
   198  	executionDataRoot, ok := rootData.(*flow.BlockExecutionDataRoot)
   199  	if !ok {
   200  		return nil, NewMalformedDataError(fmt.Errorf("root blob does not deserialize to a BlockExecutionDataRoot, got %T instead", rootData))
   201  	}
   202  
   203  	// next, get each chunk blob and deserialize it
   204  	blockExecutionData := &BlockExecutionData{
   205  		BlockID:             executionDataRoot.BlockID,
   206  		ChunkExecutionDatas: make([]*ChunkExecutionData, len(executionDataRoot.ChunkExecutionDataIDs)),
   207  	}
   208  
   209  	for i, chunkExecutionDataID := range executionDataRoot.ChunkExecutionDataIDs {
   210  		chunkExecutionData, err := s.getChunkExecutionData(ctx, chunkExecutionDataID)
   211  		if err != nil {
   212  			return nil, fmt.Errorf("could not get chunk execution data at index %d: %w", i, err)
   213  		}
   214  
   215  		blockExecutionData.ChunkExecutionDatas[i] = chunkExecutionData
   216  	}
   217  
   218  	return blockExecutionData, nil
   219  }
   220  
   221  // getChunkExecutionData gets the ChunkExecutionData for the given CID from the blobstore.
   222  // Expected errors during normal operations:
   223  // - BlobNotFoundError if some CID in the blob tree could not be found from the blobstore
   224  // - MalformedDataError if some level of the blob tree cannot be properly deserialized
   225  func (s *store) getChunkExecutionData(ctx context.Context, chunkExecutionDataID cid.Cid) (*ChunkExecutionData, error) {
   226  	cids := []cid.Cid{chunkExecutionDataID}
   227  
   228  	// given a root CID, get the blob tree level by level, until we reach the full ChunkExecutionData
   229  	for i := 0; ; i++ {
   230  		v, err := s.getBlobs(ctx, cids)
   231  		if err != nil {
   232  			return nil, fmt.Errorf("failed to get blob tree level at depth %d: %w", i, err)
   233  		}
   234  
   235  		switch v := v.(type) {
   236  		case *ChunkExecutionData:
   237  			return v, nil
   238  		case *[]cid.Cid:
   239  			cids = *v
   240  		default:
   241  			return nil, NewMalformedDataError(fmt.Errorf("blob tree contains unexpected type %T at level %d", v, i))
   242  		}
   243  	}
   244  }
   245  
   246  // getBlobs gets the blobs for the given CIDs from the blobstore, deserializes them, and returns
   247  // the deserialized value.
   248  // - BlobNotFoundError if any of the CIDs could not be found from the blobstore
   249  // - MalformedDataError if any of the blobs cannot be properly deserialized
   250  func (s *store) getBlobs(ctx context.Context, cids []cid.Cid) (interface{}, error) {
   251  	buf := new(bytes.Buffer)
   252  
   253  	// get each blob and append the raw data to the buffer
   254  	for _, cid := range cids {
   255  		blob, err := s.blobstore.Get(ctx, cid)
   256  		if err != nil {
   257  			if errors.Is(err, blobs.ErrNotFound) {
   258  				return nil, NewBlobNotFoundError(cid)
   259  			}
   260  
   261  			return nil, fmt.Errorf("failed to get blob: %w", err)
   262  		}
   263  
   264  		_, err = buf.Write(blob.RawData())
   265  		if err != nil {
   266  			return nil, fmt.Errorf("failed to write blob %s to deserialization buffer: %w", cid.String(), err)
   267  		}
   268  	}
   269  
   270  	// deserialize the buffer into a value, and return it
   271  	v, err := s.serializer.Deserialize(buf)
   272  	if err != nil {
   273  		return nil, NewMalformedDataError(err)
   274  	}
   275  
   276  	return v, nil
   277  }