github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/executiondatasync/execution_data/downloader.go (about)

     1  package execution_data
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  
     9  	"github.com/ipfs/go-cid"
    10  	"golang.org/x/sync/errgroup"
    11  
    12  	"github.com/onflow/flow-go/model/flow"
    13  	"github.com/onflow/flow-go/module"
    14  	"github.com/onflow/flow-go/module/blobs"
    15  	"github.com/onflow/flow-go/network"
    16  )
    17  
// Downloader is used to download execution data blobs from the network via a blob service.
type Downloader interface {
	// ReadyDoneAware supplies startup/shutdown lifecycle signaling; the
	// concrete implementation delegates to the underlying blob service.
	module.ReadyDoneAware
	// ExecutionDataGetter supplies Get for retrieving a BlockExecutionData by its ID.
	ExecutionDataGetter
}
    23  
// compile-time check that downloader satisfies the Downloader interface
var _ Downloader = (*downloader)(nil)

// downloader is the default Downloader implementation, fetching blobs through
// a network.BlobService and decoding them with a configurable Serializer.
type downloader struct {
	blobService network.BlobService // source of blobs on the network
	maxBlobSize int                 // maximum allowed size (bytes) of any single blob
	serializer  Serializer          // codec used to deserialize blob payloads
}

// DownloaderOption is a functional option for configuring a downloader.
type DownloaderOption func(*downloader)
    33  
    34  // WithSerializer configures the serializer for the downloader
    35  func WithSerializer(serializer Serializer) DownloaderOption {
    36  	return func(d *downloader) {
    37  		d.serializer = serializer
    38  	}
    39  }
    40  
    41  // NewDownloader creates a new Downloader instance
    42  func NewDownloader(blobService network.BlobService, opts ...DownloaderOption) *downloader {
    43  	d := &downloader{
    44  		blobService,
    45  		DefaultMaxBlobSize,
    46  		DefaultSerializer,
    47  	}
    48  
    49  	for _, opt := range opts {
    50  		opt(d)
    51  	}
    52  
    53  	return d
    54  }
    55  
// Ready returns a channel that will be closed when the downloader is ready to be used.
// Readiness is delegated entirely to the underlying blob service.
func (d *downloader) Ready() <-chan struct{} {
	return d.blobService.Ready()
}
    60  
// Done returns a channel that will be closed when the downloader is finished shutting down.
// Shutdown is delegated entirely to the underlying blob service.
func (d *downloader) Done() <-chan struct{} {
	return d.blobService.Done()
}
    65  
    66  // Get downloads a blob tree identified by executionDataID from the network and returns the deserialized BlockExecutionData struct
    67  //
    68  // Expected errors during normal operations:
    69  // - BlobNotFoundError if some CID in the blob tree could not be found from the blob service
    70  // - MalformedDataError if some level of the blob tree cannot be properly deserialized
    71  // - BlobSizeLimitExceededError if some blob in the blob tree exceeds the maximum allowed size
    72  func (d *downloader) Get(ctx context.Context, executionDataID flow.Identifier) (*BlockExecutionData, error) {
    73  	blobGetter := d.blobService.GetSession(ctx)
    74  
    75  	// First, download the root execution data record which contains a list of chunk execution data
    76  	// blobs included in the original record.
    77  	edRoot, err := d.getExecutionDataRoot(ctx, executionDataID, blobGetter)
    78  	if err != nil {
    79  		return nil, fmt.Errorf("failed to get execution data root: %w", err)
    80  	}
    81  
    82  	g, gCtx := errgroup.WithContext(ctx)
    83  
    84  	// Next, download each of the chunk execution data blobs
    85  	chunkExecutionDatas := make([]*ChunkExecutionData, len(edRoot.ChunkExecutionDataIDs))
    86  	for i, chunkDataID := range edRoot.ChunkExecutionDataIDs {
    87  		i := i
    88  		chunkDataID := chunkDataID
    89  
    90  		g.Go(func() error {
    91  			ced, err := d.getChunkExecutionData(
    92  				gCtx,
    93  				chunkDataID,
    94  				blobGetter,
    95  			)
    96  
    97  			if err != nil {
    98  				return fmt.Errorf("failed to get chunk execution data at index %d: %w", i, err)
    99  			}
   100  
   101  			chunkExecutionDatas[i] = ced
   102  
   103  			return nil
   104  		})
   105  	}
   106  
   107  	if err := g.Wait(); err != nil {
   108  		return nil, err
   109  	}
   110  
   111  	// Finally, recombine data into original record.
   112  	bed := &BlockExecutionData{
   113  		BlockID:             edRoot.BlockID,
   114  		ChunkExecutionDatas: chunkExecutionDatas,
   115  	}
   116  
   117  	return bed, nil
   118  }
   119  
   120  // getExecutionDataRoot downloads the root execution data record from the network and returns the
   121  // deserialized flow.BlockExecutionDataRoot struct.
   122  //
   123  // Expected errors during normal operations:
   124  // - BlobNotFoundError if the root blob could not be found from the blob service
   125  // - MalformedDataError if the root blob cannot be properly deserialized
   126  // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size
   127  func (d *downloader) getExecutionDataRoot(
   128  	ctx context.Context,
   129  	rootID flow.Identifier,
   130  	blobGetter network.BlobGetter,
   131  ) (*flow.BlockExecutionDataRoot, error) {
   132  	rootCid := flow.IdToCid(rootID)
   133  
   134  	blob, err := blobGetter.GetBlob(ctx, rootCid)
   135  	if err != nil {
   136  		if errors.Is(err, network.ErrBlobNotFound) {
   137  			return nil, NewBlobNotFoundError(rootCid)
   138  		}
   139  
   140  		return nil, fmt.Errorf("failed to get root blob: %w", err)
   141  	}
   142  
   143  	blobSize := len(blob.RawData())
   144  
   145  	if blobSize > d.maxBlobSize {
   146  		return nil, &BlobSizeLimitExceededError{blob.Cid()}
   147  	}
   148  
   149  	v, err := d.serializer.Deserialize(bytes.NewBuffer(blob.RawData()))
   150  	if err != nil {
   151  		return nil, NewMalformedDataError(err)
   152  	}
   153  
   154  	edRoot, ok := v.(*flow.BlockExecutionDataRoot)
   155  	if !ok {
   156  		return nil, NewMalformedDataError(fmt.Errorf("execution data root blob does not deserialize to a BlockExecutionDataRoot, got %T instead", v))
   157  	}
   158  
   159  	return edRoot, nil
   160  }
   161  
   162  // getChunkExecutionData downloads a chunk execution data blob from the network and returns the
   163  // deserialized ChunkExecutionData struct.
   164  //
   165  // Expected errors during normal operations:
   166  // - context.Canceled or context.DeadlineExceeded if the context is canceled or times out
   167  // - BlobNotFoundError if the root blob could not be found from the blob service
   168  // - MalformedDataError if the root blob cannot be properly deserialized
   169  // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size
   170  func (d *downloader) getChunkExecutionData(
   171  	ctx context.Context,
   172  	chunkExecutionDataID cid.Cid,
   173  	blobGetter network.BlobGetter,
   174  ) (*ChunkExecutionData, error) {
   175  	cids := []cid.Cid{chunkExecutionDataID}
   176  
   177  	// iteratively process each level of the blob tree until a ChunkExecutionData is returned or an
   178  	// error is encountered
   179  	for i := 0; ; i++ {
   180  		v, err := d.getBlobs(ctx, blobGetter, cids)
   181  		if err != nil {
   182  			return nil, fmt.Errorf("failed to get level %d of blob tree: %w", i, err)
   183  		}
   184  
   185  		switch v := v.(type) {
   186  		case *ChunkExecutionData:
   187  			return v, nil
   188  		case *[]cid.Cid:
   189  			cids = *v
   190  		default:
   191  			return nil, NewMalformedDataError(fmt.Errorf("blob tree contains unexpected type %T at level %d", v, i))
   192  		}
   193  	}
   194  }
   195  
   196  // getBlobs gets the given CIDs from the blobservice, reassembles the blobs, and deserializes the reassembled data into an object.
   197  //
   198  // Expected errors during normal operations:
   199  // - context.Canceled or context.DeadlineExceeded if the context is canceled or times out
   200  // - BlobNotFoundError if the root blob could not be found from the blob service
   201  // - MalformedDataError if the root blob cannot be properly deserialized
   202  // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size
   203  func (d *downloader) getBlobs(ctx context.Context, blobGetter network.BlobGetter, cids []cid.Cid) (interface{}, error) {
   204  	// this uses an optimization to deserialize the data in a streaming fashion as it is received
   205  	// from the network, reducing the amount of memory required to deserialize large objects.
   206  	blobCh, errCh := d.retrieveBlobs(ctx, blobGetter, cids)
   207  	bcr := blobs.NewBlobChannelReader(blobCh)
   208  
   209  	v, deserializeErr := d.serializer.Deserialize(bcr)
   210  
   211  	// blocks until all blobs have been retrieved or an error is encountered
   212  	err := <-errCh
   213  
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  
   218  	if deserializeErr != nil {
   219  		return nil, NewMalformedDataError(deserializeErr)
   220  	}
   221  
   222  	return v, nil
   223  }
   224  
// retrieveBlobs asynchronously retrieves the blobs for the given CIDs with the given BlobGetter.
// Blobs corresponding to the requested CIDs are returned in order on the response channel.
//
// Expected errors during normal operations:
// - context.Canceled or context.DeadlineExceeded if the context is canceled or times out
// - BlobNotFoundError if the root blob could not be found from the blob service
// - MalformedDataError if the root blob cannot be properly deserialized
// - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size
func (d *downloader) retrieveBlobs(parent context.Context, blobGetter network.BlobGetter, cids []cid.Cid) (<-chan blobs.Blob, <-chan error) {
	// blobsOut is buffered to hold every requested blob; errCh is buffered so
	// the final status can be sent even if the receiver hasn't started reading
	blobsOut := make(chan blobs.Blob, len(cids))
	errCh := make(chan error, 1)

	go func() {
		var err error

		// cancel the batch request when this goroutine exits for any reason
		ctx, cancel := context.WithCancel(parent)
		defer cancel()
		defer close(blobsOut)
		// send exactly one final status: the named err set below, or nil on success
		defer func() {
			errCh <- err
			close(errCh)
		}()

		blobChan := blobGetter.GetBlobs(ctx, cids) // initiate a batch request for the given CIDs
		cachedBlobs := make(map[cid.Cid]blobs.Blob)
		cidCounts := make(map[cid.Cid]int) // used to account for duplicate CIDs

		// record the number of times each CID appears in the list. this is later used to determine
		// when it's safe to delete cached blobs during processing
		for _, c := range cids {
			cidCounts[c]++
		}

		// for each cid, find the corresponding blob from the incoming blob channel and send it to
		// the outgoing blob channel in the proper order
		for _, c := range cids {
			// reuse a blob seen earlier in the stream if it was cached by findBlob
			blob, ok := cachedBlobs[c]

			if !ok {
				if blob, err = d.findBlob(blobChan, c, cachedBlobs); err != nil {
					// the blob channel may be closed as a result of the context being canceled,
					// in which case we should return the context error.
					if ctxErr := ctx.Err(); ctxErr != nil {
						err = ctxErr
					}

					return
				}
			}

			// remove the blob from the cache if it's no longer needed
			cidCounts[c]--

			if cidCounts[c] == 0 {
				delete(cachedBlobs, c)
				delete(cidCounts, c)
			}

			// send never blocks: blobsOut is buffered with capacity len(cids)
			blobsOut <- blob
		}
	}()

	return blobsOut, errCh
}
   289  
   290  // findBlob retrieves blobs from the given channel, caching them along the way, until it either
   291  // finds the target blob or exhausts the channel.
   292  //
   293  // This is necessary to ensure blobs can be reassembled in order from the underlying blobservice
   294  // which provides no guarantees for blob order on the response channel.
   295  //
   296  // Expected errors during normal operations:
   297  // - BlobNotFoundError if the root blob could not be found from the blob service
   298  // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size
   299  func (d *downloader) findBlob(
   300  	blobChan <-chan blobs.Blob,
   301  	target cid.Cid,
   302  	cache map[cid.Cid]blobs.Blob,
   303  ) (blobs.Blob, error) {
   304  	// pull blobs off the blob channel until the target blob is found or the channel is closed
   305  	// Note: blobs are returned on the blob channel as they are found, in no particular order
   306  	for blob := range blobChan {
   307  		// check blob size
   308  		blobSize := len(blob.RawData())
   309  
   310  		if blobSize > d.maxBlobSize {
   311  			return nil, &BlobSizeLimitExceededError{blob.Cid()}
   312  		}
   313  
   314  		cache[blob.Cid()] = blob
   315  
   316  		if blob.Cid() == target {
   317  			return blob, nil
   318  		}
   319  	}
   320  
   321  	return nil, NewBlobNotFoundError(target)
   322  }