github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/executiondatasync/execution_data/downloader.go (about) 1 package execution_data 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 9 "github.com/ipfs/go-cid" 10 "golang.org/x/sync/errgroup" 11 12 "github.com/onflow/flow-go/model/flow" 13 "github.com/onflow/flow-go/module" 14 "github.com/onflow/flow-go/module/blobs" 15 "github.com/onflow/flow-go/network" 16 ) 17 18 // Downloader is used to download execution data blobs from the network via a blob service. 19 type Downloader interface { 20 module.ReadyDoneAware 21 ExecutionDataGetter 22 } 23 24 var _ Downloader = (*downloader)(nil) 25 26 type downloader struct { 27 blobService network.BlobService 28 maxBlobSize int 29 serializer Serializer 30 } 31 32 type DownloaderOption func(*downloader) 33 34 // WithSerializer configures the serializer for the downloader 35 func WithSerializer(serializer Serializer) DownloaderOption { 36 return func(d *downloader) { 37 d.serializer = serializer 38 } 39 } 40 41 // NewDownloader creates a new Downloader instance 42 func NewDownloader(blobService network.BlobService, opts ...DownloaderOption) *downloader { 43 d := &downloader{ 44 blobService, 45 DefaultMaxBlobSize, 46 DefaultSerializer, 47 } 48 49 for _, opt := range opts { 50 opt(d) 51 } 52 53 return d 54 } 55 56 // Ready returns a channel that will be closed when the downloader is ready to be used 57 func (d *downloader) Ready() <-chan struct{} { 58 return d.blobService.Ready() 59 } 60 61 // Done returns a channel that will be closed when the downloader is finished shutting down 62 func (d *downloader) Done() <-chan struct{} { 63 return d.blobService.Done() 64 } 65 66 // Get downloads a blob tree identified by executionDataID from the network and returns the deserialized BlockExecutionData struct 67 // 68 // Expected errors during normal operations: 69 // - BlobNotFoundError if some CID in the blob tree could not be found from the blob service 70 // - MalformedDataError if some level of the blob tree cannot be properly deserialized 71 // - BlobSizeLimitExceededError if some blob in the blob tree exceeds the maximum allowed size 72 func (d *downloader) Get(ctx context.Context, executionDataID flow.Identifier) (*BlockExecutionData, error) { 73 blobGetter := d.blobService.GetSession(ctx) 74 75 // First, download the root execution data record which contains a list of chunk execution data 76 // blobs included in the original record. 77 edRoot, err := d.getExecutionDataRoot(ctx, executionDataID, blobGetter) 78 if err != nil { 79 return nil, fmt.Errorf("failed to get execution data root: %w", err) 80 } 81 82 g, gCtx := errgroup.WithContext(ctx) 83 84 // Next, download each of the chunk execution data blobs 85 chunkExecutionDatas := make([]*ChunkExecutionData, len(edRoot.ChunkExecutionDataIDs)) 86 for i, chunkDataID := range edRoot.ChunkExecutionDataIDs { 87 i := i 88 chunkDataID := chunkDataID 89 90 g.Go(func() error { 91 ced, err := d.getChunkExecutionData( 92 gCtx, 93 chunkDataID, 94 blobGetter, 95 ) 96 97 if err != nil { 98 return fmt.Errorf("failed to get chunk execution data at index %d: %w", i, err) 99 } 100 101 chunkExecutionDatas[i] = ced 102 103 return nil 104 }) 105 } 106 107 if err := g.Wait(); err != nil { 108 return nil, err 109 } 110 111 // Finally, recombine data into original record. 112 bed := &BlockExecutionData{ 113 BlockID: edRoot.BlockID, 114 ChunkExecutionDatas: chunkExecutionDatas, 115 } 116 117 return bed, nil 118 } 119 120 // getExecutionDataRoot downloads the root execution data record from the network and returns the 121 // deserialized flow.BlockExecutionDataRoot struct. 122 // 123 // Expected errors during normal operations: 124 // - BlobNotFoundError if the root blob could not be found from the blob service 125 // - MalformedDataError if the root blob cannot be properly deserialized 126 // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size 127 func (d *downloader) getExecutionDataRoot( 128 ctx context.Context, 129 rootID flow.Identifier, 130 blobGetter network.BlobGetter, 131 ) (*flow.BlockExecutionDataRoot, error) { 132 rootCid := flow.IdToCid(rootID) 133 134 blob, err := blobGetter.GetBlob(ctx, rootCid) 135 if err != nil { 136 if errors.Is(err, network.ErrBlobNotFound) { 137 return nil, NewBlobNotFoundError(rootCid) 138 } 139 140 return nil, fmt.Errorf("failed to get root blob: %w", err) 141 } 142 143 blobSize := len(blob.RawData()) 144 145 if blobSize > d.maxBlobSize { 146 return nil, &BlobSizeLimitExceededError{blob.Cid()} 147 } 148 149 v, err := d.serializer.Deserialize(bytes.NewBuffer(blob.RawData())) 150 if err != nil { 151 return nil, NewMalformedDataError(err) 152 } 153 154 edRoot, ok := v.(*flow.BlockExecutionDataRoot) 155 if !ok { 156 return nil, NewMalformedDataError(fmt.Errorf("execution data root blob does not deserialize to a BlockExecutionDataRoot, got %T instead", v)) 157 } 158 159 return edRoot, nil 160 } 161 162 // getChunkExecutionData downloads a chunk execution data blob from the network and returns the 163 // deserialized ChunkExecutionData struct. 164 // 165 // Expected errors during normal operations: 166 // - context.Canceled or context.DeadlineExceeded if the context is canceled or times out 167 // - BlobNotFoundError if the root blob could not be found from the blob service 168 // - MalformedDataError if the root blob cannot be properly deserialized 169 // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size 170 func (d *downloader) getChunkExecutionData( 171 ctx context.Context, 172 chunkExecutionDataID cid.Cid, 173 blobGetter network.BlobGetter, 174 ) (*ChunkExecutionData, error) { 175 cids := []cid.Cid{chunkExecutionDataID} 176 177 // iteratively process each level of the blob tree until a ChunkExecutionData is returned or an 178 // error is encountered 179 for i := 0; ; i++ { 180 v, err := d.getBlobs(ctx, blobGetter, cids) 181 if err != nil { 182 return nil, fmt.Errorf("failed to get level %d of blob tree: %w", i, err) 183 } 184 185 switch v := v.(type) { 186 case *ChunkExecutionData: 187 return v, nil 188 case *[]cid.Cid: 189 cids = *v 190 default: 191 return nil, NewMalformedDataError(fmt.Errorf("blob tree contains unexpected type %T at level %d", v, i)) 192 } 193 } 194 } 195 196 // getBlobs gets the given CIDs from the blobservice, reassembles the blobs, and deserializes the reassembled data into an object. 197 // 198 // Expected errors during normal operations: 199 // - context.Canceled or context.DeadlineExceeded if the context is canceled or times out 200 // - BlobNotFoundError if the root blob could not be found from the blob service 201 // - MalformedDataError if the root blob cannot be properly deserialized 202 // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size 203 func (d *downloader) getBlobs(ctx context.Context, blobGetter network.BlobGetter, cids []cid.Cid) (interface{}, error) { 204 // this uses an optimization to deserialize the data in a streaming fashion as it is received 205 // from the network, reducing the amount of memory required to deserialize large objects. 206 blobCh, errCh := d.retrieveBlobs(ctx, blobGetter, cids) 207 bcr := blobs.NewBlobChannelReader(blobCh) 208 209 v, deserializeErr := d.serializer.Deserialize(bcr) 210 211 // blocks until all blobs have been retrieved or an error is encountered 212 err := <-errCh 213 214 if err != nil { 215 return nil, err 216 } 217 218 if deserializeErr != nil { 219 return nil, NewMalformedDataError(deserializeErr) 220 } 221 222 return v, nil 223 } 224 225 // retrieveBlobs asynchronously retrieves the blobs for the given CIDs with the given BlobGetter. 226 // Blobs corresponding to the requested CIDs are returned in order on the response channel. 227 // 228 // Expected errors during normal operations: 229 // - context.Canceled or context.DeadlineExceeded if the context is canceled or times out 230 // - BlobNotFoundError if the root blob could not be found from the blob service 231 // - MalformedDataError if the root blob cannot be properly deserialized 232 // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size 233 func (d *downloader) retrieveBlobs(parent context.Context, blobGetter network.BlobGetter, cids []cid.Cid) (<-chan blobs.Blob, <-chan error) { 234 blobsOut := make(chan blobs.Blob, len(cids)) 235 errCh := make(chan error, 1) 236 237 go func() { 238 var err error 239 240 ctx, cancel := context.WithCancel(parent) 241 defer cancel() 242 defer close(blobsOut) 243 defer func() { 244 errCh <- err 245 close(errCh) 246 }() 247 248 blobChan := blobGetter.GetBlobs(ctx, cids) // initiate a batch request for the given CIDs 249 cachedBlobs := make(map[cid.Cid]blobs.Blob) 250 cidCounts := make(map[cid.Cid]int) // used to account for duplicate CIDs 251 252 // record the number of times each CID appears in the list. this is later used to determine 253 // when it's safe to delete cached blobs during processing 254 for _, c := range cids { 255 cidCounts[c]++ 256 } 257 258 // for each cid, find the corresponding blob from the incoming blob channel and send it to 259 // the outgoing blob channel in the proper order 260 for _, c := range cids { 261 blob, ok := cachedBlobs[c] 262 263 if !ok { 264 if blob, err = d.findBlob(blobChan, c, cachedBlobs); err != nil { 265 // the blob channel may be closed as a result of the context being canceled, 266 // in which case we should return the context error. 267 if ctxErr := ctx.Err(); ctxErr != nil { 268 err = ctxErr 269 } 270 271 return 272 } 273 } 274 275 // remove the blob from the cache if it's no longer needed 276 cidCounts[c]-- 277 278 if cidCounts[c] == 0 { 279 delete(cachedBlobs, c) 280 delete(cidCounts, c) 281 } 282 283 blobsOut <- blob 284 } 285 }() 286 287 return blobsOut, errCh 288 } 289 290 // findBlob retrieves blobs from the given channel, caching them along the way, until it either 291 // finds the target blob or exhausts the channel. 292 // 293 // This is necessary to ensure blobs can be reassembled in order from the underlying blobservice 294 // which provides no guarantees for blob order on the response channel. 295 // 296 // Expected errors during normal operations: 297 // - BlobNotFoundError if the root blob could not be found from the blob service 298 // - BlobSizeLimitExceededError if the root blob exceeds the maximum allowed size 299 func (d *downloader) findBlob( 300 blobChan <-chan blobs.Blob, 301 target cid.Cid, 302 cache map[cid.Cid]blobs.Blob, 303 ) (blobs.Blob, error) { 304 // pull blobs off the blob channel until the target blob is found or the channel is closed 305 // Note: blobs are returned on the blob channel as they are found, in no particular order 306 for blob := range blobChan { 307 // check blob size 308 blobSize := len(blob.RawData()) 309 310 if blobSize > d.maxBlobSize { 311 return nil, &BlobSizeLimitExceededError{blob.Cid()} 312 } 313 314 cache[blob.Cid()] = blob 315 316 if blob.Cid() == target { 317 return blob, nil 318 } 319 } 320 321 return nil, NewBlobNotFoundError(target) 322 }