github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/executiondatasync/execution_data/store.go (about) 1 package execution_data 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 9 "github.com/ipfs/go-cid" 10 11 "github.com/onflow/flow-go/model/flow" 12 "github.com/onflow/flow-go/module/blobs" 13 ) 14 15 // ExecutionDataGetter handles getting execution data from a blobstore 16 type ExecutionDataGetter interface { 17 // Get gets the BlockExecutionData for the given root ID from the blobstore. 18 // Expected errors during normal operations: 19 // - BlobNotFoundError if some CID in the blob tree could not be found from the blobstore 20 // - MalformedDataError if some level of the blob tree cannot be properly deserialized 21 // - BlobSizeLimitExceededError if some blob in the blob tree exceeds the maximum allowed size 22 Get(ctx context.Context, rootID flow.Identifier) (*BlockExecutionData, error) 23 } 24 25 // ExecutionDataStore handles adding / getting execution data to / from a blobstore 26 type ExecutionDataStore interface { 27 ExecutionDataGetter 28 29 // Add constructs a blob tree for the given BlockExecutionData, adds it to the blobstore, 30 // then returns the root CID. 31 // No errors are expected during normal operation. 32 Add(ctx context.Context, executionData *BlockExecutionData) (flow.Identifier, error) 33 } 34 35 type ExecutionDataStoreOption func(*store) 36 37 // WithMaxBlobSize configures the maximum blob size of the store 38 func WithMaxBlobSize(size int) ExecutionDataStoreOption { 39 return func(s *store) { 40 s.maxBlobSize = size 41 } 42 } 43 44 var _ ExecutionDataStore = (*store)(nil) 45 46 type store struct { 47 blobstore blobs.Blobstore 48 serializer Serializer 49 maxBlobSize int 50 } 51 52 // NewExecutionDataStore creates a new Execution Data Store. 53 func NewExecutionDataStore(blobstore blobs.Blobstore, serializer Serializer, opts ...ExecutionDataStoreOption) *store { 54 s := &store{ 55 blobstore: blobstore, 56 serializer: serializer, 57 maxBlobSize: DefaultMaxBlobSize, 58 } 59 60 for _, opt := range opts { 61 opt(s) 62 } 63 64 return s 65 } 66 67 // Add constructs a blob tree for the given BlockExecutionData, adds it to the blobstore, 68 // then returns the rootID. 69 // No errors are expected during normal operation. 70 func (s *store) Add(ctx context.Context, executionData *BlockExecutionData) (flow.Identifier, error) { 71 executionDataRoot := &flow.BlockExecutionDataRoot{ 72 BlockID: executionData.BlockID, 73 ChunkExecutionDataIDs: make([]cid.Cid, len(executionData.ChunkExecutionDatas)), 74 } 75 76 for i, chunkExecutionData := range executionData.ChunkExecutionDatas { 77 chunkExecutionDataID, err := s.addChunkExecutionData(ctx, chunkExecutionData) 78 if err != nil { 79 return flow.ZeroID, fmt.Errorf("could not add chunk execution data at index %d: %w", i, err) 80 } 81 82 executionDataRoot.ChunkExecutionDataIDs[i] = chunkExecutionDataID 83 } 84 85 buf := new(bytes.Buffer) 86 if err := s.serializer.Serialize(buf, executionDataRoot); err != nil { 87 return flow.ZeroID, fmt.Errorf("could not serialize execution data root: %w", err) 88 } 89 90 // this should never happen unless either: 91 // - maxBlobSize is set too low 92 // - an enormous number of chunks are included in the block 93 // e.g. given a 1MB max size, 32 byte CID and 32 byte blockID: 94 // 1MB/32byes - 1 = 32767 chunk CIDs 95 // if the number of chunks in a block ever exceeds this, we will need to update the root blob 96 // generation to support splitting it up into a tree similar to addChunkExecutionData 97 if buf.Len() > s.maxBlobSize { 98 return flow.ZeroID, errors.New("root blob exceeds blob size limit") 99 } 100 101 rootBlob := blobs.NewBlob(buf.Bytes()) 102 if err := s.blobstore.Put(ctx, rootBlob); err != nil { 103 return flow.ZeroID, fmt.Errorf("could not add execution data root: %w", err) 104 } 105 106 rootID, err := flow.CidToId(rootBlob.Cid()) 107 if err != nil { 108 return flow.ZeroID, fmt.Errorf("could not get root ID: %w", err) 109 } 110 111 return rootID, nil 112 } 113 114 // addChunkExecutionData constructs a blob tree for the given ChunkExecutionData, adds it to the 115 // blobstore, and returns the root CID. 116 // No errors are expected during normal operation. 117 func (s *store) addChunkExecutionData(ctx context.Context, chunkExecutionData *ChunkExecutionData) (cid.Cid, error) { 118 var v interface{} = chunkExecutionData 119 120 // given an arbitrarily large v, split it into blobs of size up to maxBlobSize, adding them to 121 // the blobstore. Then, combine the list of CIDs added into a second level of blobs, and repeat. 122 // This produces a tree of blobs, where the leaves are the actual data, and each internal node 123 // contains a list of CIDs for its children. 124 for i := 0; ; i++ { 125 // chunk and store the data, then get the list of CIDs added 126 cids, err := s.addBlobs(ctx, v) 127 if err != nil { 128 return cid.Undef, fmt.Errorf("failed to add blob tree level at height %d: %w", i, err) 129 } 130 131 // once a single CID is left, we have reached the root of the tree 132 if len(cids) == 1 { 133 return cids[0], nil 134 } 135 136 // the next level is the list of CIDs added in this level 137 v = cids 138 } 139 } 140 141 // addBlobs splits the given value into blobs of size up to maxBlobSize, adds them to the blobstore, 142 // then returns the CIDs for each blob added. 143 // No errors are expected during normal operation. 144 func (s *store) addBlobs(ctx context.Context, v interface{}) ([]cid.Cid, error) { 145 // first, serialize the data into a large byte slice 146 buf := new(bytes.Buffer) 147 if err := s.serializer.Serialize(buf, v); err != nil { 148 return nil, fmt.Errorf("could not serialize execution data root: %w", err) 149 } 150 151 data := buf.Bytes() 152 var cids []cid.Cid 153 var blbs []blobs.Blob 154 155 // next, chunk the data into blobs of size up to maxBlobSize 156 for len(data) > 0 { 157 blobLen := s.maxBlobSize 158 if len(data) < blobLen { 159 blobLen = len(data) 160 } 161 162 blob := blobs.NewBlob(data[:blobLen]) 163 data = data[blobLen:] 164 blbs = append(blbs, blob) 165 cids = append(cids, blob.Cid()) 166 } 167 168 // finally, add the blobs to the blobstore and return the list of CIDs 169 if err := s.blobstore.PutMany(ctx, blbs); err != nil { 170 return nil, fmt.Errorf("could not add blobs: %w", err) 171 } 172 173 return cids, nil 174 } 175 176 // Get gets the BlockExecutionData for the given root ID from the blobstore. 177 // Expected errors during normal operations: 178 // - BlobNotFoundError if some CID in the blob tree could not be found from the blobstore 179 // - MalformedDataError if some level of the blob tree cannot be properly deserialized 180 func (s *store) Get(ctx context.Context, rootID flow.Identifier) (*BlockExecutionData, error) { 181 rootCid := flow.IdToCid(rootID) 182 183 // first, get the root blob. it will contain a list of blobs, one for each chunk 184 rootBlob, err := s.blobstore.Get(ctx, rootCid) 185 if err != nil { 186 if errors.Is(err, blobs.ErrNotFound) { 187 return nil, NewBlobNotFoundError(rootCid) 188 } 189 190 return nil, fmt.Errorf("failed to get root blob: %w", err) 191 } 192 193 rootData, err := s.serializer.Deserialize(bytes.NewBuffer(rootBlob.RawData())) 194 if err != nil { 195 return nil, NewMalformedDataError(err) 196 } 197 198 executionDataRoot, ok := rootData.(*flow.BlockExecutionDataRoot) 199 if !ok { 200 return nil, NewMalformedDataError(fmt.Errorf("root blob does not deserialize to a BlockExecutionDataRoot, got %T instead", rootData)) 201 } 202 203 // next, get each chunk blob and deserialize it 204 blockExecutionData := &BlockExecutionData{ 205 BlockID: executionDataRoot.BlockID, 206 ChunkExecutionDatas: make([]*ChunkExecutionData, len(executionDataRoot.ChunkExecutionDataIDs)), 207 } 208 209 for i, chunkExecutionDataID := range executionDataRoot.ChunkExecutionDataIDs { 210 chunkExecutionData, err := s.getChunkExecutionData(ctx, chunkExecutionDataID) 211 if err != nil { 212 return nil, fmt.Errorf("could not get chunk execution data at index %d: %w", i, err) 213 } 214 215 blockExecutionData.ChunkExecutionDatas[i] = chunkExecutionData 216 } 217 218 return blockExecutionData, nil 219 } 220 221 // getChunkExecutionData gets the ChunkExecutionData for the given CID from the blobstore. 222 // Expected errors during normal operations: 223 // - BlobNotFoundError if some CID in the blob tree could not be found from the blobstore 224 // - MalformedDataError if some level of the blob tree cannot be properly deserialized 225 func (s *store) getChunkExecutionData(ctx context.Context, chunkExecutionDataID cid.Cid) (*ChunkExecutionData, error) { 226 cids := []cid.Cid{chunkExecutionDataID} 227 228 // given a root CID, get the blob tree level by level, until we reach the full ChunkExecutionData 229 for i := 0; ; i++ { 230 v, err := s.getBlobs(ctx, cids) 231 if err != nil { 232 return nil, fmt.Errorf("failed to get blob tree level at depth %d: %w", i, err) 233 } 234 235 switch v := v.(type) { 236 case *ChunkExecutionData: 237 return v, nil 238 case *[]cid.Cid: 239 cids = *v 240 default: 241 return nil, NewMalformedDataError(fmt.Errorf("blob tree contains unexpected type %T at level %d", v, i)) 242 } 243 } 244 } 245 246 // getBlobs gets the blobs for the given CIDs from the blobstore, deserializes them, and returns 247 // the deserialized value. 248 // - BlobNotFoundError if any of the CIDs could not be found from the blobstore 249 // - MalformedDataError if any of the blobs cannot be properly deserialized 250 func (s *store) getBlobs(ctx context.Context, cids []cid.Cid) (interface{}, error) { 251 buf := new(bytes.Buffer) 252 253 // get each blob and append the raw data to the buffer 254 for _, cid := range cids { 255 blob, err := s.blobstore.Get(ctx, cid) 256 if err != nil { 257 if errors.Is(err, blobs.ErrNotFound) { 258 return nil, NewBlobNotFoundError(cid) 259 } 260 261 return nil, fmt.Errorf("failed to get blob: %w", err) 262 } 263 264 _, err = buf.Write(blob.RawData()) 265 if err != nil { 266 return nil, fmt.Errorf("failed to write blob %s to deserialization buffer: %w", cid.String(), err) 267 } 268 } 269 270 // deserialize the buffer into a value, and return it 271 v, err := s.serializer.Deserialize(buf) 272 if err != nil { 273 return nil, NewMalformedDataError(err) 274 } 275 276 return v, nil 277 }