github.com/sunrise-zone/sunrise-node@v0.13.1-sr2/share/eds/store.go (about) 1 package eds 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "sync" 12 "sync/atomic" 13 "time" 14 15 "github.com/filecoin-project/dagstore" 16 "github.com/filecoin-project/dagstore/index" 17 "github.com/filecoin-project/dagstore/mount" 18 "github.com/filecoin-project/dagstore/shard" 19 bstore "github.com/ipfs/boxo/blockstore" 20 "github.com/ipfs/go-datastore" 21 carv1 "github.com/ipld/go-car" 22 "go.opentelemetry.io/otel/attribute" 23 "go.opentelemetry.io/otel/trace" 24 25 "github.com/celestiaorg/rsmt2d" 26 27 "github.com/sunrise-zone/sunrise-node/libs/utils" 28 "github.com/sunrise-zone/sunrise-node/share" 29 "github.com/sunrise-zone/sunrise-node/share/eds/cache" 30 "github.com/sunrise-zone/sunrise-node/share/ipld" 31 ) 32 33 const ( 34 blocksPath = "/blocks/" 35 indexPath = "/index/" 36 transientsPath = "/transients/" 37 ) 38 39 var ErrNotFound = errors.New("eds not found in store") 40 41 // Store maintains (via DAGStore) a top-level index enabling granular and efficient random access to 42 // every share and/or Merkle proof over every registered CARv1 file. The EDSStore provides a custom 43 // blockstore interface implementation to achieve access. The main use-case is randomized sampling 44 // over the whole chain of EDS block data and getting data by namespace. 45 type Store struct { 46 cancel context.CancelFunc 47 48 dgstr *dagstore.DAGStore 49 mounts *mount.Registry 50 51 bs *blockstore 52 cache atomic.Pointer[cache.DoubleCache] 53 54 carIdx index.FullIndexRepo 55 invertedIdx *simpleInvertedIndex 56 57 basepath string 58 gcInterval time.Duration 59 // lastGCResult is only stored on the store for testing purposes. 60 lastGCResult atomic.Pointer[dagstore.GCResult] 61 62 // stripedLocks is used to synchronize parallel operations 63 stripedLocks [256]sync.Mutex 64 shardFailures chan dagstore.ShardResult 65 66 metrics *metrics 67 } 68 69 // NewStore creates a new EDS Store under the given basepath and datastore. 70 func NewStore(params *Parameters, basePath string, ds datastore.Batching) (*Store, error) { 71 if err := params.Validate(); err != nil { 72 return nil, err 73 } 74 75 err := setupPath(basePath) 76 if err != nil { 77 return nil, fmt.Errorf("failed to setup eds.Store directories: %w", err) 78 } 79 80 r := mount.NewRegistry() 81 err = r.Register("fs", &inMemoryOnceMount{}) 82 if err != nil { 83 return nil, fmt.Errorf("failed to register memory mount on the registry: %w", err) 84 } 85 if err != nil { 86 return nil, fmt.Errorf("failed to register FS mount on the registry: %w", err) 87 } 88 89 fsRepo, err := index.NewFSRepo(basePath + indexPath) 90 if err != nil { 91 return nil, fmt.Errorf("failed to create index repository: %w", err) 92 } 93 94 invertedIdx, err := newSimpleInvertedIndex(basePath) 95 if err != nil { 96 return nil, fmt.Errorf("failed to create index: %w", err) 97 } 98 99 failureChan := make(chan dagstore.ShardResult) 100 dagStore, err := dagstore.NewDAGStore( 101 dagstore.Config{ 102 TransientsDir: basePath + transientsPath, 103 IndexRepo: fsRepo, 104 Datastore: ds, 105 MountRegistry: r, 106 TopLevelIndex: invertedIdx, 107 FailureCh: failureChan, 108 }, 109 ) 110 if err != nil { 111 return nil, fmt.Errorf("failed to create DAGStore: %w", err) 112 } 113 114 recentBlocksCache, err := cache.NewAccessorCache("recent", params.RecentBlocksCacheSize) 115 if err != nil { 116 return nil, fmt.Errorf("failed to create recent blocks cache: %w", err) 117 } 118 119 blockstoreCache, err := cache.NewAccessorCache("blockstore", params.BlockstoreCacheSize) 120 if err != nil { 121 return nil, fmt.Errorf("failed to create blockstore cache: %w", err) 122 } 123 124 store := &Store{ 125 basepath: basePath, 126 dgstr: dagStore, 127 carIdx: fsRepo, 128 invertedIdx: invertedIdx, 129 gcInterval: params.GCInterval, 130 mounts: r, 131 shardFailures: failureChan, 132 } 133 store.bs = newBlockstore(store, ds) 134 store.cache.Store(cache.NewDoubleCache(recentBlocksCache, blockstoreCache)) 135 return store, nil 136 } 137 138 func (s *Store) Start(ctx context.Context) error { 139 err := s.dgstr.Start(ctx) 140 if err != nil { 141 return err 142 } 143 // start Store only if DagStore succeeds 144 runCtx, cancel := context.WithCancel(context.Background()) 145 s.cancel = cancel 146 // initialize empty gc result to avoid panic on access 147 s.lastGCResult.Store(&dagstore.GCResult{ 148 Shards: make(map[shard.Key]error), 149 }) 150 151 if s.gcInterval != 0 { 152 go s.gc(runCtx) 153 } 154 155 go s.watchForFailures(runCtx) 156 return nil 157 } 158 159 // Stop stops the underlying DAGStore. 160 func (s *Store) Stop(context.Context) error { 161 defer s.cancel() 162 if err := s.invertedIdx.close(); err != nil { 163 return err 164 } 165 return s.dgstr.Close() 166 } 167 168 // gc periodically removes all inactive or errored shards. 169 func (s *Store) gc(ctx context.Context) { 170 ticker := time.NewTicker(s.gcInterval) 171 for { 172 select { 173 case <-ctx.Done(): 174 return 175 case <-ticker.C: 176 tnow := time.Now() 177 res, err := s.dgstr.GC(ctx) 178 s.metrics.observeGCtime(ctx, time.Since(tnow), err != nil) 179 if err != nil { 180 log.Errorf("garbage collecting dagstore: %v", err) 181 return 182 } 183 s.lastGCResult.Store(res) 184 } 185 } 186 } 187 188 func (s *Store) watchForFailures(ctx context.Context) { 189 for { 190 select { 191 case <-ctx.Done(): 192 return 193 case res := <-s.shardFailures: 194 log.Errorw("removing shard after failure", "key", res.Key, "err", res.Error) 195 s.metrics.observeShardFailure(ctx, res.Key.String()) 196 k := share.MustDataHashFromString(res.Key.String()) 197 err := s.Remove(ctx, k) 198 if err != nil { 199 log.Errorw("failed to remove shard after failure", "key", res.Key, "err", err) 200 } 201 } 202 } 203 } 204 205 // Put stores the given data square with DataRoot's hash as a key. 206 // 207 // The square is verified on the Exchange level, and Put only stores the square, trusting it. 208 // The resulting file stores all the shares and NMT Merkle Proofs of the EDS. 209 // Additionally, the file gets indexed s.t. store.Blockstore can access them. 210 func (s *Store) Put(ctx context.Context, root share.DataHash, square *rsmt2d.ExtendedDataSquare) error { 211 ctx, span := tracer.Start(ctx, "store/put", trace.WithAttributes( 212 attribute.Int("width", int(square.Width())), 213 )) 214 215 tnow := time.Now() 216 err := s.put(ctx, root, square) 217 result := putOK 218 switch { 219 case errors.Is(err, dagstore.ErrShardExists): 220 result = putExists 221 case err != nil: 222 result = putFailed 223 } 224 utils.SetStatusAndEnd(span, err) 225 s.metrics.observePut(ctx, time.Since(tnow), result, square.Width()) 226 return err 227 } 228 229 func (s *Store) put(ctx context.Context, root share.DataHash, square *rsmt2d.ExtendedDataSquare) (err error) { 230 lk := &s.stripedLocks[root[len(root)-1]] 231 lk.Lock() 232 defer lk.Unlock() 233 234 // if root already exists, short-circuit 235 if has, _ := s.Has(ctx, root); has { 236 return dagstore.ErrShardExists 237 } 238 239 key := root.String() 240 f, err := os.OpenFile(s.basepath+blocksPath+key, os.O_CREATE|os.O_WRONLY, 0600) 241 if err != nil { 242 return err 243 } 244 defer closeAndLog("car file", f) 245 246 // save encoded eds into buffer 247 mount := &inMemoryOnceMount{ 248 // TODO: buffer could be pre-allocated with capacity calculated based on eds size. 249 buf: bytes.NewBuffer(nil), 250 FileMount: mount.FileMount{Path: s.basepath + blocksPath + key}, 251 } 252 err = WriteEDS(ctx, square, mount) 253 if err != nil { 254 return fmt.Errorf("failed to write EDS to file: %w", err) 255 } 256 257 // write whole buffered mount data in one go to optimize i/o 258 if _, err = mount.WriteTo(f); err != nil { 259 return fmt.Errorf("failed to write EDS to file: %w", err) 260 } 261 262 ch := make(chan dagstore.ShardResult, 1) 263 err = s.dgstr.RegisterShard(ctx, shard.KeyFromString(key), mount, ch, dagstore.RegisterOpts{}) 264 if err != nil { 265 return fmt.Errorf("failed to initiate shard registration: %w", err) 266 } 267 268 var result dagstore.ShardResult 269 select { 270 case result = <-ch: 271 case <-ctx.Done(): 272 // if the context finished before the result was received, track the result in a separate goroutine 273 go trackLateResult("put", ch, s.metrics, time.Minute*5) 274 return ctx.Err() 275 } 276 277 if result.Error != nil { 278 return fmt.Errorf("failed to register shard: %w", result.Error) 279 } 280 281 // the accessor returned in the result will be nil, so the shard needs to be acquired first to 282 // become available in the cache. It might take some time, and the result should not affect the put 283 // operation, so do it in a goroutine 284 // TODO: Ideally, only recent blocks should be put in the cache, but there is no way right now to 285 // check such a condition. 286 go func() { 287 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 288 defer cancel() 289 ac, err := s.cache.Load().First().GetOrLoad(ctx, result.Key, s.getAccessor) 290 if err != nil { 291 log.Warnw("unable to put accessor to recent blocks accessors cache", "err", err) 292 return 293 } 294 295 // need to close returned accessor to remove the reader reference 296 if err := ac.Close(); err != nil { 297 log.Warnw("unable to close accessor after loading", "err", err) 298 } 299 }() 300 301 return nil 302 } 303 304 // waitForResult waits for a result from the res channel for a maximum duration specified by 305 // maxWait. If the result is not received within the specified duration, it logs an error 306 // indicating that the parent context has expired and the shard registration is stuck. If a result 307 // is received, it checks for any error and logs appropriate messages. 308 func trackLateResult(opName string, res <-chan dagstore.ShardResult, metrics *metrics, maxWait time.Duration) { 309 tnow := time.Now() 310 select { 311 case <-time.After(maxWait): 312 metrics.observeLongOp(context.Background(), opName, time.Since(tnow), longOpUnresolved) 313 log.Errorf("parent context is expired, while register shard is stuck for more than %v sec", time.Since(tnow)) 314 return 315 case result := <-res: 316 // don't observe if result was received right after launch of the func 317 if time.Since(tnow) < time.Second { 318 return 319 } 320 if result.Error != nil { 321 metrics.observeLongOp(context.Background(), opName, time.Since(tnow), longOpFailed) 322 log.Errorf("failed to register shard after context expired: %v ago, err: %s", time.Since(tnow), result.Error) 323 return 324 } 325 metrics.observeLongOp(context.Background(), opName, time.Since(tnow), longOpOK) 326 log.Warnf("parent context expired, but register shard finished with no error,"+ 327 " after context expired: %v ago", time.Since(tnow)) 328 return 329 } 330 } 331 332 // GetCAR takes a DataRoot and returns a buffered reader to the respective EDS serialized as a 333 // CARv1 file. 334 // The Reader strictly reads the CAR header and first quadrant (1/4) of the EDS, omitting all the 335 // NMT Merkle proofs. Integrity of the store data is not verified. 336 // 337 // The shard is cached in the Store, so subsequent calls to GetCAR with the same root will use the 338 // same reader. The cache is responsible for closing the underlying reader. 339 func (s *Store) GetCAR(ctx context.Context, root share.DataHash) (io.ReadCloser, error) { 340 ctx, span := tracer.Start(ctx, "store/get-car") 341 tnow := time.Now() 342 r, err := s.getCAR(ctx, root) 343 s.metrics.observeGetCAR(ctx, time.Since(tnow), err != nil) 344 utils.SetStatusAndEnd(span, err) 345 return r, err 346 } 347 348 func (s *Store) getCAR(ctx context.Context, root share.DataHash) (io.ReadCloser, error) { 349 key := shard.KeyFromString(root.String()) 350 accessor, err := s.cache.Load().Get(key) 351 if err == nil { 352 return newReadCloser(accessor), nil 353 } 354 // If the accessor is not found in the cache, create a new one from dagstore. We don't put the 355 // accessor in the cache here because getCAR is used by shrex-eds. There is a lower probability, 356 // compared to other cache put triggers, that the same block will be requested again soon. 357 shardAccessor, err := s.getAccessor(ctx, key) 358 if err != nil { 359 return nil, fmt.Errorf("failed to get accessor: %w", err) 360 } 361 362 return newReadCloser(shardAccessor), nil 363 } 364 365 // Blockstore returns an IPFS blockstore providing access to individual shares/nodes of all EDS 366 // registered on the Store. NOTE: The blockstore does not store whole Celestia Blocks but IPFS 367 // blocks. We represent `shares` and NMT Merkle proofs as IPFS blocks and IPLD nodes so Bitswap can 368 // access those. 369 func (s *Store) Blockstore() bstore.Blockstore { 370 return s.bs 371 } 372 373 // CARBlockstore returns an IPFS Blockstore providing access to individual shares/nodes of a 374 // specific EDS identified by DataHash and registered on the Store. NOTE: The Blockstore does not 375 // store whole Celestia Blocks but IPFS blocks. We represent `shares` and NMT Merkle proofs as IPFS 376 // blocks and IPLD nodes so Bitswap can access those. 377 func (s *Store) CARBlockstore( 378 ctx context.Context, 379 root share.DataHash, 380 ) (*BlockstoreCloser, error) { 381 ctx, span := tracer.Start(ctx, "store/car-blockstore") 382 tnow := time.Now() 383 cbs, err := s.carBlockstore(ctx, root) 384 s.metrics.observeCARBlockstore(ctx, time.Since(tnow), err != nil) 385 utils.SetStatusAndEnd(span, err) 386 return cbs, err 387 } 388 389 func (s *Store) carBlockstore( 390 ctx context.Context, 391 root share.DataHash, 392 ) (*BlockstoreCloser, error) { 393 key := shard.KeyFromString(root.String()) 394 accessor, err := s.cache.Load().Get(key) 395 if err == nil { 396 return blockstoreCloser(accessor) 397 } 398 399 // if the accessor is not found in the cache, create a new one from dagstore 400 sa, err := s.getAccessor(ctx, key) 401 if err != nil { 402 return nil, fmt.Errorf("failed to get accessor: %w", err) 403 } 404 return blockstoreCloser(sa) 405 } 406 407 // GetDAH returns the DataAvailabilityHeader for the EDS identified by DataHash. 408 func (s *Store) GetDAH(ctx context.Context, root share.DataHash) (*share.Root, error) { 409 ctx, span := tracer.Start(ctx, "store/car-dah") 410 tnow := time.Now() 411 r, err := s.getDAH(ctx, root) 412 s.metrics.observeGetDAH(ctx, time.Since(tnow), err != nil) 413 utils.SetStatusAndEnd(span, err) 414 return r, err 415 } 416 417 func (s *Store) getDAH(ctx context.Context, root share.DataHash) (*share.Root, error) { 418 r, err := s.getCAR(ctx, root) 419 if err != nil { 420 return nil, fmt.Errorf("eds/store: failed to get CAR file: %w", err) 421 } 422 defer closeAndLog("car reader", r) 423 424 carHeader, err := carv1.ReadHeader(bufio.NewReader(r)) 425 if err != nil { 426 return nil, fmt.Errorf("eds/store: failed to read car header: %w", err) 427 } 428 429 dah := dahFromCARHeader(carHeader) 430 if !bytes.Equal(dah.Hash(), root) { 431 return nil, fmt.Errorf("eds/store: content integrity mismatch from CAR for root %x", root) 432 } 433 return dah, nil 434 } 435 436 // dahFromCARHeader returns the DataAvailabilityHeader stored in the CIDs of a CARv1 header. 437 func dahFromCARHeader(carHeader *carv1.CarHeader) *share.Root { 438 rootCount := len(carHeader.Roots) 439 rootBytes := make([][]byte, 0, rootCount) 440 for _, root := range carHeader.Roots { 441 rootBytes = append(rootBytes, ipld.NamespacedSha256FromCID(root)) 442 } 443 return &share.Root{ 444 RowRoots: rootBytes[:rootCount/2], 445 ColumnRoots: rootBytes[rootCount/2:], 446 } 447 } 448 449 func (s *Store) getAccessor(ctx context.Context, key shard.Key) (cache.Accessor, error) { 450 ch := make(chan dagstore.ShardResult, 1) 451 err := s.dgstr.AcquireShard(ctx, key, ch, dagstore.AcquireOpts{}) 452 if err != nil { 453 if errors.Is(err, dagstore.ErrShardUnknown) { 454 return nil, ErrNotFound 455 } 456 return nil, fmt.Errorf("failed to initialize shard acquisition: %w", err) 457 } 458 459 select { 460 case res := <-ch: 461 if res.Error != nil { 462 return nil, fmt.Errorf("failed to acquire shard: %w", res.Error) 463 } 464 return res.Accessor, nil 465 case <-ctx.Done(): 466 go trackLateResult("get_shard", ch, s.metrics, time.Minute) 467 return nil, ctx.Err() 468 } 469 } 470 471 // Remove removes EDS from Store by the given share.Root hash and cleans up all 472 // the indexing. 473 func (s *Store) Remove(ctx context.Context, root share.DataHash) error { 474 ctx, span := tracer.Start(ctx, "store/remove") 475 tnow := time.Now() 476 err := s.remove(ctx, root) 477 s.metrics.observeRemove(ctx, time.Since(tnow), err != nil) 478 utils.SetStatusAndEnd(span, err) 479 return err 480 } 481 482 func (s *Store) remove(ctx context.Context, root share.DataHash) (err error) { 483 key := shard.KeyFromString(root.String()) 484 // remove open links to accessor from cache 485 if err := s.cache.Load().Remove(key); err != nil { 486 log.Warnw("remove accessor from cache", "err", err) 487 } 488 ch := make(chan dagstore.ShardResult, 1) 489 err = s.dgstr.DestroyShard(ctx, key, ch, dagstore.DestroyOpts{}) 490 if err != nil { 491 return fmt.Errorf("failed to initiate shard destruction: %w", err) 492 } 493 494 select { 495 case result := <-ch: 496 if result.Error != nil { 497 return fmt.Errorf("failed to destroy shard: %w", result.Error) 498 } 499 case <-ctx.Done(): 500 go trackLateResult("remove", ch, s.metrics, time.Minute) 501 return ctx.Err() 502 } 503 504 dropped, err := s.carIdx.DropFullIndex(key) 505 if !dropped { 506 log.Warnf("failed to drop index for %s", key) 507 } 508 if err != nil { 509 return fmt.Errorf("failed to drop index for %s: %w", key, err) 510 } 511 512 err = os.Remove(s.basepath + blocksPath + root.String()) 513 if err != nil { 514 return fmt.Errorf("failed to remove CAR file: %w", err) 515 } 516 return nil 517 } 518 519 // Get reads EDS out of Store by given DataRoot. 520 // 521 // It reads only one quadrant(1/4) of the EDS and verifies the integrity of the stored data by 522 // recomputing it. 523 func (s *Store) Get(ctx context.Context, root share.DataHash) (*rsmt2d.ExtendedDataSquare, error) { 524 ctx, span := tracer.Start(ctx, "store/get") 525 tnow := time.Now() 526 eds, err := s.get(ctx, root) 527 s.metrics.observeGet(ctx, time.Since(tnow), err != nil) 528 utils.SetStatusAndEnd(span, err) 529 return eds, err 530 } 531 532 func (s *Store) get(ctx context.Context, root share.DataHash) (eds *rsmt2d.ExtendedDataSquare, err error) { 533 ctx, span := tracer.Start(ctx, "store/get") 534 defer func() { 535 utils.SetStatusAndEnd(span, err) 536 }() 537 538 r, err := s.getCAR(ctx, root) 539 if err != nil { 540 return nil, fmt.Errorf("failed to get CAR file: %w", err) 541 } 542 defer closeAndLog("car reader", r) 543 544 eds, err = ReadEDS(ctx, r, root) 545 if err != nil { 546 return nil, fmt.Errorf("failed to read EDS from CAR file: %w", err) 547 } 548 return eds, nil 549 } 550 551 // Has checks if EDS exists by the given share.Root hash. 552 func (s *Store) Has(ctx context.Context, root share.DataHash) (has bool, err error) { 553 ctx, span := tracer.Start(ctx, "store/has") 554 tnow := time.Now() 555 eds, err := s.has(ctx, root) 556 s.metrics.observeHas(ctx, time.Since(tnow), err != nil) 557 utils.SetStatusAndEnd(span, err) 558 return eds, err 559 } 560 561 func (s *Store) has(_ context.Context, root share.DataHash) (bool, error) { 562 key := root.String() 563 info, err := s.dgstr.GetShardInfo(shard.KeyFromString(key)) 564 switch err { 565 case nil: 566 return true, info.Error 567 case dagstore.ErrShardUnknown: 568 return false, info.Error 569 default: 570 return false, err 571 } 572 } 573 574 // List lists all the registered EDSes. 575 func (s *Store) List() ([]share.DataHash, error) { 576 ctx, span := tracer.Start(context.Background(), "store/list") 577 tnow := time.Now() 578 hashes, err := s.list() 579 s.metrics.observeList(ctx, time.Since(tnow), err != nil) 580 utils.SetStatusAndEnd(span, err) 581 return hashes, err 582 } 583 584 func (s *Store) list() ([]share.DataHash, error) { 585 shards := s.dgstr.AllShardsInfo() 586 hashes := make([]share.DataHash, 0, len(shards)) 587 for shrd := range shards { 588 hash := share.MustDataHashFromString(shrd.String()) 589 hashes = append(hashes, hash) 590 } 591 return hashes, nil 592 } 593 594 func setupPath(basepath string) error { 595 err := os.MkdirAll(basepath+blocksPath, os.ModePerm) 596 if err != nil { 597 return fmt.Errorf("failed to create blocks directory: %w", err) 598 } 599 err = os.MkdirAll(basepath+transientsPath, os.ModePerm) 600 if err != nil { 601 return fmt.Errorf("failed to create transients directory: %w", err) 602 } 603 err = os.MkdirAll(basepath+indexPath, os.ModePerm) 604 if err != nil { 605 return fmt.Errorf("failed to create index directory: %w", err) 606 } 607 return nil 608 } 609 610 // inMemoryOnceMount is used to allow reading once from buffer before using main mount.Reader 611 type inMemoryOnceMount struct { 612 buf *bytes.Buffer 613 614 readOnce atomic.Bool 615 mount.FileMount 616 } 617 618 func (m *inMemoryOnceMount) Fetch(ctx context.Context) (mount.Reader, error) { 619 if m.buf != nil && !m.readOnce.Swap(true) { 620 reader := &inMemoryReader{Reader: bytes.NewReader(m.buf.Bytes())} 621 // release memory for gc, otherwise buffer will stick forever 622 m.buf = nil 623 return reader, nil 624 } 625 return m.FileMount.Fetch(ctx) 626 } 627 628 func (m *inMemoryOnceMount) Write(b []byte) (int, error) { 629 return m.buf.Write(b) 630 } 631 632 func (m *inMemoryOnceMount) WriteTo(w io.Writer) (int64, error) { 633 return io.Copy(w, bytes.NewReader(m.buf.Bytes())) 634 } 635 636 // inMemoryReader extends bytes.Reader to implement mount.Reader interface 637 type inMemoryReader struct { 638 *bytes.Reader 639 } 640 641 // Close allows inMemoryReader to satisfy mount.Reader interface 642 func (r *inMemoryReader) Close() error { 643 return nil 644 }