github.com/celestiaorg/celestia-node@v0.15.0-beta.1/pruner/service.go (about)

     1  package pruner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/ipfs/go-datastore"
     9  	"github.com/ipfs/go-datastore/namespace"
    10  	logging "github.com/ipfs/go-log/v2"
    11  
    12  	hdr "github.com/celestiaorg/go-header"
    13  
    14  	"github.com/celestiaorg/celestia-node/header"
    15  )
    16  
// log is the package-level logger, created under the "pruner/service" name.
var log = logging.Logger("pruner/service")
    18  
// Service handles running the pruning cycle for the node.
//
// A Service is built with NewService, started with Start (which launches the
// background run loop in its own goroutine) and shut down with Stop.
type Service struct {
	// pruner performs the actual pruning of the data behind a given header.
	pruner Pruner
	// window is the availability window handed to NewService.
	window AvailabilityWindow

	// getter loads headers by height; retryFailed uses it to reload headers
	// whose pruning previously failed.
	getter hdr.Getter[*header.ExtendedHeader]

	// ds is the datastore given to NewService, wrapped in the storePrefix
	// namespace.
	ds         datastore.Datastore
	// checkpoint is the in-memory pruning progress, including the map of
	// heights that failed to prune and the error text recorded for each.
	checkpoint *checkpoint

	// numBlocksInWindow is window divided by the block time given to
	// NewService.
	numBlocksInWindow uint64

	// ctx and cancel are created in Start and scope the background loop;
	// Stop calls cancel.
	ctx    context.Context
	cancel context.CancelFunc
	// doneCh is closed by run when the background loop exits; Stop waits on it.
	doneCh chan struct{}

	// params is DefaultParams with the caller's Options applied.
	params  Params
	// metrics records prune outcomes. NewService does not set it —
	// presumably it is attached elsewhere in the package; TODO confirm.
	metrics *metrics
}
    38  
    39  func NewService(
    40  	p Pruner,
    41  	window AvailabilityWindow,
    42  	getter hdr.Getter[*header.ExtendedHeader],
    43  	ds datastore.Datastore,
    44  	blockTime time.Duration,
    45  	opts ...Option,
    46  ) *Service {
    47  	params := DefaultParams()
    48  	for _, opt := range opts {
    49  		opt(&params)
    50  	}
    51  
    52  	numBlocksInWindow := uint64(time.Duration(window) / blockTime)
    53  
    54  	return &Service{
    55  		pruner:            p,
    56  		window:            window,
    57  		getter:            getter,
    58  		checkpoint:        &checkpoint{FailedHeaders: map[uint64]string{}},
    59  		ds:                namespace.Wrap(ds, storePrefix),
    60  		numBlocksInWindow: numBlocksInWindow,
    61  		doneCh:            make(chan struct{}),
    62  		params:            params,
    63  	}
    64  }
    65  
    66  func (s *Service) Start(context.Context) error {
    67  	s.ctx, s.cancel = context.WithCancel(context.Background())
    68  
    69  	err := s.loadCheckpoint(s.ctx)
    70  	if err != nil {
    71  		return err
    72  	}
    73  	log.Debugw("loaded checkpoint", "lastPruned", s.lastPruned().Height())
    74  
    75  	go s.run()
    76  	return nil
    77  }
    78  
    79  func (s *Service) Stop(ctx context.Context) error {
    80  	s.cancel()
    81  
    82  	select {
    83  	case <-s.doneCh:
    84  		return nil
    85  	case <-ctx.Done():
    86  		return fmt.Errorf("pruner unable to exit within context deadline")
    87  	}
    88  }
    89  
    90  func (s *Service) run() {
    91  	if s.params.gcCycle == time.Duration(0) {
    92  		// Service is disabled, exit
    93  		close(s.doneCh)
    94  		return
    95  	}
    96  
    97  	ticker := time.NewTicker(s.params.gcCycle)
    98  	defer ticker.Stop()
    99  
   100  	lastPrunedHeader := s.lastPruned()
   101  
   102  	for {
   103  		select {
   104  		case <-s.ctx.Done():
   105  			close(s.doneCh)
   106  			return
   107  		case <-ticker.C:
   108  			lastPrunedHeader = s.prune(s.ctx, lastPrunedHeader)
   109  		}
   110  	}
   111  }
   112  
   113  func (s *Service) prune(
   114  	ctx context.Context,
   115  	lastPrunedHeader *header.ExtendedHeader,
   116  ) *header.ExtendedHeader {
   117  	// prioritize retrying previously-failed headers
   118  	s.retryFailed(s.ctx)
   119  
   120  	for {
   121  		select {
   122  		case <-s.ctx.Done():
   123  			return lastPrunedHeader
   124  		default:
   125  		}
   126  
   127  		headers, err := s.findPruneableHeaders(ctx)
   128  		if err != nil || len(headers) == 0 {
   129  			log.Errorw("failed to find prune-able blocks", "error", err)
   130  			return lastPrunedHeader
   131  		}
   132  
   133  		failed := make(map[uint64]error)
   134  
   135  		log.Debugw("pruning headers", "from", headers[0].Height(), "to",
   136  			headers[len(headers)-1].Height())
   137  
   138  		for _, eh := range headers {
   139  			pruneCtx, cancel := context.WithDeadline(ctx, time.Now().Add(time.Second*5))
   140  
   141  			err = s.pruner.Prune(pruneCtx, eh)
   142  			if err != nil {
   143  				log.Errorw("failed to prune block", "height", eh.Height(), "err", err)
   144  				failed[eh.Height()] = err
   145  			} else {
   146  				lastPrunedHeader = eh
   147  			}
   148  
   149  			s.metrics.observePrune(pruneCtx, err != nil)
   150  			cancel()
   151  		}
   152  
   153  		err = s.updateCheckpoint(s.ctx, lastPrunedHeader, failed)
   154  		if err != nil {
   155  			log.Errorw("failed to update checkpoint", "err", err)
   156  			return lastPrunedHeader
   157  		}
   158  
   159  		if uint64(len(headers)) < maxHeadersPerLoop {
   160  			// we've pruned all the blocks we can
   161  			return lastPrunedHeader
   162  		}
   163  	}
   164  }
   165  
   166  func (s *Service) retryFailed(ctx context.Context) {
   167  	log.Debugw("retrying failed headers", "amount", len(s.checkpoint.FailedHeaders))
   168  
   169  	for failed := range s.checkpoint.FailedHeaders {
   170  		h, err := s.getter.GetByHeight(ctx, failed)
   171  		if err != nil {
   172  			log.Errorw("failed to load header from failed map", "height", failed, "err", err)
   173  			s.checkpoint.FailedHeaders[failed] = err.Error()
   174  			continue
   175  		}
   176  		err = s.pruner.Prune(ctx, h)
   177  		if err != nil {
   178  			log.Errorw("failed to prune block from failed map", "height", failed, "err", err)
   179  			s.checkpoint.FailedHeaders[failed] = err.Error()
   180  			continue
   181  		}
   182  		delete(s.checkpoint.FailedHeaders, failed)
   183  	}
   184  }