github.com/celestiaorg/celestia-node@v0.15.0-beta.1/share/getters/shrex.go (about)

     1  package getters
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"go.opentelemetry.io/otel"
    10  	"go.opentelemetry.io/otel/attribute"
    11  	"go.opentelemetry.io/otel/metric"
    12  	"go.opentelemetry.io/otel/trace"
    13  
    14  	"github.com/celestiaorg/rsmt2d"
    15  
    16  	"github.com/celestiaorg/celestia-node/header"
    17  	"github.com/celestiaorg/celestia-node/libs/utils"
    18  	"github.com/celestiaorg/celestia-node/share"
    19  	"github.com/celestiaorg/celestia-node/share/ipld"
    20  	"github.com/celestiaorg/celestia-node/share/p2p"
    21  	"github.com/celestiaorg/celestia-node/share/p2p/peers"
    22  	"github.com/celestiaorg/celestia-node/share/p2p/shrexeds"
    23  	"github.com/celestiaorg/celestia-node/share/p2p/shrexnd"
    24  )
    25  
    26  var _ share.Getter = (*ShrexGetter)(nil)
    27  
    28  const (
    29  	// defaultMinRequestTimeout value is set according to observed time taken by healthy peer to
    30  	// serve getEDS request for block size 256
    31  	defaultMinRequestTimeout = time.Minute // should be >= shrexeds server write timeout
    32  	defaultMinAttemptsCount  = 3
    33  )
    34  
    35  var meter = otel.Meter("shrex/getter")
    36  
    37  type metrics struct {
    38  	edsAttempts metric.Int64Histogram
    39  	ndAttempts  metric.Int64Histogram
    40  }
    41  
    42  func (m *metrics) recordEDSAttempt(ctx context.Context, attemptCount int, success bool) {
    43  	if m == nil {
    44  		return
    45  	}
    46  	ctx = utils.ResetContextOnError(ctx)
    47  	m.edsAttempts.Record(ctx, int64(attemptCount),
    48  		metric.WithAttributes(
    49  			attribute.Bool("success", success)))
    50  }
    51  
    52  func (m *metrics) recordNDAttempt(ctx context.Context, attemptCount int, success bool) {
    53  	if m == nil {
    54  		return
    55  	}
    56  	ctx = utils.ResetContextOnError(ctx)
    57  	m.ndAttempts.Record(ctx, int64(attemptCount),
    58  		metric.WithAttributes(
    59  			attribute.Bool("success", success)))
    60  }
    61  
    62  func (sg *ShrexGetter) WithMetrics() error {
    63  	edsAttemptHistogram, err := meter.Int64Histogram(
    64  		"getters_shrex_eds_attempts_per_request",
    65  		metric.WithDescription("Number of attempts per shrex/eds request"),
    66  	)
    67  	if err != nil {
    68  		return err
    69  	}
    70  
    71  	ndAttemptHistogram, err := meter.Int64Histogram(
    72  		"getters_shrex_nd_attempts_per_request",
    73  		metric.WithDescription("Number of attempts per shrex/nd request"),
    74  	)
    75  	if err != nil {
    76  		return err
    77  	}
    78  
    79  	sg.metrics = &metrics{
    80  		edsAttempts: edsAttemptHistogram,
    81  		ndAttempts:  ndAttemptHistogram,
    82  	}
    83  	return nil
    84  }
    85  
    86  // ShrexGetter is a share.Getter that uses the shrex/eds and shrex/nd protocol to retrieve shares.
    87  type ShrexGetter struct {
    88  	edsClient *shrexeds.Client
    89  	ndClient  *shrexnd.Client
    90  
    91  	peerManager *peers.Manager
    92  
    93  	// minRequestTimeout limits minimal timeout given to single peer by getter for serving the request.
    94  	minRequestTimeout time.Duration
    95  	// minAttemptsCount will be used to split request timeout into multiple attempts. It will allow to
    96  	// attempt multiple peers in scope of one request before context timeout is reached
    97  	minAttemptsCount int
    98  
    99  	metrics *metrics
   100  }
   101  
   102  func NewShrexGetter(edsClient *shrexeds.Client, ndClient *shrexnd.Client, peerManager *peers.Manager) *ShrexGetter {
   103  	return &ShrexGetter{
   104  		edsClient:         edsClient,
   105  		ndClient:          ndClient,
   106  		peerManager:       peerManager,
   107  		minRequestTimeout: defaultMinRequestTimeout,
   108  		minAttemptsCount:  defaultMinAttemptsCount,
   109  	}
   110  }
   111  
   112  func (sg *ShrexGetter) Start(ctx context.Context) error {
   113  	return sg.peerManager.Start(ctx)
   114  }
   115  
   116  func (sg *ShrexGetter) Stop(ctx context.Context) error {
   117  	return sg.peerManager.Stop(ctx)
   118  }
   119  
   120  func (sg *ShrexGetter) GetShare(context.Context, *header.ExtendedHeader, int, int) (share.Share, error) {
   121  	return nil, fmt.Errorf("getter/shrex: GetShare %w", errOperationNotSupported)
   122  }
   123  
   124  func (sg *ShrexGetter) GetEDS(ctx context.Context, header *header.ExtendedHeader) (*rsmt2d.ExtendedDataSquare, error) {
   125  	var (
   126  		attempt int
   127  		err     error
   128  	)
   129  	ctx, span := tracer.Start(ctx, "shrex/get-eds")
   130  	defer func() {
   131  		utils.SetStatusAndEnd(span, err)
   132  	}()
   133  
   134  	// short circuit if the data root is empty
   135  	if header.DAH.Equals(share.EmptyRoot()) {
   136  		return share.EmptyExtendedDataSquare(), nil
   137  	}
   138  	for {
   139  		if ctx.Err() != nil {
   140  			sg.metrics.recordEDSAttempt(ctx, attempt, false)
   141  			return nil, errors.Join(err, ctx.Err())
   142  		}
   143  		attempt++
   144  		start := time.Now()
   145  		peer, setStatus, getErr := sg.peerManager.Peer(ctx, header.DAH.Hash(), header.Height())
   146  		if getErr != nil {
   147  			log.Debugw("eds: couldn't find peer",
   148  				"hash", header.DAH.String(),
   149  				"err", getErr,
   150  				"finished (s)", time.Since(start))
   151  			sg.metrics.recordEDSAttempt(ctx, attempt, false)
   152  			return nil, errors.Join(err, getErr)
   153  		}
   154  
   155  		reqStart := time.Now()
   156  		reqCtx, cancel := ctxWithSplitTimeout(ctx, sg.minAttemptsCount-attempt+1, sg.minRequestTimeout)
   157  		eds, getErr := sg.edsClient.RequestEDS(reqCtx, header.DAH.Hash(), peer)
   158  		cancel()
   159  		switch {
   160  		case getErr == nil:
   161  			setStatus(peers.ResultNoop)
   162  			sg.metrics.recordEDSAttempt(ctx, attempt, true)
   163  			return eds, nil
   164  		case errors.Is(getErr, context.DeadlineExceeded),
   165  			errors.Is(getErr, context.Canceled):
   166  			setStatus(peers.ResultCooldownPeer)
   167  		case errors.Is(getErr, p2p.ErrNotFound):
   168  			getErr = share.ErrNotFound
   169  			setStatus(peers.ResultCooldownPeer)
   170  		case errors.Is(getErr, p2p.ErrInvalidResponse):
   171  			setStatus(peers.ResultBlacklistPeer)
   172  		default:
   173  			setStatus(peers.ResultCooldownPeer)
   174  		}
   175  
   176  		if !ErrorContains(err, getErr) {
   177  			err = errors.Join(err, getErr)
   178  		}
   179  		log.Debugw("eds: request failed",
   180  			"hash", header.DAH.String(),
   181  			"peer", peer.String(),
   182  			"attempt", attempt,
   183  			"err", getErr,
   184  			"finished (s)", time.Since(reqStart))
   185  	}
   186  }
   187  
   188  func (sg *ShrexGetter) GetSharesByNamespace(
   189  	ctx context.Context,
   190  	header *header.ExtendedHeader,
   191  	namespace share.Namespace,
   192  ) (share.NamespacedShares, error) {
   193  	if err := namespace.ValidateForData(); err != nil {
   194  		return nil, err
   195  	}
   196  	var (
   197  		attempt int
   198  		err     error
   199  	)
   200  	ctx, span := tracer.Start(ctx, "shrex/get-shares-by-namespace", trace.WithAttributes(
   201  		attribute.String("namespace", namespace.String()),
   202  	))
   203  	defer func() {
   204  		utils.SetStatusAndEnd(span, err)
   205  	}()
   206  
   207  	// verify that the namespace could exist inside the roots before starting network requests
   208  	dah := header.DAH
   209  	roots := ipld.FilterRootByNamespace(dah, namespace)
   210  	if len(roots) == 0 {
   211  		return []share.NamespacedRow{}, nil
   212  	}
   213  
   214  	for {
   215  		if ctx.Err() != nil {
   216  			sg.metrics.recordNDAttempt(ctx, attempt, false)
   217  			return nil, errors.Join(err, ctx.Err())
   218  		}
   219  		attempt++
   220  		start := time.Now()
   221  		peer, setStatus, getErr := sg.peerManager.Peer(ctx, header.DAH.Hash(), header.Height())
   222  		if getErr != nil {
   223  			log.Debugw("nd: couldn't find peer",
   224  				"hash", dah.String(),
   225  				"namespace", namespace.String(),
   226  				"err", getErr,
   227  				"finished (s)", time.Since(start))
   228  			sg.metrics.recordNDAttempt(ctx, attempt, false)
   229  			return nil, errors.Join(err, getErr)
   230  		}
   231  
   232  		reqStart := time.Now()
   233  		reqCtx, cancel := ctxWithSplitTimeout(ctx, sg.minAttemptsCount-attempt+1, sg.minRequestTimeout)
   234  		nd, getErr := sg.ndClient.RequestND(reqCtx, dah, namespace, peer)
   235  		cancel()
   236  		switch {
   237  		case getErr == nil:
   238  			// both inclusion and non-inclusion cases needs verification
   239  			if verErr := nd.Verify(dah, namespace); verErr != nil {
   240  				getErr = verErr
   241  				setStatus(peers.ResultBlacklistPeer)
   242  				break
   243  			}
   244  			setStatus(peers.ResultNoop)
   245  			sg.metrics.recordNDAttempt(ctx, attempt, true)
   246  			return nd, nil
   247  		case errors.Is(getErr, context.DeadlineExceeded),
   248  			errors.Is(getErr, context.Canceled):
   249  			setStatus(peers.ResultCooldownPeer)
   250  		case errors.Is(getErr, p2p.ErrNotFound):
   251  			getErr = share.ErrNotFound
   252  			setStatus(peers.ResultCooldownPeer)
   253  		case errors.Is(getErr, p2p.ErrInvalidResponse):
   254  			setStatus(peers.ResultBlacklistPeer)
   255  		default:
   256  			setStatus(peers.ResultCooldownPeer)
   257  		}
   258  
   259  		if !ErrorContains(err, getErr) {
   260  			err = errors.Join(err, getErr)
   261  		}
   262  		log.Debugw("nd: request failed",
   263  			"hash", dah.String(),
   264  			"namespace", namespace.String(),
   265  			"peer", peer.String(),
   266  			"attempt", attempt,
   267  			"err", getErr,
   268  			"finished (s)", time.Since(reqStart))
   269  	}
   270  }