github.com/ethersphere/bee/v2@v2.2.0/pkg/pusher/pusher.go (about)

     1  // Copyright 2020 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package pusher provides protocol-orchestrating functionality
     6  // over the pushsync protocol. It makes sure that chunks meant
     7  // to be distributed over the network are sent using the
     8  // pushsync protocol.
     9  package pusher
    10  
    11  import (
    12  	"context"
    13  	"encoding/hex"
    14  	"errors"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/ethersphere/bee/v2/pkg/log"
    19  	"github.com/ethersphere/bee/v2/pkg/postage"
    20  	"github.com/ethersphere/bee/v2/pkg/pushsync"
    21  	storage "github.com/ethersphere/bee/v2/pkg/storage"
    22  	"github.com/ethersphere/bee/v2/pkg/swarm"
    23  	"github.com/ethersphere/bee/v2/pkg/topology"
    24  	"github.com/ethersphere/bee/v2/pkg/tracing"
    25  	"github.com/opentracing/opentracing-go"
    26  	"github.com/opentracing/opentracing-go/ext"
    27  	olog "github.com/opentracing/opentracing-go/log"
    28  )
    29  
    30  // loggerName is the tree path name of the logger for this package.
    31  const loggerName = "pusher"
    32  
    33  type Op struct {
    34  	Chunk  swarm.Chunk
    35  	Err    chan error
    36  	Direct bool
    37  	Span   opentracing.Span
    38  }
    39  
    40  type OpChan <-chan *Op
    41  
    42  type Storer interface {
    43  	storage.PushReporter
    44  	storage.PushSubscriber
    45  	ReservePutter() storage.Putter
    46  }
    47  
    48  type Service struct {
    49  	networkID         uint64
    50  	storer            Storer
    51  	pushSyncer        pushsync.PushSyncer
    52  	validStamp        postage.ValidStampFn
    53  	logger            log.Logger
    54  	metrics           metrics
    55  	quit              chan struct{}
    56  	chunksWorkerQuitC chan struct{}
    57  	inflight          *inflight
    58  	attempts          *attempts
    59  	smuggler          chan OpChan
    60  }
    61  
    62  const (
    63  	traceDuration     = 30 * time.Second // duration for every root tracing span
    64  	ConcurrentPushes  = 100              // how many chunks to push simultaneously
    65  	DefaultRetryCount = 6
    66  )
    67  
    68  var (
    69  	ErrInvalidAddress = errors.New("invalid address")
    70  )
    71  
    72  func New(
    73  	networkID uint64,
    74  	storer Storer,
    75  	pushSyncer pushsync.PushSyncer,
    76  	validStamp postage.ValidStampFn,
    77  	logger log.Logger,
    78  	warmupTime time.Duration,
    79  	retryCount int,
    80  ) *Service {
    81  	p := &Service{
    82  		networkID:         networkID,
    83  		storer:            storer,
    84  		pushSyncer:        pushSyncer,
    85  		validStamp:        validStamp,
    86  		logger:            logger.WithName(loggerName).Register(),
    87  		metrics:           newMetrics(),
    88  		quit:              make(chan struct{}),
    89  		chunksWorkerQuitC: make(chan struct{}),
    90  		inflight:          newInflight(),
    91  		attempts:          &attempts{retryCount: retryCount, attempts: make(map[string]int)},
    92  		smuggler:          make(chan OpChan),
    93  	}
    94  	go p.chunksWorker(warmupTime)
    95  	return p
    96  }
    97  
    98  // chunksWorker is a loop that keeps looking for chunks that are locally uploaded ( by monitoring pushIndex )
    99  // and pushes them to the closest peer and get a receipt.
   100  func (s *Service) chunksWorker(warmupTime time.Duration) {
   101  	defer close(s.chunksWorkerQuitC)
   102  	select {
   103  	case <-time.After(warmupTime):
   104  	case <-s.quit:
   105  		return
   106  	}
   107  
   108  	var (
   109  		ctx, cancel = context.WithCancel(context.Background())
   110  		sem         = make(chan struct{}, ConcurrentPushes)
   111  		cc          = make(chan *Op)
   112  	)
   113  
   114  	// inflight.set handles the backpressure for the maximum amount of inflight chunks
   115  	// and duplicate handling.
   116  	chunks, unsubscribe := s.storer.SubscribePush(ctx)
   117  	defer func() {
   118  		unsubscribe()
   119  		cancel()
   120  	}()
   121  
   122  	var wg sync.WaitGroup
   123  
   124  	push := func(op *Op) {
   125  		var (
   126  			err      error
   127  			doRepeat bool
   128  		)
   129  
   130  		defer func() {
   131  			// no peer was found which may mean that the node is suffering from connections issues
   132  			// we must slow down the pusher to prevent constant retries
   133  			if errors.Is(err, topology.ErrNotFound) {
   134  				select {
   135  				case <-time.After(time.Second * 5):
   136  				case <-s.quit:
   137  				}
   138  			}
   139  
   140  			wg.Done()
   141  			<-sem
   142  			if doRepeat {
   143  				select {
   144  				case cc <- op:
   145  				case <-s.quit:
   146  				}
   147  			}
   148  		}()
   149  
   150  		s.metrics.TotalToPush.Inc()
   151  		startTime := time.Now()
   152  
   153  		spanCtx := ctx
   154  		if op.Span != nil {
   155  			spanCtx = tracing.WithContext(spanCtx, op.Span.Context())
   156  		} else {
   157  			op.Span = opentracing.NoopTracer{}.StartSpan("noOp")
   158  		}
   159  
   160  		if op.Direct {
   161  			err = s.pushDirect(spanCtx, s.logger, op)
   162  		} else {
   163  			doRepeat, err = s.pushDeferred(spanCtx, s.logger, op)
   164  		}
   165  
   166  		if err != nil {
   167  			s.metrics.TotalErrors.Inc()
   168  			s.metrics.ErrorTime.Observe(time.Since(startTime).Seconds())
   169  			ext.LogError(op.Span, err)
   170  		} else {
   171  			op.Span.LogFields(olog.Bool("success", true))
   172  		}
   173  
   174  		s.metrics.SyncTime.Observe(time.Since(startTime).Seconds())
   175  		s.metrics.TotalSynced.Inc()
   176  	}
   177  
   178  	go func() {
   179  		for {
   180  			select {
   181  			case ch, ok := <-chunks:
   182  				if !ok {
   183  					chunks = nil
   184  					continue
   185  				}
   186  				select {
   187  				case cc <- &Op{Chunk: ch, Direct: false}:
   188  				case <-s.quit:
   189  					return
   190  				}
   191  			case apiC := <-s.smuggler:
   192  				go func() {
   193  					for {
   194  						select {
   195  						case op := <-apiC:
   196  							select {
   197  							case cc <- op:
   198  							case <-s.quit:
   199  								return
   200  							}
   201  						case <-s.quit:
   202  							return
   203  						}
   204  					}
   205  				}()
   206  			case <-s.quit:
   207  				return
   208  			}
   209  		}
   210  	}()
   211  
   212  	defer wg.Wait()
   213  
   214  	for {
   215  		select {
   216  		case op := <-cc:
   217  			if s.inflight.set(op.Chunk) {
   218  				if op.Direct {
   219  					select {
   220  					case op.Err <- nil:
   221  					default:
   222  						s.logger.Debug("chunk already in flight, skipping", "chunk", op.Chunk.Address())
   223  					}
   224  				}
   225  				continue
   226  			}
   227  			select {
   228  			case sem <- struct{}{}:
   229  				wg.Add(1)
   230  				go push(op)
   231  			case <-s.quit:
   232  				return
   233  			}
   234  		case <-s.quit:
   235  			return
   236  		}
   237  	}
   238  
   239  }
   240  
   241  func (s *Service) pushDeferred(ctx context.Context, logger log.Logger, op *Op) (bool, error) {
   242  	loggerV1 := logger.V(1).Build()
   243  
   244  	defer s.inflight.delete(op.Chunk)
   245  
   246  	if _, err := s.validStamp(op.Chunk); err != nil {
   247  		loggerV1.Warning(
   248  			"stamp with is no longer valid, skipping syncing for chunk",
   249  			"batch_id", hex.EncodeToString(op.Chunk.Stamp().BatchID()),
   250  			"chunk_address", op.Chunk.Address(),
   251  			"error", err,
   252  		)
   253  
   254  		return false, errors.Join(err, s.storer.Report(ctx, op.Chunk, storage.ChunkCouldNotSync))
   255  	}
   256  
   257  	switch receipt, err := s.pushSyncer.PushChunkToClosest(ctx, op.Chunk); {
   258  	case errors.Is(err, topology.ErrWantSelf):
   259  		// store the chunk
   260  		loggerV1.Debug("chunk stays here, i'm the closest node", "chunk_address", op.Chunk.Address())
   261  		err = s.storer.ReservePutter().Put(ctx, op.Chunk)
   262  		if err != nil {
   263  			loggerV1.Error(err, "pusher: failed to store chunk")
   264  			return true, err
   265  		}
   266  		err = s.storer.Report(ctx, op.Chunk, storage.ChunkStored)
   267  		if err != nil {
   268  			loggerV1.Error(err, "pusher: failed reporting chunk")
   269  			return true, err
   270  		}
   271  	case errors.Is(err, pushsync.ErrShallowReceipt):
   272  		if retry := s.shallowReceipt(receipt); retry {
   273  			return true, err
   274  		}
   275  		if err := s.storer.Report(ctx, op.Chunk, storage.ChunkSynced); err != nil {
   276  			loggerV1.Error(err, "pusher: failed to report sync status")
   277  			return true, err
   278  		}
   279  	case err == nil:
   280  		if err := s.storer.Report(ctx, op.Chunk, storage.ChunkSynced); err != nil {
   281  			loggerV1.Error(err, "pusher: failed to report sync status")
   282  			return true, err
   283  		}
   284  	default:
   285  		loggerV1.Error(err, "pusher: failed PushChunkToClosest")
   286  		return true, err
   287  	}
   288  
   289  	return false, nil
   290  }
   291  
   292  func (s *Service) pushDirect(ctx context.Context, logger log.Logger, op *Op) error {
   293  	loggerV1 := logger.V(1).Build()
   294  
   295  	var err error
   296  
   297  	defer func() {
   298  		s.inflight.delete(op.Chunk)
   299  		select {
   300  		case op.Err <- err:
   301  		default:
   302  			loggerV1.Error(err, "pusher: failed to return error for direct upload")
   303  		}
   304  	}()
   305  
   306  	_, err = s.validStamp(op.Chunk)
   307  	if err != nil {
   308  		logger.Warning(
   309  			"stamp with is no longer valid, skipping direct upload for chunk",
   310  			"batch_id", hex.EncodeToString(op.Chunk.Stamp().BatchID()),
   311  			"chunk_address", op.Chunk.Address(),
   312  			"error", err,
   313  		)
   314  		return err
   315  	}
   316  
   317  	switch _, err = s.pushSyncer.PushChunkToClosest(ctx, op.Chunk); {
   318  	case errors.Is(err, topology.ErrWantSelf):
   319  		// store the chunk
   320  		loggerV1.Debug("chunk stays here, i'm the closest node", "chunk_address", op.Chunk.Address())
   321  		err = s.storer.ReservePutter().Put(ctx, op.Chunk)
   322  		if err != nil {
   323  			loggerV1.Error(err, "pusher: failed to store chunk")
   324  		}
   325  	case err != nil:
   326  		loggerV1.Error(err, "pusher: failed PushChunkToClosest")
   327  	}
   328  
   329  	return err
   330  }
   331  
   332  func (s *Service) shallowReceipt(receipt *pushsync.Receipt) bool {
   333  	if s.attempts.try(receipt.Address) {
   334  		return true
   335  	}
   336  	s.attempts.delete(receipt.Address)
   337  	return false
   338  }
   339  
   340  func (s *Service) AddFeed(c <-chan *Op) {
   341  	go func() {
   342  		select {
   343  		case s.smuggler <- c:
   344  			s.logger.Info("got a chunk being smuggled")
   345  		case <-s.quit:
   346  		}
   347  	}()
   348  }
   349  
   350  func (s *Service) Close() error {
   351  	s.logger.Info("pusher shutting down")
   352  	close(s.quit)
   353  
   354  	// Wait for chunks worker to finish
   355  	select {
   356  	case <-s.chunksWorkerQuitC:
   357  	case <-time.After(6 * time.Second):
   358  	}
   359  	return nil
   360  }