github.com/ethersphere/bee/v2@v2.2.0/pkg/p2p/libp2p/internal/reacher/reacher.go (about)

     1  // Copyright 2021 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package reacher runs a background worker that will ping peers
     6  // from an internal queue and report back the reachability to the notifier.
     7  package reacher
     8  
     9  import (
    10  	"context"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/ethersphere/bee/v2/pkg/p2p"
    15  	"github.com/ethersphere/bee/v2/pkg/swarm"
    16  	ma "github.com/multiformats/go-multiaddr"
    17  )
    18  
// Defaults applied by New when it is called with nil Options.
const (
	// pingTimeout is the default upper bound for a single ping attempt.
	pingTimeout        = time.Second * 15
	// workers is the default number of ping worker goroutines.
	workers            = 8
	// retryAfterDuration is the default delay before an acquired peer
	// becomes eligible to be pinged again.
	retryAfterDuration = time.Minute * 5
)
    24  
// peer is a queue entry describing a connected peer whose reachability
// is to be tested.
type peer struct {
	// overlay identifies the peer; it is reported to the notifier together
	// with the ping outcome.
	overlay    swarm.Address
	// addr is the multiaddress that gets pinged.
	addr       ma.Multiaddr
	// retryAfter is the earliest time at which the peer may be acquired
	// again. The zero value makes the peer eligible immediately.
	retryAfter time.Time
}
    30  
// reacher keeps a queue of connected peers and pings them from background
// workers, reporting each outcome to the notifier.
type reacher struct {
	// mu guards peers and the retryAfter field of the queued entries.
	mu    sync.Mutex
	// peers is the queue, keyed by the overlay address' ByteString().
	peers map[string]*peer

	// work wakes up the manage loop; sends are non-blocking (see notifyManage).
	work chan struct{}
	// quit is closed by Close to stop the manage loop.
	quit chan struct{}

	pinger   p2p.Pinger
	notifier p2p.ReachableNotifier

	// wg tracks the manage goroutine and the ping workers it starts.
	wg      sync.WaitGroup
	metrics metrics

	options *Options
}
    46  
// Options configures a reacher. Passing nil Options to New selects the
// package defaults.
type Options struct {
	// PingTimeout bounds the duration of a single ping attempt.
	PingTimeout        time.Duration
	// Workers is the number of ping worker goroutines started by the reacher.
	Workers            int
	// RetryAfterDuration is how long an acquired peer is held back before
	// it may be pinged again.
	RetryAfterDuration time.Duration
}
    52  
    53  func New(streamer p2p.Pinger, notifier p2p.ReachableNotifier, o *Options) *reacher {
    54  
    55  	r := &reacher{
    56  		work:     make(chan struct{}, 1),
    57  		quit:     make(chan struct{}),
    58  		pinger:   streamer,
    59  		peers:    make(map[string]*peer),
    60  		notifier: notifier,
    61  		metrics:  newMetrics(),
    62  	}
    63  
    64  	if o == nil {
    65  		o = &Options{
    66  			PingTimeout:        pingTimeout,
    67  			Workers:            workers,
    68  			RetryAfterDuration: retryAfterDuration,
    69  		}
    70  	}
    71  	r.options = o
    72  
    73  	r.wg.Add(1)
    74  	go r.manage()
    75  
    76  	return r
    77  }
    78  
    79  func (r *reacher) manage() {
    80  
    81  	defer r.wg.Done()
    82  
    83  	c := make(chan *peer)
    84  	defer close(c)
    85  
    86  	ctx, cancel := context.WithCancel(context.Background())
    87  	defer cancel()
    88  
    89  	r.wg.Add(r.options.Workers)
    90  	for i := 0; i < r.options.Workers; i++ {
    91  		go r.ping(c, ctx)
    92  	}
    93  
    94  	for {
    95  
    96  		p, tryAfter := r.tryAcquirePeer()
    97  
    98  		// if no peer is returned,
    99  		// wait until either more work or the closest retry-after time.
   100  
   101  		// wait for work and tryAfter
   102  		if tryAfter > 0 {
   103  			select {
   104  			case <-r.quit:
   105  				return
   106  			case <-r.work:
   107  				continue
   108  			case <-time.After(tryAfter):
   109  				continue
   110  			}
   111  		}
   112  
   113  		// wait for work
   114  		if p == nil {
   115  			select {
   116  			case <-r.quit:
   117  				return
   118  			case <-r.work:
   119  				continue
   120  			}
   121  		}
   122  
   123  		// send p to channel
   124  		select {
   125  		case <-r.quit:
   126  			return
   127  		case c <- p:
   128  		}
   129  	}
   130  }
   131  
   132  func (r *reacher) ping(c chan *peer, ctx context.Context) {
   133  
   134  	defer r.wg.Done()
   135  
   136  	for p := range c {
   137  
   138  		r.mu.Lock()
   139  		overlay := p.overlay
   140  		r.mu.Unlock()
   141  
   142  		now := time.Now()
   143  
   144  		ctxt, cancel := context.WithTimeout(ctx, r.options.PingTimeout)
   145  		_, err := r.pinger.Ping(ctxt, p.addr)
   146  		cancel()
   147  
   148  		// ping was successful
   149  		if err == nil {
   150  			r.metrics.Pings.WithLabelValues("success").Inc()
   151  			r.metrics.PingTime.WithLabelValues("success").Observe(time.Since(now).Seconds())
   152  			r.notifier.Reachable(overlay, p2p.ReachabilityStatusPublic)
   153  		} else {
   154  			r.metrics.Pings.WithLabelValues("failure").Inc()
   155  			r.metrics.PingTime.WithLabelValues("failure").Observe(time.Since(now).Seconds())
   156  			r.notifier.Reachable(overlay, p2p.ReachabilityStatusPrivate)
   157  		}
   158  
   159  		r.notifyManage()
   160  	}
   161  }
   162  
   163  func (r *reacher) tryAcquirePeer() (*peer, time.Duration) {
   164  	r.mu.Lock()
   165  	defer r.mu.Unlock()
   166  
   167  	var (
   168  		now         = time.Now()
   169  		nextClosest time.Time
   170  	)
   171  
   172  	for _, p := range r.peers {
   173  
   174  		// retry after has expired, retry
   175  		if now.After(p.retryAfter) {
   176  			p.retryAfter = time.Now().Add(r.options.RetryAfterDuration)
   177  			return p, 0
   178  		}
   179  
   180  		// here, we find the peer with the earliest retry after
   181  		if nextClosest.IsZero() || p.retryAfter.Before(nextClosest) {
   182  			nextClosest = p.retryAfter
   183  		}
   184  	}
   185  
   186  	if nextClosest.IsZero() {
   187  		return nil, 0
   188  	}
   189  
   190  	// return the time to wait until the closest retry after
   191  	return nil, time.Until(nextClosest)
   192  }
   193  
// notifyManage wakes up the manage loop without blocking: if the work channel
// cannot accept the signal right now, the signal is dropped.
func (r *reacher) notifyManage() {
	select {
	case r.work <- struct{}{}:
	default:
	}
}
   200  
   201  // Connected adds a new peer to the queue for testing reachability.
   202  func (r *reacher) Connected(overlay swarm.Address, addr ma.Multiaddr) {
   203  	r.mu.Lock()
   204  	defer r.mu.Unlock()
   205  
   206  	if _, ok := r.peers[overlay.ByteString()]; !ok {
   207  		r.peers[overlay.ByteString()] = &peer{overlay: overlay, addr: addr}
   208  	}
   209  
   210  	r.notifyManage()
   211  }
   212  
   213  // Disconnected removes a peer from the queue.
   214  func (r *reacher) Disconnected(overlay swarm.Address) {
   215  	r.mu.Lock()
   216  	defer r.mu.Unlock()
   217  
   218  	delete(r.peers, overlay.ByteString())
   219  }
   220  
   221  // Close stops the worker. Must be called once.
   222  func (r *reacher) Close() error {
   223  	select {
   224  	case <-r.quit:
   225  		return nil
   226  	default:
   227  	}
   228  
   229  	close(r.quit)
   230  	r.wg.Wait()
   231  	return nil
   232  }