github.com/status-im/status-go@v1.1.0/peers/peerpool.go (about)

     1  package peers
     2  
     3  import (
     4  	"crypto/ecdsa"
     5  	"errors"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/ethereum/go-ethereum/event"
    10  	"github.com/ethereum/go-ethereum/log"
    11  	"github.com/ethereum/go-ethereum/p2p"
    12  	"github.com/ethereum/go-ethereum/p2p/discv5"
    13  	"github.com/ethereum/go-ethereum/p2p/enode"
    14  
    15  	"github.com/status-im/status-go/discovery"
    16  	"github.com/status-im/status-go/params"
    17  	"github.com/status-im/status-go/peers/verifier"
    18  	"github.com/status-im/status-go/signal"
    19  )
    20  
var (
	// ErrDiscv5NotRunning is returned when the pool is started but discovery v5 is not running or not enabled.
	ErrDiscv5NotRunning = errors.New("Discovery v5 is not running")
)
    25  
// PoolEvent is a type used for peer pool events.
type PoolEvent string
    28  
const (
	// immediately is a zero delay; it is used to queue a discovery retry without waiting.
	immediately = 0 * time.Minute
	// expirationPeriod is the amount of time during which a peer is considered connectable.
	expirationPeriod = 60 * time.Minute
	// discoveryRestartTimeout defines how long the loop waits before it tries
	// to start the discovery server again after a failed attempt.
	discoveryRestartTimeout = 2 * time.Second
	// DefaultFastSync is a recommended value for aggressive peers search.
	DefaultFastSync = 3 * time.Second
	// DefaultSlowSync is a recommended value for slow (background) peers search.
	DefaultSlowSync = 30 * time.Second
	// DefaultDiscV5Timeout is a timeout after which Discv5 is stopped.
	DefaultDiscV5Timeout = 3 * time.Minute
	// DefaultTopicFastModeTimeout is a timeout after which sync mode is switched to slow mode.
	DefaultTopicFastModeTimeout = 30 * time.Second
	// DefaultTopicStopSearchDelay is the default delay when stopping a topic search.
	DefaultTopicStopSearchDelay = 10 * time.Second
)
    46  
// Options is a struct with PeerPool configuration.
type Options struct {
	// FastSync and SlowSync are passed to every topic pool created by Start.
	// DefaultFastSync and DefaultSlowSync are the recommended values for
	// aggressive and background peers search respectively.
	FastSync time.Duration
	SlowSync time.Duration
	// DiscServerTimeout is the time after which Discovery is stopped even if
	// max peers is not reached. The timeout is only armed when AllowStop is
	// set and the value is positive.
	DiscServerTimeout time.Duration
	// AllowStop allows stopping Discovery when reaching max peers or after timeout.
	AllowStop bool
	// TopicStopSearchDelay is the time stopSearch waits for max cached peers to
	// be filled before really stopping the search.
	TopicStopSearchDelay time.Duration
	// TrustedMailServers is a list of trusted nodes; it is handed to the local
	// verifier created for the mail server discovery topic.
	TrustedMailServers []enode.ID
}
    61  
    62  // NewDefaultOptions returns a struct with default Options.
    63  func NewDefaultOptions() *Options {
    64  	return &Options{
    65  		FastSync:             DefaultFastSync,
    66  		SlowSync:             DefaultSlowSync,
    67  		DiscServerTimeout:    DefaultDiscV5Timeout,
    68  		AllowStop:            false,
    69  		TopicStopSearchDelay: DefaultTopicStopSearchDelay,
    70  	}
    71  }
    72  
// peerInfo describes a discovered peer.
type peerInfo struct {
	// discoveredTime is the last time the node was found by v5
	discoveredTime time.Time
	// dismissed is true when our node requested a disconnect
	dismissed bool
	// added is true when the node tries to add this peer to a server
	added bool

	// node is the discv5 node this entry refers to.
	node *discv5.Node
	// store public key separately to make peerInfo more independent from discv5
	publicKey *ecdsa.PublicKey
}
    85  
    86  func (p *peerInfo) NodeID() enode.ID {
    87  	return enode.PubkeyToIDV4(p.publicKey)
    88  }
    89  
// PeerPool manages discovered peers and connects them to p2p server
type PeerPool struct {
	// opts is the pool configuration supplied to NewPeerPool.
	opts *Options

	// discovery is the discovery service whose state the pool checks and
	// which it starts and stops.
	discovery discovery.Discovery

	// config can be set only once per pool life cycle; it maps each topic to
	// its limits. cache is passed to every topic pool created in Start.
	config map[discv5.Topic]params.Limits
	cache  *Cache

	// mu is the pool-wide lock, held by Start and while peer events are handled.
	// timeoutMu guards timeout only; it is separate from mu so that the event
	// loop never has to wait on the pool-wide lock to read the timeout.
	// topics are the topic pools created in Start.
	// serverSubscription is obtained from the server in Start and released in Stop.
	// events is the buffered channel on which the server publishes peer events.
	// quit is created in Start and closed by Stop to terminate the event loop.
	// wg tracks the event loop goroutine so that Stop can wait for it.
	// timeout fires when discovery should be stopped; it is nil when no
	// timeout is armed.
	// updateTopic carries UpdateTopic requests to the event loop.
	mu                 sync.RWMutex
	timeoutMu          sync.RWMutex
	topics             []TopicPoolInterface
	serverSubscription event.Subscription
	events             chan *p2p.PeerEvent
	quit               chan struct{}
	wg                 sync.WaitGroup
	timeout            <-chan time.Time
	updateTopic        chan *updateTopicRequest
}
   110  
   111  // NewPeerPool creates instance of PeerPool
   112  func NewPeerPool(discovery discovery.Discovery, config map[discv5.Topic]params.Limits, cache *Cache, options *Options) *PeerPool {
   113  	return &PeerPool{
   114  		opts:      options,
   115  		discovery: discovery,
   116  		config:    config,
   117  		cache:     cache,
   118  	}
   119  }
   120  
   121  func (p *PeerPool) setDiscoveryTimeout() {
   122  	p.timeoutMu.Lock()
   123  	defer p.timeoutMu.Unlock()
   124  	if p.opts.AllowStop && p.opts.DiscServerTimeout > 0 {
   125  		p.timeout = time.After(p.opts.DiscServerTimeout)
   126  	}
   127  }
   128  
   129  // Start creates topic pool for each topic in config and subscribes to server events.
   130  func (p *PeerPool) Start(server *p2p.Server) error {
   131  	if !p.discovery.Running() {
   132  		return ErrDiscv5NotRunning
   133  	}
   134  
   135  	p.mu.Lock()
   136  	defer p.mu.Unlock()
   137  
   138  	// init channels
   139  	p.quit = make(chan struct{})
   140  	p.updateTopic = make(chan *updateTopicRequest)
   141  	p.setDiscoveryTimeout()
   142  
   143  	// subscribe to peer events
   144  	p.events = make(chan *p2p.PeerEvent, 20)
   145  	p.serverSubscription = server.SubscribeEvents(p.events)
   146  	p.wg.Add(1)
   147  	go func() {
   148  		p.handleServerPeers(server, p.events)
   149  		p.wg.Done()
   150  	}()
   151  
   152  	// collect topics and start searching for nodes
   153  	p.topics = make([]TopicPoolInterface, 0, len(p.config))
   154  	for topic, limits := range p.config {
   155  		var topicPool TopicPoolInterface
   156  		t := newTopicPool(p.discovery, topic, limits, p.opts.SlowSync, p.opts.FastSync, p.cache)
   157  		if topic == MailServerDiscoveryTopic {
   158  			v, err := p.initVerifier()
   159  			if err != nil {
   160  				return err
   161  			}
   162  			topicPool = newCacheOnlyTopicPool(t, v)
   163  		} else {
   164  			topicPool = t
   165  		}
   166  		if err := topicPool.StartSearch(server); err != nil {
   167  			return err
   168  		}
   169  		p.topics = append(p.topics, topicPool)
   170  	}
   171  
   172  	// discovery must be already started when pool is started
   173  	signal.SendDiscoveryStarted()
   174  
   175  	return nil
   176  }
   177  
   178  func (p *PeerPool) initVerifier() (v Verifier, err error) {
   179  	return verifier.NewLocalVerifier(p.opts.TrustedMailServers), nil
   180  }
   181  
   182  func (p *PeerPool) startDiscovery() error {
   183  	if p.discovery.Running() {
   184  		return nil
   185  	}
   186  
   187  	if err := p.discovery.Start(); err != nil {
   188  		return err
   189  	}
   190  
   191  	p.mu.Lock()
   192  	p.setDiscoveryTimeout()
   193  	p.mu.Unlock()
   194  
   195  	signal.SendDiscoveryStarted()
   196  
   197  	return nil
   198  }
   199  
   200  func (p *PeerPool) stopDiscovery(server *p2p.Server) {
   201  	if !p.discovery.Running() {
   202  		return
   203  	}
   204  
   205  	if err := p.discovery.Stop(); err != nil {
   206  		log.Error("discovery errored when stopping", "err", err)
   207  	}
   208  	for _, t := range p.topics {
   209  		t.StopSearch(server)
   210  	}
   211  
   212  	p.timeoutMu.Lock()
   213  	p.timeout = nil
   214  	p.timeoutMu.Unlock()
   215  
   216  	signal.SendDiscoveryStopped()
   217  }
   218  
   219  // restartDiscovery and search for topics that have peer count below min
   220  func (p *PeerPool) restartDiscovery(server *p2p.Server) error {
   221  	if !p.discovery.Running() {
   222  		if err := p.startDiscovery(); err != nil {
   223  			return err
   224  		}
   225  		log.Debug("restarted discovery from peer pool")
   226  	}
   227  	for _, t := range p.topics {
   228  		if !t.BelowMin() || t.SearchRunning() {
   229  			continue
   230  		}
   231  		err := t.StartSearch(server)
   232  		if err != nil {
   233  			log.Error("search failed to start", "error", err)
   234  		}
   235  	}
   236  	return nil
   237  }
   238  
   239  // handleServerPeers watches server peer events, notifies topic pools about changes
   240  // in the peer set and stops the discv5 if all topic pools collected enough peers.
   241  //
   242  // @TODO(adam): split it into peers and discovery management loops. This should
   243  // simplify the whole logic and allow to remove `timeout` field from `PeerPool`.
   244  func (p *PeerPool) handleServerPeers(server *p2p.Server, events <-chan *p2p.PeerEvent) {
   245  	retryDiscv5 := make(chan struct{}, 1)
   246  	stopDiscv5 := make(chan struct{}, 1)
   247  
   248  	queueRetry := func(d time.Duration) {
   249  		go func() {
   250  			time.Sleep(d)
   251  			select {
   252  			case retryDiscv5 <- struct{}{}:
   253  			default:
   254  			}
   255  		}()
   256  
   257  	}
   258  
   259  	queueStop := func() {
   260  		go func() {
   261  			select {
   262  			case stopDiscv5 <- struct{}{}:
   263  			default:
   264  			}
   265  		}()
   266  
   267  	}
   268  
   269  	for {
   270  		// We use a separate lock for timeout, as this loop should
   271  		// always be running, otherwise the p2p.Server will hang.
   272  		// Because the handler of events might potentially hang on the
   273  		// server, deadlocking if this loop is waiting for the global lock.
   274  		// NOTE: this code probably needs to be refactored and simplified
   275  		// as it's difficult to follow the asynchronous nature of it.
   276  		p.timeoutMu.RLock()
   277  		timeout := p.timeout
   278  		p.timeoutMu.RUnlock()
   279  
   280  		select {
   281  		case <-p.quit:
   282  			log.Debug("stopping DiscV5 because of quit")
   283  			p.stopDiscovery(server)
   284  			return
   285  		case <-timeout:
   286  			log.Info("DiscV5 timed out")
   287  			p.stopDiscovery(server)
   288  		case <-retryDiscv5:
   289  			if err := p.restartDiscovery(server); err != nil {
   290  				log.Error("starting discv5 failed", "error", err, "retry", discoveryRestartTimeout)
   291  				queueRetry(discoveryRestartTimeout)
   292  			}
   293  		case <-stopDiscv5:
   294  			p.handleStopTopics(server)
   295  		case req := <-p.updateTopic:
   296  			if p.updateTopicLimits(server, req) == nil {
   297  				if !p.discovery.Running() {
   298  					queueRetry(immediately)
   299  				}
   300  			}
   301  		case event := <-events:
   302  			// NOTE: handlePeerEventType needs to be called asynchronously
   303  			// as it publishes on the <-events channel, leading to a deadlock
   304  			// if events channel is full.
   305  			go p.handlePeerEventType(server, event, queueRetry, queueStop)
   306  		}
   307  	}
   308  }
   309  
   310  func (p *PeerPool) handlePeerEventType(server *p2p.Server, event *p2p.PeerEvent, queueRetry func(time.Duration), queueStop func()) {
   311  	p.mu.Lock()
   312  	defer p.mu.Unlock()
   313  
   314  	var shouldRetry bool
   315  	var shouldStop bool
   316  	switch event.Type {
   317  	case p2p.PeerEventTypeDrop:
   318  		log.Debug("confirm peer dropped", "ID", event.Peer)
   319  		if p.handleDroppedPeer(server, event.Peer) {
   320  			shouldRetry = true
   321  		}
   322  	case p2p.PeerEventTypeAdd: // skip other events
   323  		log.Debug("confirm peer added", "ID", event.Peer)
   324  		p.handleAddedPeer(server, event.Peer)
   325  		shouldStop = true
   326  	default:
   327  		return
   328  	}
   329  
   330  	// First we send the discovery summary
   331  	SendDiscoverySummary(server.PeersInfo())
   332  
   333  	// then we send the stop event
   334  	if shouldRetry {
   335  		queueRetry(immediately)
   336  	} else if shouldStop {
   337  		queueStop()
   338  	}
   339  }
   340  
   341  // handleAddedPeer notifies all topics about added peer.
   342  func (p *PeerPool) handleAddedPeer(server *p2p.Server, nodeID enode.ID) {
   343  	for _, t := range p.topics {
   344  		t.ConfirmAdded(server, nodeID)
   345  		if p.opts.AllowStop && t.MaxReached() {
   346  			t.setStopSearchTimeout(p.opts.TopicStopSearchDelay)
   347  		}
   348  	}
   349  }
   350  
   351  // handleStopTopics stops the search on any topics having reached its max cached
   352  // limit or its delay stop is expired, additionally will stop discovery if all
   353  // peers are stopped.
   354  func (p *PeerPool) handleStopTopics(server *p2p.Server) {
   355  	if !p.opts.AllowStop {
   356  		return
   357  	}
   358  	for _, t := range p.topics {
   359  		if t.readyToStopSearch() {
   360  			t.StopSearch(server)
   361  		}
   362  	}
   363  	if p.allTopicsStopped() {
   364  		log.Debug("closing discv5 connection because all topics reached max limit")
   365  		p.stopDiscovery(server)
   366  	}
   367  }
   368  
   369  // allTopicsStopped returns true if all topics are stopped.
   370  func (p *PeerPool) allTopicsStopped() (all bool) {
   371  	if !p.opts.AllowStop {
   372  		return false
   373  	}
   374  	all = true
   375  	for _, t := range p.topics {
   376  		if !t.isStopped() {
   377  			all = false
   378  		}
   379  	}
   380  	return all
   381  }
   382  
   383  // handleDroppedPeer notifies every topic about dropped peer and returns true if any peer have connections
   384  // below min limit
   385  func (p *PeerPool) handleDroppedPeer(server *p2p.Server, nodeID enode.ID) (any bool) {
   386  	for _, t := range p.topics {
   387  		confirmed := t.ConfirmDropped(server, nodeID)
   388  		if confirmed {
   389  			newPeer := t.AddPeerFromTable(server)
   390  			if newPeer != nil {
   391  				log.Debug("added peer from local table", "ID", newPeer.ID)
   392  			}
   393  		}
   394  		log.Debug("search", "topic", t.Topic(), "below min", t.BelowMin())
   395  		if t.BelowMin() && !t.SearchRunning() {
   396  			any = true
   397  		}
   398  	}
   399  	return any
   400  }
   401  
   402  // Stop closes pool quit channel and all channels that are watched by search queries
   403  // and waits till all goroutines will exit.
   404  func (p *PeerPool) Stop() {
   405  	// pool wasn't started
   406  	if p.quit == nil {
   407  		return
   408  	}
   409  	select {
   410  	case <-p.quit:
   411  		return
   412  	default:
   413  		log.Debug("started closing peer pool")
   414  		close(p.quit)
   415  	}
   416  	p.serverSubscription.Unsubscribe()
   417  	p.wg.Wait()
   418  }
   419  
// updateTopicRequest is the message UpdateTopic sends to the event loop.
// Topic names the topic pool to update and Limits are the limits to apply.
type updateTopicRequest struct {
	Topic  string
	Limits params.Limits
}
   424  
   425  // UpdateTopic updates the pre-existing TopicPool limits.
   426  func (p *PeerPool) UpdateTopic(topic string, limits params.Limits) error {
   427  	if _, err := p.getTopic(topic); err != nil {
   428  		return err
   429  	}
   430  
   431  	p.updateTopic <- &updateTopicRequest{
   432  		Topic:  topic,
   433  		Limits: limits,
   434  	}
   435  
   436  	return nil
   437  }
   438  
   439  func (p *PeerPool) updateTopicLimits(server *p2p.Server, req *updateTopicRequest) error {
   440  	t, err := p.getTopic(req.Topic)
   441  	if err != nil {
   442  		return err
   443  	}
   444  	t.SetLimits(req.Limits)
   445  	return nil
   446  }
   447  
   448  func (p *PeerPool) getTopic(topic string) (TopicPoolInterface, error) {
   449  	for _, t := range p.topics {
   450  		if t.Topic() == discv5.Topic(topic) {
   451  			return t, nil
   452  		}
   453  	}
   454  	return nil, errors.New("topic not found")
   455  }