github.com/status-im/status-go@v1.1.0/peers/topicpool.go

package peers

import (
	"container/heap"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p"
	"github.com/ethereum/go-ethereum/p2p/discv5"
	"github.com/ethereum/go-ethereum/p2p/enode"

	"github.com/status-im/status-go/discovery"
	"github.com/status-im/status-go/params"
)

const (
	// notQueuedIndex is used to mark an item that is not queued in the heap queue.
	notQueuedIndex = -1
)

// maxCachedPeersMultiplier is the number the peers max limit is multiplied by
// to get the maximum number of cached peers allowed.
var maxCachedPeersMultiplier = 1

// maxPendingPeersMultiplier is the number the peers max limit is multiplied by
// to get the maximum number of pending peers allowed.
var maxPendingPeersMultiplier = 2

// TopicPoolInterface is the interface for a TopicPool.
type TopicPoolInterface interface {
	StopSearch(server *p2p.Server)
	BelowMin() bool
	SearchRunning() bool
	StartSearch(server *p2p.Server) error
	ConfirmDropped(server *p2p.Server, nodeID enode.ID) bool
	AddPeerFromTable(server *p2p.Server) *discv5.Node
	MaxReached() bool
	ConfirmAdded(server *p2p.Server, nodeID enode.ID)
	isStopped() bool
	Topic() discv5.Topic
	SetLimits(limits params.Limits)
	setStopSearchTimeout(delay time.Duration)
	readyToStopSearch() bool
}

// Clock abstracts the time source so tests can substitute a deterministic one.
type Clock interface {
	Now() time.Time
}

type realClock struct{}

func (realClock) Now() time.Time { return time.Now() }
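
// A deterministic Clock makes the time-based logic below (pending-peer
// eviction, stop-search timeouts, peer expiration) testable. A minimal
// sketch, assuming a hypothetical fakeClock helper that is not part of
// this file:
//
//	// fakeClock is an illustrative test double, not part of status-go.
//	type fakeClock struct{ t time.Time }
//
//	func (c *fakeClock) Now() time.Time          { return c.t }
//	func (c *fakeClock) advance(d time.Duration) { c.t = c.t.Add(d) }
//
// A test can assign a *fakeClock to pool.clock and advance it past a
// deadline to exercise, for example, isStopSearchDelayExpired.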

// newTopicPool returns a new instance of TopicPool.
func newTopicPool(discovery discovery.Discovery, topic discv5.Topic, limits params.Limits, slowMode, fastMode time.Duration, cache *Cache) *TopicPool {
	pool := TopicPool{
		discovery:            discovery,
		topic:                topic,
		limits:               limits,
		fastMode:             fastMode,
		slowMode:             slowMode,
		fastModeTimeout:      DefaultTopicFastModeTimeout,
		pendingPeers:         make(map[enode.ID]*peerInfoItem),
		discoveredPeersQueue: make(peerPriorityQueue, 0),
		discoveredPeers:      make(map[enode.ID]bool),
		connectedPeers:       make(map[enode.ID]*peerInfo),
		cache:                cache,
		maxCachedPeers:       limits.Max * maxCachedPeersMultiplier,
		maxPendingPeers:      limits.Max * maxPendingPeersMultiplier,
		clock:                realClock{},
	}
	heap.Init(&pool.discoveredPeersQueue)

	return &pool
}
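
// Constructing a pool directly, as a usage sketch (the limits and mode
// durations are illustrative assumptions, not defaults from this package):
//
//	pool := newTopicPool(
//		disc,                          // a discovery.Discovery implementation (assumed in scope)
//		discv5.Topic("whisper"),       // topic to search peers for
//		params.Limits{Min: 2, Max: 5}, // keep between 2 and 5 connected peers
//		30*time.Second,                // slowMode sync period
//		2*time.Second,                 // fastMode sync period
//		cache,                         // persistent peer cache (assumed in scope)
//	)
//
// Note that maxCachedPeers and maxPendingPeers are derived from limits.Max
// at construction time; SetLimits later updates only the limits themselves.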

// TopicPool manages peers for a topic.
type TopicPool struct {
	discovery discovery.Discovery

	// configuration
	topic           discv5.Topic
	limits          params.Limits
	fastMode        time.Duration
	slowMode        time.Duration
	fastModeTimeout time.Duration

	mu     sync.RWMutex
	discWG sync.WaitGroup
	poolWG sync.WaitGroup
	quit   chan struct{}

	running int32

	currentMode           time.Duration
	period                chan time.Duration
	fastModeTimeoutCancel chan struct{}

	pendingPeers         map[enode.ID]*peerInfoItem // peers that were found and requested to connect, but not yet confirmed
	discoveredPeersQueue peerPriorityQueue          // priority queue to find the most recently discovered peers; does not contain peers requested to connect
	discoveredPeers      map[enode.ID]bool          // remembers which peers have already been discovered and are enqueued
	connectedPeers       map[enode.ID]*peerInfo     // currently connected peers

	stopSearchTimeout *time.Time

	maxPendingPeers int
	maxCachedPeers  int
	cache           *Cache

	clock Clock
}

// addToPendingPeers adds a peer to the pending peers map. If that would
// exceed maxPendingPeers, the pending peer with the oldest discoveredTime
// is evicted to make room.
func (t *TopicPool) addToPendingPeers(peer *peerInfo) {
	if _, ok := t.pendingPeers[peer.NodeID()]; ok {
		return
	}
	t.pendingPeers[peer.NodeID()] = &peerInfoItem{
		peerInfo: peer,
		index:    notQueuedIndex,
	}

	// maxPendingPeers = 0 means no limits.
	if t.maxPendingPeers == 0 || t.maxPendingPeers >= len(t.pendingPeers) {
		return
	}

	var oldestPeer *peerInfo
	for _, i := range t.pendingPeers {
		if oldestPeer != nil && oldestPeer.discoveredTime.Before(i.peerInfo.discoveredTime) {
			continue
		}

		oldestPeer = i.peerInfo
	}

	t.removeFromPendingPeers(oldestPeer.NodeID())
}
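
// Eviction keeps the most recently discovered peers. A behavior sketch
// with illustrative times, assuming maxPendingPeers == 2:
//
//	addToPendingPeers(a) // discovered 10:00
//	addToPendingPeers(b) // discovered 10:01
//	addToPendingPeers(c) // discovered 10:02 -> a, the oldest, is evicted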

// addToQueue adds the passed peer to the discovered-peers queue, but only
// if it is already pending and not yet enqueued.
func (t *TopicPool) addToQueue(peer *peerInfo) {
	if p, ok := t.pendingPeers[peer.NodeID()]; ok {
		if _, ok := t.discoveredPeers[peer.NodeID()]; ok {
			return
		}

		heap.Push(&t.discoveredPeersQueue, p)
		t.discoveredPeers[peer.NodeID()] = true
	}
}

// popFromQueue removes and returns the highest-priority discovered peer,
// or nil if the queue is empty.
func (t *TopicPool) popFromQueue() *peerInfo {
	if t.discoveredPeersQueue.Len() == 0 {
		return nil
	}
	item := heap.Pop(&t.discoveredPeersQueue).(*peerInfoItem)
	item.index = notQueuedIndex
	delete(t.discoveredPeers, item.peerInfo.NodeID())
	return item.peerInfo
}
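
// peerPriorityQueue is defined elsewhere in this package. For the heap
// operations above to yield "most recently discovered first", its Less
// method presumably orders by discoveredTime, roughly:
//
//	// sketch of the assumed ordering, not the actual implementation
//	func (q peerPriorityQueue) Less(i, j int) bool {
//		return q[i].discoveredTime.After(q[j].discoveredTime)
//	}
//
// so heap.Pop in popFromQueue returns the freshest pending peer.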

func (t *TopicPool) removeFromPendingPeers(nodeID enode.ID) {
	peer, ok := t.pendingPeers[nodeID]
	if !ok {
		return
	}
	delete(t.pendingPeers, nodeID)
	if peer.index != notQueuedIndex {
		heap.Remove(&t.discoveredPeersQueue, peer.index)
		delete(t.discoveredPeers, nodeID)
	}
}

// updatePendingPeer refreshes the discoveredTime of a pending peer and,
// if the peer is enqueued, fixes its position in the queue.
func (t *TopicPool) updatePendingPeer(nodeID enode.ID) {
	peer, ok := t.pendingPeers[nodeID]
	if !ok {
		return
	}
	peer.discoveredTime = t.clock.Now()
	if peer.index != notQueuedIndex {
		heap.Fix(&t.discoveredPeersQueue, peer.index)
	}
}

func (t *TopicPool) movePeerFromPoolToConnected(nodeID enode.ID) {
	peer, ok := t.pendingPeers[nodeID]
	if !ok {
		return
	}
	t.removeFromPendingPeers(nodeID)
	t.connectedPeers[nodeID] = peer.peerInfo
}

// SearchRunning returns true if the search is running.
func (t *TopicPool) SearchRunning() bool {
	return atomic.LoadInt32(&t.running) == 1
}

// MaxReached returns true if we are connected to the max number of peers.
func (t *TopicPool) MaxReached() bool {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return len(t.connectedPeers) == t.limits.Max
}

// BelowMin returns true if the current number of peers is below the min limit.
func (t *TopicPool) BelowMin() bool {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return len(t.connectedPeers) < t.limits.Min
}

// maxCachedPeersReached returns true if the max number of cached peers is
// reached. A limit of 0 disables caching and is treated as already reached.
func (t *TopicPool) maxCachedPeersReached() bool {
	if t.maxCachedPeers == 0 {
		return true
	}
	peers := t.cache.GetPeersRange(t.topic, t.maxCachedPeers)

	return len(peers) >= t.maxCachedPeers
}

// setStopSearchTimeout sets the timeout after which the current topic search
// will be stopped, unless a timeout has already been set.
func (t *TopicPool) setStopSearchTimeout(delay time.Duration) {
	if t.stopSearchTimeout != nil {
		return
	}
	now := t.clock.Now().Add(delay)
	t.stopSearchTimeout = &now
}

// isStopSearchDelayExpired returns true if the timeout to stop the current
// topic search has expired.
func (t *TopicPool) isStopSearchDelayExpired() bool {
	if t.stopSearchTimeout == nil {
		return false
	}
	return t.stopSearchTimeout.Before(t.clock.Now())
}

// readyToStopSearch returns true if any condition to stop the search is met.
func (t *TopicPool) readyToStopSearch() bool {
	return t.isStopSearchDelayExpired() || t.maxCachedPeersReached()
}
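
// A caller driving several topics can combine these predicates, as a
// sketch (topics is a hypothetical slice of TopicPoolInterface values):
//
//	for _, tp := range topics {
//		if tp.SearchRunning() && tp.readyToStopSearch() {
//			tp.StopSearch(server)
//		}
//	}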

// updateSyncMode changes the sync mode depending on the current number
// of connected peers and limits.
func (t *TopicPool) updateSyncMode() {
	newMode := t.slowMode
	if len(t.connectedPeers) < t.limits.Min {
		newMode = t.fastMode
	}
	t.setSyncMode(newMode)
}

func (t *TopicPool) setSyncMode(mode time.Duration) {
	if mode == t.currentMode {
		return
	}

	t.period <- mode
	t.currentMode = mode

	// if the selected mode is fast mode and the fast mode timeout was not
	// set yet, do it now
	if mode == t.fastMode && t.fastModeTimeoutCancel == nil {
		t.fastModeTimeoutCancel = t.limitFastMode(t.fastModeTimeout)
	}
	// remove the fast mode timeout as slow mode is selected now
	if mode == t.slowMode && t.fastModeTimeoutCancel != nil {
		close(t.fastModeTimeoutCancel)
		t.fastModeTimeoutCancel = nil
	}
}

func (t *TopicPool) limitFastMode(timeout time.Duration) chan struct{} {
	if timeout == 0 {
		return nil
	}

	cancel := make(chan struct{})

	t.poolWG.Add(1)
	go func() {
		defer t.poolWG.Done()

		select {
		case <-time.After(timeout):
			t.mu.Lock()
			t.setSyncMode(t.slowMode)
			t.mu.Unlock()
		case <-cancel:
			return
		}
	}()

	return cancel
}
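
// The resulting mode life cycle, as a timeline sketch: StartSearch begins
// in fast mode and limitFastMode arms a one-shot timer, so
//
//	t = 0               setSyncMode(fastMode) -> timer goroutine armed
//	t = fastModeTimeout timer fires           -> setSyncMode(slowMode)
//	                    slow mode selected    -> cancel channel closed
//
// and any earlier switch to slow mode (for example, enough peers connected
// in updateSyncMode) closes the cancel channel first, stopping the timer
// goroutine before it fires.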

// ConfirmAdded is called when a peer was added by the p2p server.
//  1. Skip the peer if it is not in our peer table.
//  2. Add the peer to the cache.
//  3. Disconnect the peer if it was connected after we reached the max limit
//     of peers. (We can't know in advance whether the peer will connect,
//     which is why we allow the limit to overflow for a short duration.)
//  4. Switch the search to slow mode if it is running.
func (t *TopicPool) ConfirmAdded(server *p2p.Server, nodeID enode.ID) {
	t.mu.Lock()
	defer t.mu.Unlock()

	peerInfoItem, ok := t.pendingPeers[nodeID]
	inbound := !ok || !peerInfoItem.added

	log.Debug("peer added event", "peer", nodeID.String(), "inbound", inbound)

	if inbound {
		return
	}

	peer := peerInfoItem.peerInfo // get explicit reference

	// an established connection means that the node
	// is a viable candidate for a connection and can be cached
	if err := t.cache.AddPeer(peer.node, t.topic); err != nil {
		log.Error("failed to persist a peer", "error", err)
	}

	t.movePeerFromPoolToConnected(nodeID)
	// if the upper limit is already reached, drop this peer
	if len(t.connectedPeers) > t.limits.Max {
		log.Debug("max limit is reached, drop the peer", "ID", nodeID, "topic", t.topic)
		peer.dismissed = true
		t.removeServerPeer(server, peer)
		return
	}

	// make sure `dismissed` is reset
	peer.dismissed = false

	// A peer was added, so check if we can switch to slow mode.
	if t.SearchRunning() {
		t.updateSyncMode()
	}
}

// ConfirmDropped is called when the server receives a drop event.
// 1. Skip the peer if it is not in our peer table.
// 2. If we requested the disconnect ourselves, re-queue the peer for later use.
// 3. If the connected number drops below the min limit, switch to fast mode.
// 4. Delete the peer from the cache and peer table.
// Returns false if the peer is not in our table or we requested its removal.
// Otherwise the peer is removed and true is returned.
func (t *TopicPool) ConfirmDropped(server *p2p.Server, nodeID enode.ID) bool {
	t.mu.Lock()
	defer t.mu.Unlock()

	// either inbound or connected from another topic
	peer, exist := t.connectedPeers[nodeID]
	if !exist {
		return false
	}

	log.Debug("disconnect", "ID", nodeID, "dismissed", peer.dismissed)

	delete(t.connectedPeers, nodeID)
	// The peer was removed by us because it exceeded the limit.
	// Add it back to the pool as it can be useful in the future.
	if peer.dismissed {
		t.addToPendingPeers(peer)
		// use the queue for peers that weren't added to the p2p server
		t.addToQueue(peer)
		return false
	}

	// If there was a network error, this event will be received
	// but the peer won't be removed from the static nodes set.
	// That's why we need to call `removeServerPeer` manually.
	t.removeServerPeer(server, peer)

	if err := t.cache.RemovePeer(nodeID, t.topic); err != nil {
		log.Error("failed to remove peer from cache", "error", err)
	}

	// As we removed a peer, update the sync strategy if needed.
	if t.SearchRunning() {
		t.updateSyncMode()
	}

	return true
}
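
// A sketch of how a caller (such as a peer pool owning several topics)
// might feed server events into these confirmations, using go-ethereum's
// p2p event subscription:
//
//	events := make(chan *p2p.PeerEvent, 20)
//	sub := server.SubscribeEvents(events)
//	defer sub.Unsubscribe()
//	for ev := range events {
//		switch ev.Type {
//		case p2p.PeerEventTypeAdd:
//			pool.ConfirmAdded(server, ev.Peer)
//		case p2p.PeerEventTypeDrop:
//			pool.ConfirmDropped(server, ev.Peer)
//		}
//	}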

// AddPeerFromTable checks if there is a valid peer in the local table and adds it to the server.
func (t *TopicPool) AddPeerFromTable(server *p2p.Server) *discv5.Node {
	// popFromQueue and addServerPeer mutate pool state, so a full write
	// lock is taken here; a read lock would let concurrent calls race on
	// the heap and maps.
	t.mu.Lock()
	defer t.mu.Unlock()

	// The most recently added peer is removed from the queue.
	// If it did not expire yet, it will be added to the server.
	// TODO(adam): investigate if it's worth to keep the peer in the queue
	// until the server confirms it is added and in the meanwhile only adjust its priority.
	peer := t.popFromQueue()
	if peer != nil && t.clock.Now().Before(peer.discoveredTime.Add(expirationPeriod)) {
		t.addServerPeer(server, peer)
		return peer.node
	}

	return nil
}
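
// expirationPeriod is defined elsewhere in this package; the comment on
// processFoundNode suggests it is roughly 60 minutes. An illustrative
// timeline under that assumption:
//
//	// peer discovered at 10:00, expirationPeriod assumed to be 60m
//	AddPeerFromTable(server) // at 10:59 -> peer added, its node returned
//	AddPeerFromTable(server) // at 11:01 -> popped peer discarded, nil returned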

// StartSearch creates discv5 queries and runs a loop to consume found peers.
func (t *TopicPool) StartSearch(server *p2p.Server) error {
	if atomic.LoadInt32(&t.running) == 1 {
		return nil
	}
	if !t.discovery.Running() {
		return ErrDiscv5NotRunning
	}
	atomic.StoreInt32(&t.running, 1)

	t.mu.Lock()
	defer t.mu.Unlock()

	t.quit = make(chan struct{})
	t.stopSearchTimeout = nil

	// `period` is used to notify about the current sync mode.
	t.period = make(chan time.Duration, 2)
	// use fast sync mode at the beginning
	t.setSyncMode(t.fastMode)

	// peers management
	found := make(chan *discv5.Node, 5) // 5 is a reasonable buffer for concurrently found nodes
	lookup := make(chan bool, 10)       // sufficiently buffered channel, just prevents blocking because of lookup

	for _, peer := range t.cache.GetPeersRange(t.topic, 5) {
		log.Debug("adding a peer from cache", "peer", peer)
		found <- peer
	}

	t.discWG.Add(1)
	go func() {
		if err := t.discovery.Discover(string(t.topic), t.period, found, lookup); err != nil {
			log.Error("error searching for topic", "topic", t.topic, "err", err)
		}
		t.discWG.Done()
	}()
	t.poolWG.Add(1)
	go func() {
		t.handleFoundPeers(server, found, lookup)
		t.poolWG.Done()
	}()

	return nil
}
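
// A typical lifecycle, as a sketch with error handling elided:
//
//	if err := pool.StartSearch(server); err != nil {
//		// ErrDiscv5NotRunning: start discovery first
//	}
//	// ...consume queued peers via AddPeerFromTable and react to server
//	// events via ConfirmAdded/ConfirmDropped...
//	pool.setStopSearchTimeout(30 * time.Second) // illustrative delay
//	if pool.readyToStopSearch() {
//		pool.StopSearch(server)
//	}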

func (t *TopicPool) handleFoundPeers(server *p2p.Server, found <-chan *discv5.Node, lookup <-chan bool) {
	selfID := discv5.PubkeyID(server.Self().Pubkey())
	for {
		select {
		case <-t.quit:
			return
		case <-lookup:
		case node := <-found:
			if node.ID == selfID {
				continue
			}
			if err := t.processFoundNode(server, node); err != nil {
				log.Error("failed to process found node", "node", node, "error", err)
			}
		}
	}
}

// processFoundNode is called when a node is discovered by a kademlia search query.
// Two important conditions:
//  1. every time a node is processed we need to update discoveredTime;
//     a peer is considered valid later only if it was discovered less than 60m ago
//  2. if the peer is already connected or the max limit is reached, we do not
//     add the peer to the p2p server
func (t *TopicPool) processFoundNode(server *p2p.Server, node *discv5.Node) error {
	t.mu.Lock()
	defer t.mu.Unlock()

	pk, err := node.ID.Pubkey()
	if err != nil {
		return err
	}

	nodeID := enode.PubkeyToIDV4(pk)

	log.Debug("peer found", "ID", nodeID, "topic", t.topic)

	// the peer is already connected, so update only discoveredTime
	if peer, ok := t.connectedPeers[nodeID]; ok {
		peer.discoveredTime = t.clock.Now()
		return nil
	}

	if _, ok := t.pendingPeers[nodeID]; ok {
		t.updatePendingPeer(nodeID)
	} else {
		t.addToPendingPeers(&peerInfo{
			discoveredTime: t.clock.Now(),
			node:           node,
			publicKey:      pk,
		})
	}
	log.Debug(
		"adding peer to a server", "peer", node.ID.String(),
		"connected", len(t.connectedPeers), "max", t.maxCachedPeers)

	// This can happen when the monotonic clock is not precise enough and
	// multiple peers get added at the same clock time, resulting in all
	// of them having the same discoveredTime.
	// At that point a random peer will be removed, sometimes being the
	// peer we just added.
	// We could make sure that the latest added peer is not removed,
	// but this is simpler, and peers will be fresh enough as the resolution
	// should be quite high (ms at least).
	// This has been reported only on Windows builds:
	// https://github.com/status-im/nim-status-client/issues/522
	if t.pendingPeers[nodeID] == nil {
		log.Debug("peer added has just been removed", "peer", nodeID)
		return nil
	}

	// the upper limit is not reached, so let's add this peer
	if len(t.connectedPeers) < t.maxCachedPeers {
		t.addServerPeer(server, t.pendingPeers[nodeID].peerInfo)
	} else {
		t.addToQueue(t.pendingPeers[nodeID].peerInfo)
	}

	return nil
}

// addServerPeer marks the peer as added and registers it with the p2p server,
// which maintains a connection to it as a static peer.
func (t *TopicPool) addServerPeer(server *p2p.Server, info *peerInfo) {
	info.added = true
	n := enode.NewV4(info.publicKey, info.node.IP, int(info.node.TCP), int(info.node.UDP))
	server.AddPeer(n)
}

// removeServerPeer marks the peer as not added and removes it from the p2p
// server's static nodes set.
func (t *TopicPool) removeServerPeer(server *p2p.Server, info *peerInfo) {
	info.added = false
	n := enode.NewV4(info.publicKey, info.node.IP, int(info.node.TCP), int(info.node.UDP))
	server.RemovePeer(n)
}
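
// The discv5 table speaks discv5.Node (IP, UDP/TCP ports, discv5 ID),
// while p2p.Server wants an *enode.Node keyed by a secp256k1 public key;
// enode.NewV4 bridges the two. A standalone sketch with made-up values:
//
//	// pk is an *ecdsa.PublicKey recovered from the discv5 ID
//	n := enode.NewV4(pk, net.ParseIP("203.0.113.1"), 30303, 30303)
//	server.AddPeer(n)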

func (t *TopicPool) isStopped() bool {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.currentMode == 0
}

// StopSearch stops the topic search: it closes the quit channel, cancels the
// fast mode timeout, and waits for the search goroutines to exit.
func (t *TopicPool) StopSearch(server *p2p.Server) {
	if !atomic.CompareAndSwapInt32(&t.running, 1, 0) {
		return
	}
	if t.quit == nil {
		return
	}
	select {
	case <-t.quit:
		return
	default:
	}
	log.Debug("stopping search", "topic", t.topic)
	close(t.quit)
	t.mu.Lock()
	if t.fastModeTimeoutCancel != nil {
		close(t.fastModeTimeoutCancel)
		t.fastModeTimeoutCancel = nil
	}
	t.currentMode = 0
	t.mu.Unlock()
	// wait for poolWG to exit because it writes to the period channel
	t.poolWG.Wait()
	close(t.period)
	t.discWG.Wait()
}
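
// Shutdown ordering matters here: the limitFastMode goroutine (tracked by
// poolWG) may call setSyncMode, which writes to t.period, so the channel
// can only be closed once poolWG is done; Discover (tracked by discWG)
// consumes t.period until it is closed, hence the final discWG.Wait().
// Roughly:
//
//	close(quit) -> poolWG.Wait() -> close(period) -> discWG.Wait()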

// Topic exposes the internal discovery topic.
func (t *TopicPool) Topic() discv5.Topic {
	return t.topic
}

// SetLimits sets the limits for the current TopicPool.
func (t *TopicPool) SetLimits(limits params.Limits) {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.limits = limits
}