github.com/status-im/status-go@v1.1.0/peers/topicpool.go

package peers

import (
	"container/heap"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p"
	"github.com/ethereum/go-ethereum/p2p/discv5"
	"github.com/ethereum/go-ethereum/p2p/enode"

	"github.com/status-im/status-go/discovery"
	"github.com/status-im/status-go/params"
)

const (
	// notQueuedIndex marks an item that is not queued in the heap queue.
	notQueuedIndex = -1
)

// maxCachedPeersMultiplier is multiplied by the peers max limit
// to get the maximum number of cached peers allowed.
var maxCachedPeersMultiplier = 1

// maxPendingPeersMultiplier is multiplied by the peers max limit
// to get the maximum number of pending peers allowed.
var maxPendingPeersMultiplier = 2

// TopicPoolInterface describes the TopicPool behaviour.
type TopicPoolInterface interface {
	StopSearch(server *p2p.Server)
	BelowMin() bool
	SearchRunning() bool
	StartSearch(server *p2p.Server) error
	ConfirmDropped(server *p2p.Server, nodeID enode.ID) bool
	AddPeerFromTable(server *p2p.Server) *discv5.Node
	MaxReached() bool
	ConfirmAdded(server *p2p.Server, nodeID enode.ID)
	isStopped() bool
	Topic() discv5.Topic
	SetLimits(limits params.Limits)
	setStopSearchTimeout(delay time.Duration)
	readyToStopSearch() bool
}

// Clock abstracts the time source.
type Clock interface {
	Now() time.Time
}

type realClock struct{}

func (realClock) Now() time.Time { return time.Now() }

// newTopicPool returns an instance of TopicPool.
func newTopicPool(discovery discovery.Discovery, topic discv5.Topic, limits params.Limits, slowMode, fastMode time.Duration, cache *Cache) *TopicPool {
	pool := TopicPool{
		discovery:            discovery,
		topic:                topic,
		limits:               limits,
		fastMode:             fastMode,
		slowMode:             slowMode,
		fastModeTimeout:      DefaultTopicFastModeTimeout,
		pendingPeers:         make(map[enode.ID]*peerInfoItem),
		discoveredPeersQueue: make(peerPriorityQueue, 0),
		discoveredPeers:      make(map[enode.ID]bool),
		connectedPeers:       make(map[enode.ID]*peerInfo),
		cache:                cache,
		maxCachedPeers:       limits.Max * maxCachedPeersMultiplier,
		maxPendingPeers:      limits.Max * maxPendingPeersMultiplier,
		clock:                realClock{},
	}
	heap.Init(&pool.discoveredPeersQueue)

	return &pool
}
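// Illustrative sketch (not part of the original file): constructing a pool.
// The discovery implementation `disc`, the `cache`, and all values below are
// hypothetical; the argument order follows the signature above (slowMode
// before fastMode).
//
//	limits := params.Limits{Min: 2, Max: 3}
//	pool := newTopicPool(
//		disc, discv5.Topic("whisper"), limits,
//		30*time.Second,       // slowMode: relaxed discovery interval
//		500*time.Millisecond, // fastMode: aggressive interval while below Min
//		cache,
//	)
//	// With Max = 3 the pool allows at most 3 cached peers (multiplier 1)
//	// and at most 6 pending peers (multiplier 2).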
// TopicPool manages peers for a topic.
type TopicPool struct {
	discovery discovery.Discovery

	// configuration
	topic           discv5.Topic
	limits          params.Limits
	fastMode        time.Duration
	slowMode        time.Duration
	fastModeTimeout time.Duration

	mu     sync.RWMutex
	discWG sync.WaitGroup
	poolWG sync.WaitGroup
	quit   chan struct{}

	running int32

	currentMode           time.Duration
	period                chan time.Duration
	fastModeTimeoutCancel chan struct{}

	pendingPeers         map[enode.ID]*peerInfoItem // found peers that were requested to connect but are not yet confirmed
	discoveredPeersQueue peerPriorityQueue          // priority queue to find the most recently discovered peers; does not contain peers requested to connect
	discoveredPeers      map[enode.ID]bool          // remembers which peers have already been discovered and are enqueued
	connectedPeers       map[enode.ID]*peerInfo     // currently connected peers

	stopSearchTimeout *time.Time

	maxPendingPeers int
	maxCachedPeers  int
	cache           *Cache

	clock Clock
}

func (t *TopicPool) addToPendingPeers(peer *peerInfo) {
	if _, ok := t.pendingPeers[peer.NodeID()]; ok {
		return
	}
	t.pendingPeers[peer.NodeID()] = &peerInfoItem{
		peerInfo: peer,
		index:    notQueuedIndex,
	}

	// maxPendingPeers = 0 means no limits.
	if t.maxPendingPeers == 0 || t.maxPendingPeers >= len(t.pendingPeers) {
		return
	}

	// The pending set is over capacity: evict the oldest discovered peer.
	var oldestPeer *peerInfo
	for _, i := range t.pendingPeers {
		if oldestPeer != nil && oldestPeer.discoveredTime.Before(i.peerInfo.discoveredTime) {
			continue
		}

		oldestPeer = i.peerInfo
	}

	t.removeFromPendingPeers(oldestPeer.NodeID())
}

// addToQueue adds the passed peer to the queue if it is already pending.
func (t *TopicPool) addToQueue(peer *peerInfo) {
	if p, ok := t.pendingPeers[peer.NodeID()]; ok {
		if _, ok := t.discoveredPeers[peer.NodeID()]; ok {
			return
		}

		heap.Push(&t.discoveredPeersQueue, p)
		t.discoveredPeers[peer.NodeID()] = true
	}
}

func (t *TopicPool) popFromQueue() *peerInfo {
	if t.discoveredPeersQueue.Len() == 0 {
		return nil
	}
	item := heap.Pop(&t.discoveredPeersQueue).(*peerInfoItem)
	item.index = notQueuedIndex
	delete(t.discoveredPeers, item.peerInfo.NodeID())
	return item.peerInfo
}

func (t *TopicPool) removeFromPendingPeers(nodeID enode.ID) {
	peer, ok := t.pendingPeers[nodeID]
	if !ok {
		return
	}
	delete(t.pendingPeers, nodeID)
	if peer.index != notQueuedIndex {
		heap.Remove(&t.discoveredPeersQueue, peer.index)
		delete(t.discoveredPeers, nodeID)
	}
}

func (t *TopicPool) updatePendingPeer(nodeID enode.ID) {
	peer, ok := t.pendingPeers[nodeID]
	if !ok {
		return
	}
	peer.discoveredTime = t.clock.Now()
	if peer.index != notQueuedIndex {
		heap.Fix(&t.discoveredPeersQueue, peer.index)
	}
}

func (t *TopicPool) movePeerFromPoolToConnected(nodeID enode.ID) {
	peer, ok := t.pendingPeers[nodeID]
	if !ok {
		return
	}
	t.removeFromPendingPeers(nodeID)
	t.connectedPeers[nodeID] = peer.peerInfo
}

// SearchRunning returns true if the search is running.
func (t *TopicPool) SearchRunning() bool {
	return atomic.LoadInt32(&t.running) == 1
}
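// Illustrative sketch (not part of the original file): how a discovered peer
// moves through the three bookkeeping structures above. `p` and its node ID
// are hypothetical.
//
//	t.addToPendingPeers(p)                    // tracked in pendingPeers, index == notQueuedIndex
//	t.addToQueue(p)                           // pushed on discoveredPeersQueue, discoveredPeers[id] = true
//	q := t.popFromQueue()                     // q == p; dequeued, ready to be offered to the server
//	t.movePeerFromPoolToConnected(q.NodeID()) // pendingPeers -> connectedPeers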
// MaxReached returns true if we are connected to the max number of peers.
func (t *TopicPool) MaxReached() bool {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return len(t.connectedPeers) == t.limits.Max
}

// BelowMin returns true if the current number of peers is below the min limit.
func (t *TopicPool) BelowMin() bool {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return len(t.connectedPeers) < t.limits.Min
}

// maxCachedPeersReached returns true if the max number of cached peers is reached.
func (t *TopicPool) maxCachedPeersReached() bool {
	if t.maxCachedPeers == 0 {
		return true
	}
	peers := t.cache.GetPeersRange(t.topic, t.maxCachedPeers)

	return len(peers) >= t.maxCachedPeers
}

// setStopSearchTimeout sets the timeout after which the current topic search
// is stopped, if it has not been stopped before.
func (t *TopicPool) setStopSearchTimeout(delay time.Duration) {
	if t.stopSearchTimeout != nil {
		return
	}
	now := t.clock.Now().Add(delay)
	t.stopSearchTimeout = &now
}

// isStopSearchDelayExpired returns true if the timeout to stop the current
// topic search has expired.
func (t *TopicPool) isStopSearchDelayExpired() bool {
	if t.stopSearchTimeout == nil {
		return false
	}
	return t.stopSearchTimeout.Before(t.clock.Now())
}

// readyToStopSearch returns true if all conditions to stop the search are met.
func (t *TopicPool) readyToStopSearch() bool {
	return t.isStopSearchDelayExpired() || t.maxCachedPeersReached()
}

// updateSyncMode changes the sync mode depending on the current number
// of connected peers and limits.
func (t *TopicPool) updateSyncMode() {
	newMode := t.slowMode
	if len(t.connectedPeers) < t.limits.Min {
		newMode = t.fastMode
	}
	t.setSyncMode(newMode)
}

func (t *TopicPool) setSyncMode(mode time.Duration) {
	if mode == t.currentMode {
		return
	}

	t.period <- mode
	t.currentMode = mode

	// if the selected mode is fast mode and the fast mode timeout
	// was not set yet, do it now
	if mode == t.fastMode && t.fastModeTimeoutCancel == nil {
		t.fastModeTimeoutCancel = t.limitFastMode(t.fastModeTimeout)
	}
	// remove the fast mode timeout as slow mode is selected now
	if mode == t.slowMode && t.fastModeTimeoutCancel != nil {
		close(t.fastModeTimeoutCancel)
		t.fastModeTimeoutCancel = nil
	}
}

func (t *TopicPool) limitFastMode(timeout time.Duration) chan struct{} {
	if timeout == 0 {
		return nil
	}

	cancel := make(chan struct{})

	t.poolWG.Add(1)
	go func() {
		defer t.poolWG.Done()

		select {
		case <-time.After(timeout):
			t.mu.Lock()
			t.setSyncMode(t.slowMode)
			t.mu.Unlock()
		case <-cancel:
			return
		}
	}()

	return cancel
}
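// Illustrative sketch (not part of the original file): the mode selection rule
// implemented by updateSyncMode and limitFastMode, with hypothetical values
// limits.Min = 2, fastMode = 500ms, slowMode = 30s:
//
//	len(connectedPeers) == 0 -> fastMode (poll discovery every 500ms)
//	len(connectedPeers) == 1 -> fastMode
//	len(connectedPeers) >= 2 -> slowMode (poll discovery every 30s)
//
// limitFastMode additionally guarantees that fast mode is abandoned after
// fastModeTimeout even if Min is never reached.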
// ConfirmAdded is called when a peer is added by the p2p server.
//  1. Skip the peer if it is not in our peer table.
//  2. Add the peer to the cache.
//  3. Disconnect the peer if it was connected after we reached the max limit
//     of peers (we can't know in advance whether a peer will connect, which
//     is why we allow a short-lived overflow).
//  4. Switch the search to slow mode if it is running.
func (t *TopicPool) ConfirmAdded(server *p2p.Server, nodeID enode.ID) {
	t.mu.Lock()
	defer t.mu.Unlock()

	peerInfoItem, ok := t.pendingPeers[nodeID]
	inbound := !ok || !peerInfoItem.added

	log.Debug("peer added event", "peer", nodeID.String(), "inbound", inbound)

	if inbound {
		return
	}

	peer := peerInfoItem.peerInfo // get explicit reference

	// an established connection means that the node
	// is a viable candidate for a connection and can be cached
	if err := t.cache.AddPeer(peer.node, t.topic); err != nil {
		log.Error("failed to persist a peer", "error", err)
	}

	t.movePeerFromPoolToConnected(nodeID)
	// if the upper limit is already reached, drop this peer
	if len(t.connectedPeers) > t.limits.Max {
		log.Debug("max limit is reached, dropping the peer", "ID", nodeID, "topic", t.topic)
		peer.dismissed = true
		t.removeServerPeer(server, peer)
		return
	}

	// make sure `dismissed` is reset
	peer.dismissed = false

	// A peer was added, so check if we can switch to slow mode.
	if t.SearchRunning() {
		t.updateSyncMode()
	}
}

// ConfirmDropped is called when the server receives a drop event.
//  1. Skip the peer if it is not in our peer table.
//  2. If we requested the disconnect ourselves, put the peer back in the pool.
//  3. If the connected number drops below the min limit, switch to fast mode.
//  4. Delete the peer from the cache and peer table.
//
// Returns false if the peer is not in our table or we requested its removal.
// Otherwise the peer is removed and true is returned.
func (t *TopicPool) ConfirmDropped(server *p2p.Server, nodeID enode.ID) bool {
	t.mu.Lock()
	defer t.mu.Unlock()

	// either inbound or connected from another topic
	peer, exist := t.connectedPeers[nodeID]
	if !exist {
		return false
	}

	log.Debug("disconnect", "ID", nodeID, "dismissed", peer.dismissed)

	delete(t.connectedPeers, nodeID)
	// The peer was removed by us because it exceeded the limit.
	// Add it back to the pool, as it can be useful in the future.
	if peer.dismissed {
		t.addToPendingPeers(peer)
		// use the queue for peers that weren't added to the p2p server
		t.addToQueue(peer)
		return false
	}

	// If there was a network error, this event will be received
	// but the peer won't be removed from the static nodes set.
	// That's why we need to call `removeServerPeer` manually.
	t.removeServerPeer(server, peer)

	if err := t.cache.RemovePeer(nodeID, t.topic); err != nil {
		log.Error("failed to remove peer from cache", "error", err)
	}

	// As we removed a peer, update the sync strategy if needed.
	if t.SearchRunning() {
		t.updateSyncMode()
	}

	return true
}
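// Illustrative sketch (not part of the original file): the overflow round trip
// that ConfirmAdded and ConfirmDropped implement together. Assume a
// hypothetical pool with limits.Max = 1 that already has one connected peer:
//
//	t.ConfirmAdded(srv, extraID)   // over the limit: peer is cached, marked
//	                               // dismissed, and RemovePeer is requested
//	t.ConfirmDropped(srv, extraID) // dismissed is set, so the peer returns to
//	                               // pendingPeers and the queue; returns false
//	                               // because the drop was requested by us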
// AddPeerFromTable checks if there is a valid peer in the local table and adds it to the server.
func (t *TopicPool) AddPeerFromTable(server *p2p.Server) *discv5.Node {
	// popFromQueue mutates the queue and the discoveredPeers map,
	// so an exclusive lock is required here.
	t.mu.Lock()
	defer t.mu.Unlock()

	// The most recently added peer is removed from the queue.
	// If it has not expired yet, it will be added to the server.
	// TODO(adam): investigate if it's worth keeping the peer in the queue
	// until the server confirms it is added, and in the meanwhile only adjust its priority.
	peer := t.popFromQueue()
	if peer != nil && t.clock.Now().Before(peer.discoveredTime.Add(expirationPeriod)) {
		t.addServerPeer(server, peer)
		return peer.node
	}

	return nil
}

// StartSearch creates discv5 queries and runs a loop to consume found peers.
func (t *TopicPool) StartSearch(server *p2p.Server) error {
	if atomic.LoadInt32(&t.running) == 1 {
		return nil
	}
	if !t.discovery.Running() {
		return ErrDiscv5NotRunning
	}
	atomic.StoreInt32(&t.running, 1)

	t.mu.Lock()
	defer t.mu.Unlock()

	t.quit = make(chan struct{})
	t.stopSearchTimeout = nil

	// `period` is used to notify about the current sync mode.
	t.period = make(chan time.Duration, 2)
	// use fast sync mode at the beginning
	t.setSyncMode(t.fastMode)

	// peers management
	found := make(chan *discv5.Node, 5) // 5 is a reasonable buffer for concurrently found nodes
	lookup := make(chan bool, 10)       // sufficiently buffered channel; just prevents blocking on lookup events

	for _, peer := range t.cache.GetPeersRange(t.topic, 5) {
		log.Debug("adding a peer from cache", "peer", peer)
		found <- peer
	}

	t.discWG.Add(1)
	go func() {
		if err := t.discovery.Discover(string(t.topic), t.period, found, lookup); err != nil {
			log.Error("error searching for topic", "topic", t.topic, "err", err)
		}
		t.discWG.Done()
	}()
	t.poolWG.Add(1)
	go func() {
		t.handleFoundPeers(server, found, lookup)
		t.poolWG.Done()
	}()

	return nil
}

func (t *TopicPool) handleFoundPeers(server *p2p.Server, found <-chan *discv5.Node, lookup <-chan bool) {
	selfID := discv5.PubkeyID(server.Self().Pubkey())
	for {
		select {
		case <-t.quit:
			return
		case <-lookup:
		case node := <-found:
			if node.ID == selfID {
				continue
			}
			if err := t.processFoundNode(server, node); err != nil {
				log.Error("failed to process found node", "node", node, "error", err)
			}
		}
	}
}
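// Illustrative sketch (not part of the original file): the channel plumbing
// set up by StartSearch and drained by handleFoundPeers:
//
//	period: TopicPool  -> Discover         (current polling interval, fast or slow)
//	found:  Discover   -> handleFoundPeers (discovered discv5 nodes)
//	lookup: Discover   -> handleFoundPeers (lookup progress; drained and discarded)
//	quit:   StopSearch -> handleFoundPeers (termination signal)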
// processFoundNode is called when a node is discovered by a kademlia search query.
// Two important conditions:
//  1. Every time a node is processed, its discoveredTime must be updated,
//     because the peer is later considered valid only if it was discovered
//     less than 60 minutes ago.
//  2. If the peer is already connected, or the max limit is reached, the peer
//     is not added to the p2p server.
func (t *TopicPool) processFoundNode(server *p2p.Server, node *discv5.Node) error {
	t.mu.Lock()
	defer t.mu.Unlock()

	pk, err := node.ID.Pubkey()
	if err != nil {
		return err
	}

	nodeID := enode.PubkeyToIDV4(pk)

	log.Debug("peer found", "ID", nodeID, "topic", t.topic)

	// the peer is already connected, so update only discoveredTime
	if peer, ok := t.connectedPeers[nodeID]; ok {
		peer.discoveredTime = t.clock.Now()
		return nil
	}

	if _, ok := t.pendingPeers[nodeID]; ok {
		t.updatePendingPeer(nodeID)
	} else {
		t.addToPendingPeers(&peerInfo{
			discoveredTime: t.clock.Now(),
			node:           node,
			publicKey:      pk,
		})
	}
	log.Debug(
		"adding peer to a server", "peer", node.ID.String(),
		"connected", len(t.connectedPeers), "max", t.maxCachedPeers)

	// This can happen when the monotonic clock is not precise enough and
	// multiple peers get added at the same clock time, resulting in all
	// of them having the same discoveredTime.
	// At that point a random peer will be removed, sometimes being the
	// peer we just added.
	// We could make sure that the latest added peer is not removed,
	// but this is simpler, and peers will be fresh enough as the resolution
	// should be quite high (ms at least).
	// This has been reported on Windows builds only:
	// https://github.com/status-im/nim-status-client/issues/522
	if t.pendingPeers[nodeID] == nil {
		log.Debug("added peer has already been removed", "peer", nodeID)
		return nil
	}

	// the upper limit is not reached, so let's add this peer
	if len(t.connectedPeers) < t.maxCachedPeers {
		t.addServerPeer(server, t.pendingPeers[nodeID].peerInfo)
	} else {
		t.addToQueue(t.pendingPeers[nodeID].peerInfo)
	}

	return nil
}

func (t *TopicPool) addServerPeer(server *p2p.Server, info *peerInfo) {
	info.added = true
	n := enode.NewV4(info.publicKey, info.node.IP, int(info.node.TCP), int(info.node.UDP))
	server.AddPeer(n)
}

func (t *TopicPool) removeServerPeer(server *p2p.Server, info *peerInfo) {
	info.added = false
	n := enode.NewV4(info.publicKey, info.node.IP, int(info.node.TCP), int(info.node.UDP))
	server.RemovePeer(n)
}

// isStopped reports whether the search has been stopped
// (currentMode is zeroed by StopSearch).
func (t *TopicPool) isStopped() bool {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.currentMode == 0
}

// StopSearch stops the search and waits for background workers to finish.
func (t *TopicPool) StopSearch(server *p2p.Server) {
	if !atomic.CompareAndSwapInt32(&t.running, 1, 0) {
		return
	}
	if t.quit == nil {
		return
	}
	select {
	case <-t.quit:
		return
	default:
	}
	log.Debug("stopping search", "topic", t.topic)
	close(t.quit)
	t.mu.Lock()
	if t.fastModeTimeoutCancel != nil {
		close(t.fastModeTimeoutCancel)
		t.fastModeTimeoutCancel = nil
	}
	t.currentMode = 0
	t.mu.Unlock()
	// wait for poolWG to exit because it writes to the period channel
	t.poolWG.Wait()
	close(t.period)
	t.discWG.Wait()
}

// Topic exposes the internal discovery topic.
func (t *TopicPool) Topic() discv5.Topic {
	return t.topic
}

// SetLimits sets the limits for the current TopicPool.
func (t *TopicPool) SetLimits(limits params.Limits) {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.limits = limits
}
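// Illustrative sketch (not part of the original file): the lifecycle a caller
// such as a peer-pool manager might drive, assuming `pool`, `srv`, and `id`
// exist elsewhere:
//
//	_ = pool.StartSearch(srv)        // begin discovery in fast mode
//	// ... react to p2p server peer events ...
//	pool.ConfirmAdded(srv, id)       // on a peer-added event
//	_ = pool.ConfirmDropped(srv, id) // on a peer-dropped event
//	if pool.readyToStopSearch() {
//		pool.StopSearch(srv) // idempotent; waits for background workers
//	}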