github.com/status-im/status-go@v1.1.0/peers/peerpool.go (about) 1 package peers 2 3 import ( 4 "crypto/ecdsa" 5 "errors" 6 "sync" 7 "time" 8 9 "github.com/ethereum/go-ethereum/event" 10 "github.com/ethereum/go-ethereum/log" 11 "github.com/ethereum/go-ethereum/p2p" 12 "github.com/ethereum/go-ethereum/p2p/discv5" 13 "github.com/ethereum/go-ethereum/p2p/enode" 14 15 "github.com/status-im/status-go/discovery" 16 "github.com/status-im/status-go/params" 17 "github.com/status-im/status-go/peers/verifier" 18 "github.com/status-im/status-go/signal" 19 ) 20 21 var ( 22 // ErrDiscv5NotRunning returned when pool is started but discover v5 is not running or not enabled. 23 ErrDiscv5NotRunning = errors.New("Discovery v5 is not running") 24 ) 25 26 // PoolEvent is a type used to for peer pool events. 27 type PoolEvent string 28 29 const ( 30 immediately = 0 * time.Minute 31 // expirationPeriod is an amount of time while peer is considered as a connectable 32 expirationPeriod = 60 * time.Minute 33 // discoveryRestartTimeout defines how often loop will try to start discovery server 34 discoveryRestartTimeout = 2 * time.Second 35 // DefaultFastSync is a recommended value for aggressive peers search. 36 DefaultFastSync = 3 * time.Second 37 // DefaultSlowSync is a recommended value for slow (background) peers search. 38 DefaultSlowSync = 30 * time.Second 39 // DefaultDiscV5Timeout is a timeout after which Discv5 is stopped. 40 DefaultDiscV5Timeout = 3 * time.Minute 41 // DefaultTopicFastModeTimeout is a timeout after which sync mode is switched to slow mode. 42 DefaultTopicFastModeTimeout = 30 * time.Second 43 // DefaultTopicStopSearchDelay is the default delay when stopping a topic search. 44 DefaultTopicStopSearchDelay = 10 * time.Second 45 ) 46 47 // Options is a struct with PeerPool configuration. 48 type Options struct { 49 FastSync time.Duration 50 SlowSync time.Duration 51 // After this time, Discovery is stopped even if max peers is not reached. 52 DiscServerTimeout time.Duration 53 // AllowStop allows stopping Discovery when reaching max peers or after timeout. 54 AllowStop bool 55 // TopicStopSearchDelay time stopSearch will be waiting for max cached peers to be 56 // filled before really stopping the search. 57 TopicStopSearchDelay time.Duration 58 // TrustedMailServers is a list of trusted nodes. 59 TrustedMailServers []enode.ID 60 } 61 62 // NewDefaultOptions returns a struct with default Options. 63 func NewDefaultOptions() *Options { 64 return &Options{ 65 FastSync: DefaultFastSync, 66 SlowSync: DefaultSlowSync, 67 DiscServerTimeout: DefaultDiscV5Timeout, 68 AllowStop: false, 69 TopicStopSearchDelay: DefaultTopicStopSearchDelay, 70 } 71 } 72 73 type peerInfo struct { 74 // discoveredTime last time when node was found by v5 75 discoveredTime time.Time 76 // dismissed is true when our node requested a disconnect 77 dismissed bool 78 // added is true when the node tries to add this peer to a server 79 added bool 80 81 node *discv5.Node 82 // store public key separately to make peerInfo more independent from discv5 83 publicKey *ecdsa.PublicKey 84 } 85 86 func (p *peerInfo) NodeID() enode.ID { 87 return enode.PubkeyToIDV4(p.publicKey) 88 } 89 90 // PeerPool manages discovered peers and connects them to p2p server 91 type PeerPool struct { 92 opts *Options 93 94 discovery discovery.Discovery 95 96 // config can be set only once per pool life cycle 97 config map[discv5.Topic]params.Limits 98 cache *Cache 99 100 mu sync.RWMutex 101 timeoutMu sync.RWMutex 102 topics []TopicPoolInterface 103 serverSubscription event.Subscription 104 events chan *p2p.PeerEvent 105 quit chan struct{} 106 wg sync.WaitGroup 107 timeout <-chan time.Time 108 updateTopic chan *updateTopicRequest 109 } 110 111 // NewPeerPool creates instance of PeerPool 112 func NewPeerPool(discovery discovery.Discovery, config map[discv5.Topic]params.Limits, cache *Cache, options *Options) *PeerPool { 113 return &PeerPool{ 114 opts: options, 115 discovery: discovery, 116 config: config, 117 cache: cache, 118 } 119 } 120 121 func (p *PeerPool) setDiscoveryTimeout() { 122 p.timeoutMu.Lock() 123 defer p.timeoutMu.Unlock() 124 if p.opts.AllowStop && p.opts.DiscServerTimeout > 0 { 125 p.timeout = time.After(p.opts.DiscServerTimeout) 126 } 127 } 128 129 // Start creates topic pool for each topic in config and subscribes to server events. 130 func (p *PeerPool) Start(server *p2p.Server) error { 131 if !p.discovery.Running() { 132 return ErrDiscv5NotRunning 133 } 134 135 p.mu.Lock() 136 defer p.mu.Unlock() 137 138 // init channels 139 p.quit = make(chan struct{}) 140 p.updateTopic = make(chan *updateTopicRequest) 141 p.setDiscoveryTimeout() 142 143 // subscribe to peer events 144 p.events = make(chan *p2p.PeerEvent, 20) 145 p.serverSubscription = server.SubscribeEvents(p.events) 146 p.wg.Add(1) 147 go func() { 148 p.handleServerPeers(server, p.events) 149 p.wg.Done() 150 }() 151 152 // collect topics and start searching for nodes 153 p.topics = make([]TopicPoolInterface, 0, len(p.config)) 154 for topic, limits := range p.config { 155 var topicPool TopicPoolInterface 156 t := newTopicPool(p.discovery, topic, limits, p.opts.SlowSync, p.opts.FastSync, p.cache) 157 if topic == MailServerDiscoveryTopic { 158 v, err := p.initVerifier() 159 if err != nil { 160 return err 161 } 162 topicPool = newCacheOnlyTopicPool(t, v) 163 } else { 164 topicPool = t 165 } 166 if err := topicPool.StartSearch(server); err != nil { 167 return err 168 } 169 p.topics = append(p.topics, topicPool) 170 } 171 172 // discovery must be already started when pool is started 173 signal.SendDiscoveryStarted() 174 175 return nil 176 } 177 178 func (p *PeerPool) initVerifier() (v Verifier, err error) { 179 return verifier.NewLocalVerifier(p.opts.TrustedMailServers), nil 180 } 181 182 func (p *PeerPool) startDiscovery() error { 183 if p.discovery.Running() { 184 return nil 185 } 186 187 if err := p.discovery.Start(); err != nil { 188 return err 189 } 190 191 p.mu.Lock() 192 p.setDiscoveryTimeout() 193 p.mu.Unlock() 194 195 signal.SendDiscoveryStarted() 196 197 return nil 198 } 199 200 func (p *PeerPool) stopDiscovery(server *p2p.Server) { 201 if !p.discovery.Running() { 202 return 203 } 204 205 if err := p.discovery.Stop(); err != nil { 206 log.Error("discovery errored when stopping", "err", err) 207 } 208 for _, t := range p.topics { 209 t.StopSearch(server) 210 } 211 212 p.timeoutMu.Lock() 213 p.timeout = nil 214 p.timeoutMu.Unlock() 215 216 signal.SendDiscoveryStopped() 217 } 218 219 // restartDiscovery and search for topics that have peer count below min 220 func (p *PeerPool) restartDiscovery(server *p2p.Server) error { 221 if !p.discovery.Running() { 222 if err := p.startDiscovery(); err != nil { 223 return err 224 } 225 log.Debug("restarted discovery from peer pool") 226 } 227 for _, t := range p.topics { 228 if !t.BelowMin() || t.SearchRunning() { 229 continue 230 } 231 err := t.StartSearch(server) 232 if err != nil { 233 log.Error("search failed to start", "error", err) 234 } 235 } 236 return nil 237 } 238 239 // handleServerPeers watches server peer events, notifies topic pools about changes 240 // in the peer set and stops the discv5 if all topic pools collected enough peers. 241 // 242 // @TODO(adam): split it into peers and discovery management loops. This should 243 // simplify the whole logic and allow to remove `timeout` field from `PeerPool`. 244 func (p *PeerPool) handleServerPeers(server *p2p.Server, events <-chan *p2p.PeerEvent) { 245 retryDiscv5 := make(chan struct{}, 1) 246 stopDiscv5 := make(chan struct{}, 1) 247 248 queueRetry := func(d time.Duration) { 249 go func() { 250 time.Sleep(d) 251 select { 252 case retryDiscv5 <- struct{}{}: 253 default: 254 } 255 }() 256 257 } 258 259 queueStop := func() { 260 go func() { 261 select { 262 case stopDiscv5 <- struct{}{}: 263 default: 264 } 265 }() 266 267 } 268 269 for { 270 // We use a separate lock for timeout, as this loop should 271 // always be running, otherwise the p2p.Server will hang. 272 // Because the handler of events might potentially hang on the 273 // server, deadlocking if this loop is waiting for the global lock. 274 // NOTE: this code probably needs to be refactored and simplified 275 // as it's difficult to follow the asynchronous nature of it. 276 p.timeoutMu.RLock() 277 timeout := p.timeout 278 p.timeoutMu.RUnlock() 279 280 select { 281 case <-p.quit: 282 log.Debug("stopping DiscV5 because of quit") 283 p.stopDiscovery(server) 284 return 285 case <-timeout: 286 log.Info("DiscV5 timed out") 287 p.stopDiscovery(server) 288 case <-retryDiscv5: 289 if err := p.restartDiscovery(server); err != nil { 290 log.Error("starting discv5 failed", "error", err, "retry", discoveryRestartTimeout) 291 queueRetry(discoveryRestartTimeout) 292 } 293 case <-stopDiscv5: 294 p.handleStopTopics(server) 295 case req := <-p.updateTopic: 296 if p.updateTopicLimits(server, req) == nil { 297 if !p.discovery.Running() { 298 queueRetry(immediately) 299 } 300 } 301 case event := <-events: 302 // NOTE: handlePeerEventType needs to be called asynchronously 303 // as it publishes on the <-events channel, leading to a deadlock 304 // if events channel is full. 305 go p.handlePeerEventType(server, event, queueRetry, queueStop) 306 } 307 } 308 } 309 310 func (p *PeerPool) handlePeerEventType(server *p2p.Server, event *p2p.PeerEvent, queueRetry func(time.Duration), queueStop func()) { 311 p.mu.Lock() 312 defer p.mu.Unlock() 313 314 var shouldRetry bool 315 var shouldStop bool 316 switch event.Type { 317 case p2p.PeerEventTypeDrop: 318 log.Debug("confirm peer dropped", "ID", event.Peer) 319 if p.handleDroppedPeer(server, event.Peer) { 320 shouldRetry = true 321 } 322 case p2p.PeerEventTypeAdd: // skip other events 323 log.Debug("confirm peer added", "ID", event.Peer) 324 p.handleAddedPeer(server, event.Peer) 325 shouldStop = true 326 default: 327 return 328 } 329 330 // First we send the discovery summary 331 SendDiscoverySummary(server.PeersInfo()) 332 333 // then we send the stop event 334 if shouldRetry { 335 queueRetry(immediately) 336 } else if shouldStop { 337 queueStop() 338 } 339 } 340 341 // handleAddedPeer notifies all topics about added peer. 342 func (p *PeerPool) handleAddedPeer(server *p2p.Server, nodeID enode.ID) { 343 for _, t := range p.topics { 344 t.ConfirmAdded(server, nodeID) 345 if p.opts.AllowStop && t.MaxReached() { 346 t.setStopSearchTimeout(p.opts.TopicStopSearchDelay) 347 } 348 } 349 } 350 351 // handleStopTopics stops the search on any topics having reached its max cached 352 // limit or its delay stop is expired, additionally will stop discovery if all 353 // peers are stopped. 354 func (p *PeerPool) handleStopTopics(server *p2p.Server) { 355 if !p.opts.AllowStop { 356 return 357 } 358 for _, t := range p.topics { 359 if t.readyToStopSearch() { 360 t.StopSearch(server) 361 } 362 } 363 if p.allTopicsStopped() { 364 log.Debug("closing discv5 connection because all topics reached max limit") 365 p.stopDiscovery(server) 366 } 367 } 368 369 // allTopicsStopped returns true if all topics are stopped. 370 func (p *PeerPool) allTopicsStopped() (all bool) { 371 if !p.opts.AllowStop { 372 return false 373 } 374 all = true 375 for _, t := range p.topics { 376 if !t.isStopped() { 377 all = false 378 } 379 } 380 return all 381 } 382 383 // handleDroppedPeer notifies every topic about dropped peer and returns true if any peer have connections 384 // below min limit 385 func (p *PeerPool) handleDroppedPeer(server *p2p.Server, nodeID enode.ID) (any bool) { 386 for _, t := range p.topics { 387 confirmed := t.ConfirmDropped(server, nodeID) 388 if confirmed { 389 newPeer := t.AddPeerFromTable(server) 390 if newPeer != nil { 391 log.Debug("added peer from local table", "ID", newPeer.ID) 392 } 393 } 394 log.Debug("search", "topic", t.Topic(), "below min", t.BelowMin()) 395 if t.BelowMin() && !t.SearchRunning() { 396 any = true 397 } 398 } 399 return any 400 } 401 402 // Stop closes pool quit channel and all channels that are watched by search queries 403 // and waits till all goroutines will exit. 404 func (p *PeerPool) Stop() { 405 // pool wasn't started 406 if p.quit == nil { 407 return 408 } 409 select { 410 case <-p.quit: 411 return 412 default: 413 log.Debug("started closing peer pool") 414 close(p.quit) 415 } 416 p.serverSubscription.Unsubscribe() 417 p.wg.Wait() 418 } 419 420 type updateTopicRequest struct { 421 Topic string 422 Limits params.Limits 423 } 424 425 // UpdateTopic updates the pre-existing TopicPool limits. 426 func (p *PeerPool) UpdateTopic(topic string, limits params.Limits) error { 427 if _, err := p.getTopic(topic); err != nil { 428 return err 429 } 430 431 p.updateTopic <- &updateTopicRequest{ 432 Topic: topic, 433 Limits: limits, 434 } 435 436 return nil 437 } 438 439 func (p *PeerPool) updateTopicLimits(server *p2p.Server, req *updateTopicRequest) error { 440 t, err := p.getTopic(req.Topic) 441 if err != nil { 442 return err 443 } 444 t.SetLimits(req.Limits) 445 return nil 446 } 447 448 func (p *PeerPool) getTopic(topic string) (TopicPoolInterface, error) { 449 for _, t := range p.topics { 450 if t.Topic() == discv5.Topic(topic) { 451 return t, nil 452 } 453 } 454 return nil, errors.New("topic not found") 455 }