github.com/badrootd/nibiru-cometbft@v0.37.5-0.20240307173500-2a75559eee9b/p2p/switch.go (about) 1 package p2p 2 3 import ( 4 "fmt" 5 "math" 6 "sync" 7 "time" 8 9 "github.com/badrootd/nibiru-cometbft/config" 10 "github.com/badrootd/nibiru-cometbft/libs/cmap" 11 "github.com/badrootd/nibiru-cometbft/libs/rand" 12 "github.com/badrootd/nibiru-cometbft/libs/service" 13 "github.com/badrootd/nibiru-cometbft/p2p/conn" 14 "github.com/cosmos/gogoproto/proto" 15 ) 16 17 const ( 18 // wait a random amount of time from this interval 19 // before dialing peers or reconnecting to help prevent DoS 20 dialRandomizerIntervalMilliseconds = 3000 21 22 // repeatedly try to reconnect for a few minutes 23 // ie. 5 * 20 = 100s 24 reconnectAttempts = 20 25 reconnectInterval = 5 * time.Second 26 27 // then move into exponential backoff mode for ~1day 28 // ie. 3**10 = 16hrs 29 reconnectBackOffAttempts = 10 30 reconnectBackOffBaseSeconds = 3 31 ) 32 33 // MConnConfig returns an MConnConfig with fields updated 34 // from the P2PConfig. 35 func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig { 36 mConfig := conn.DefaultMConnConfig() 37 mConfig.FlushThrottle = cfg.FlushThrottleTimeout 38 mConfig.SendRate = cfg.SendRate 39 mConfig.RecvRate = cfg.RecvRate 40 mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize 41 return mConfig 42 } 43 44 //----------------------------------------------------------------------------- 45 46 // An AddrBook represents an address book from the pex package, which is used 47 // to store peer addresses. 48 type AddrBook interface { 49 AddAddress(addr *NetAddress, src *NetAddress) error 50 AddPrivateIDs([]string) 51 AddOurAddress(*NetAddress) 52 OurAddress(*NetAddress) bool 53 MarkGood(ID) 54 RemoveAddress(*NetAddress) 55 HasAddress(*NetAddress) bool 56 Save() 57 } 58 59 // PeerFilterFunc to be implemented by filter hooks after a new Peer has been 60 // fully setup. 61 type PeerFilterFunc func(IPeerSet, Peer) error 62 63 //----------------------------------------------------------------------------- 64 65 // Switch handles peer connections and exposes an API to receive incoming messages 66 // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one 67 // or more `Channels`. So while sending outgoing messages is typically performed on the peer, 68 // incoming messages are received on the reactor. 69 type Switch struct { 70 service.BaseService 71 72 config *config.P2PConfig 73 reactors map[string]Reactor 74 chDescs []*conn.ChannelDescriptor 75 reactorsByCh map[byte]Reactor 76 msgTypeByChID map[byte]proto.Message 77 peers *PeerSet 78 dialing *cmap.CMap 79 reconnecting *cmap.CMap 80 nodeInfo NodeInfo // our node info 81 nodeKey *NodeKey // our node privkey 82 addrBook AddrBook 83 // peers addresses with whom we'll maintain constant connection 84 persistentPeersAddrs []*NetAddress 85 unconditionalPeerIDs map[ID]struct{} 86 87 transport Transport 88 89 filterTimeout time.Duration 90 peerFilters []PeerFilterFunc 91 92 rng *rand.Rand // seed for randomizing dial times and orders 93 94 metrics *Metrics 95 mlc *metricsLabelCache 96 } 97 98 // NetAddress returns the address the switch is listening on. 99 func (sw *Switch) NetAddress() *NetAddress { 100 addr := sw.transport.NetAddress() 101 return &addr 102 } 103 104 // SwitchOption sets an optional parameter on the Switch. 105 type SwitchOption func(*Switch) 106 107 // NewSwitch creates a new Switch with the given config. 108 func NewSwitch( 109 cfg *config.P2PConfig, 110 transport Transport, 111 options ...SwitchOption, 112 ) *Switch { 113 114 sw := &Switch{ 115 config: cfg, 116 reactors: make(map[string]Reactor), 117 chDescs: make([]*conn.ChannelDescriptor, 0), 118 reactorsByCh: make(map[byte]Reactor), 119 msgTypeByChID: make(map[byte]proto.Message), 120 peers: NewPeerSet(), 121 dialing: cmap.NewCMap(), 122 reconnecting: cmap.NewCMap(), 123 metrics: NopMetrics(), 124 transport: transport, 125 filterTimeout: defaultFilterTimeout, 126 persistentPeersAddrs: make([]*NetAddress, 0), 127 unconditionalPeerIDs: make(map[ID]struct{}), 128 mlc: newMetricsLabelCache(), 129 } 130 131 // Ensure we have a completely undeterministic PRNG. 132 sw.rng = rand.NewRand() 133 134 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 135 136 for _, option := range options { 137 option(sw) 138 } 139 140 return sw 141 } 142 143 // SwitchFilterTimeout sets the timeout used for peer filters. 144 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 145 return func(sw *Switch) { sw.filterTimeout = timeout } 146 } 147 148 // SwitchPeerFilters sets the filters for rejection of new peers. 149 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 150 return func(sw *Switch) { sw.peerFilters = filters } 151 } 152 153 // WithMetrics sets the metrics. 154 func WithMetrics(metrics *Metrics) SwitchOption { 155 return func(sw *Switch) { sw.metrics = metrics } 156 } 157 158 //--------------------------------------------------------------------- 159 // Switch setup 160 161 // AddReactor adds the given reactor to the switch. 162 // NOTE: Not goroutine safe. 163 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 164 for _, chDesc := range reactor.GetChannels() { 165 chID := chDesc.ID 166 // No two reactors can share the same channel. 167 if sw.reactorsByCh[chID] != nil { 168 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 169 } 170 sw.chDescs = append(sw.chDescs, chDesc) 171 sw.reactorsByCh[chID] = reactor 172 sw.msgTypeByChID[chID] = chDesc.MessageType 173 } 174 sw.reactors[name] = reactor 175 reactor.SetSwitch(sw) 176 return reactor 177 } 178 179 // RemoveReactor removes the given Reactor from the Switch. 180 // NOTE: Not goroutine safe. 181 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 182 for _, chDesc := range reactor.GetChannels() { 183 // remove channel description 184 for i := 0; i < len(sw.chDescs); i++ { 185 if chDesc.ID == sw.chDescs[i].ID { 186 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 187 break 188 } 189 } 190 delete(sw.reactorsByCh, chDesc.ID) 191 delete(sw.msgTypeByChID, chDesc.ID) 192 } 193 delete(sw.reactors, name) 194 reactor.SetSwitch(nil) 195 } 196 197 // Reactors returns a map of reactors registered on the switch. 198 // NOTE: Not goroutine safe. 199 func (sw *Switch) Reactors() map[string]Reactor { 200 return sw.reactors 201 } 202 203 // Reactor returns the reactor with the given name. 204 // NOTE: Not goroutine safe. 205 func (sw *Switch) Reactor(name string) Reactor { 206 return sw.reactors[name] 207 } 208 209 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 210 // NOTE: Not goroutine safe. 211 func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) { 212 sw.nodeInfo = nodeInfo 213 } 214 215 // NodeInfo returns the switch's NodeInfo. 216 // NOTE: Not goroutine safe. 217 func (sw *Switch) NodeInfo() NodeInfo { 218 return sw.nodeInfo 219 } 220 221 // SetNodeKey sets the switch's private key for authenticated encryption. 222 // NOTE: Not goroutine safe. 223 func (sw *Switch) SetNodeKey(nodeKey *NodeKey) { 224 sw.nodeKey = nodeKey 225 } 226 227 //--------------------------------------------------------------------- 228 // Service start/stop 229 230 // OnStart implements BaseService. It starts all the reactors and peers. 231 func (sw *Switch) OnStart() error { 232 // Start reactors 233 for _, reactor := range sw.reactors { 234 err := reactor.Start() 235 if err != nil { 236 return fmt.Errorf("failed to start %v: %w", reactor, err) 237 } 238 } 239 240 // Start accepting Peers. 241 go sw.acceptRoutine() 242 243 return nil 244 } 245 246 // OnStop implements BaseService. It stops all peers and reactors. 247 func (sw *Switch) OnStop() { 248 // Stop peers 249 for _, p := range sw.peers.List() { 250 sw.stopAndRemovePeer(p, nil) 251 } 252 253 // Stop reactors 254 sw.Logger.Debug("Switch: Stopping reactors") 255 for _, reactor := range sw.reactors { 256 if err := reactor.Stop(); err != nil { 257 sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err) 258 } 259 } 260 } 261 262 //--------------------------------------------------------------------- 263 // Peers 264 265 // BroadcastEnvelope runs a go routine for each attempted send, which will block trying 266 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 267 // success values for each attempted send (false if times out). Channel will be 268 // closed once msg bytes are sent to all peers (or time out). 269 // BroadcastEnvelopes sends to the peers using the SendEnvelope method. 270 // 271 // NOTE: BroadcastEnvelope uses goroutines, so order of broadcast may not be preserved. 272 func (sw *Switch) BroadcastEnvelope(e Envelope) chan bool { 273 sw.Logger.Debug("Broadcast", "channel", e.ChannelID) 274 275 peers := sw.peers.List() 276 var wg sync.WaitGroup 277 wg.Add(len(peers)) 278 successChan := make(chan bool, len(peers)) 279 280 for _, peer := range peers { 281 go func(p Peer) { 282 defer wg.Done() 283 success := p.SendEnvelope(e) 284 successChan <- success 285 }(peer) 286 } 287 288 go func() { 289 wg.Wait() 290 close(successChan) 291 }() 292 293 return successChan 294 } 295 296 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 297 // unconditional peers are not counted here. 298 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 299 peers := sw.peers.List() 300 for _, peer := range peers { 301 if peer.IsOutbound() { 302 if !sw.IsPeerUnconditional(peer.ID()) { 303 outbound++ 304 } 305 } else { 306 if !sw.IsPeerUnconditional(peer.ID()) { 307 inbound++ 308 } 309 } 310 } 311 dialing = sw.dialing.Size() 312 return 313 } 314 315 func (sw *Switch) IsPeerUnconditional(id ID) bool { 316 _, ok := sw.unconditionalPeerIDs[id] 317 return ok 318 } 319 320 // MaxNumOutboundPeers returns a maximum number of outbound peers. 321 func (sw *Switch) MaxNumOutboundPeers() int { 322 return sw.config.MaxNumOutboundPeers 323 } 324 325 // Peers returns the set of peers that are connected to the switch. 326 func (sw *Switch) Peers() IPeerSet { 327 return sw.peers 328 } 329 330 // StopPeerForError disconnects from a peer due to external error. 331 // If the peer is persistent, it will attempt to reconnect. 332 // TODO: make record depending on reason. 333 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 334 if !peer.IsRunning() { 335 return 336 } 337 338 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 339 sw.stopAndRemovePeer(peer, reason) 340 341 if peer.IsPersistent() { 342 var addr *NetAddress 343 if peer.IsOutbound() { // socket address for outbound peers 344 addr = peer.SocketAddr() 345 } else { // self-reported address for inbound peers 346 var err error 347 addr, err = peer.NodeInfo().NetAddress() 348 if err != nil { 349 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 350 "peer", peer, "err", err) 351 return 352 } 353 } 354 go sw.reconnectToPeer(addr) 355 } 356 } 357 358 // StopPeerGracefully disconnects from a peer gracefully. 359 // TODO: handle graceful disconnects. 360 func (sw *Switch) StopPeerGracefully(peer Peer) { 361 sw.Logger.Info("Stopping peer gracefully") 362 sw.stopAndRemovePeer(peer, nil) 363 } 364 365 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 366 sw.transport.Cleanup(peer) 367 if err := peer.Stop(); err != nil { 368 sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly 369 } 370 371 for _, reactor := range sw.reactors { 372 reactor.RemovePeer(peer, reason) 373 } 374 375 // Removing a peer should go last to avoid a situation where a peer 376 // reconnect to our node and the switch calls InitPeer before 377 // RemovePeer is finished. 378 // https://github.com/tendermint/tendermint/issues/3338 379 if sw.peers.Remove(peer) { 380 sw.metrics.Peers.Add(float64(-1)) 381 } else { 382 // Removal of the peer has failed. The function above sets a flag within the peer to mark this. 383 // We keep this message here as information to the developer. 384 sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID()) 385 } 386 } 387 388 // reconnectToPeer tries to reconnect to the addr, first repeatedly 389 // with a fixed interval, then with exponential backoff. 390 // If no success after all that, it stops trying, and leaves it 391 // to the PEX/Addrbook to find the peer with the addr again 392 // NOTE: this will keep trying even if the handshake or auth fails. 393 // TODO: be more explicit with error types so we only retry on certain failures 394 // - ie. if we're getting ErrDuplicatePeer we can stop 395 // because the addrbook got us the peer back already 396 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 397 if sw.reconnecting.Has(string(addr.ID)) { 398 return 399 } 400 sw.reconnecting.Set(string(addr.ID), addr) 401 defer sw.reconnecting.Delete(string(addr.ID)) 402 403 start := time.Now() 404 sw.Logger.Info("Reconnecting to peer", "addr", addr) 405 for i := 0; i < reconnectAttempts; i++ { 406 if !sw.IsRunning() { 407 return 408 } 409 410 err := sw.DialPeerWithAddress(addr) 411 if err == nil { 412 return // success 413 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 414 return 415 } 416 417 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 418 // sleep a set amount 419 sw.randomSleep(reconnectInterval) 420 continue 421 } 422 423 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 424 "addr", addr, "elapsed", time.Since(start)) 425 for i := 0; i < reconnectBackOffAttempts; i++ { 426 if !sw.IsRunning() { 427 return 428 } 429 430 // sleep an exponentially increasing amount 431 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 432 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 433 434 err := sw.DialPeerWithAddress(addr) 435 if err == nil { 436 return // success 437 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 438 return 439 } 440 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 441 } 442 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 443 } 444 445 // SetAddrBook allows to set address book on Switch. 446 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 447 sw.addrBook = addrBook 448 } 449 450 // MarkPeerAsGood marks the given peer as good when it did something useful 451 // like contributed to consensus. 452 func (sw *Switch) MarkPeerAsGood(peer Peer) { 453 if sw.addrBook != nil { 454 sw.addrBook.MarkGood(peer.ID()) 455 } 456 } 457 458 //--------------------------------------------------------------------- 459 // Dialing 460 461 type privateAddr interface { 462 PrivateAddr() bool 463 } 464 465 func isPrivateAddr(err error) bool { 466 te, ok := err.(privateAddr) 467 return ok && te.PrivateAddr() 468 } 469 470 // DialPeersAsync dials a list of peers asynchronously in random order. 471 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 472 // It ignores ErrNetAddressLookup. However, if there are other errors, first 473 // encounter is returned. 474 // Nop if there are no peers. 475 func (sw *Switch) DialPeersAsync(peers []string) error { 476 netAddrs, errs := NewNetAddressStrings(peers) 477 // report all the errors 478 for _, err := range errs { 479 sw.Logger.Error("Error in peer's address", "err", err) 480 } 481 // return first non-ErrNetAddressLookup error 482 for _, err := range errs { 483 if _, ok := err.(ErrNetAddressLookup); ok { 484 continue 485 } 486 return err 487 } 488 sw.dialPeersAsync(netAddrs) 489 return nil 490 } 491 492 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 493 ourAddr := sw.NetAddress() 494 495 // TODO: this code feels like it's in the wrong place. 496 // The integration tests depend on the addrBook being saved 497 // right away but maybe we can change that. Recall that 498 // the addrBook is only written to disk every 2min 499 if sw.addrBook != nil { 500 // add peers to `addrBook` 501 for _, netAddr := range netAddrs { 502 // do not add our address or ID 503 if !netAddr.Same(ourAddr) { 504 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 505 if isPrivateAddr(err) { 506 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 507 } else { 508 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 509 } 510 } 511 } 512 } 513 // Persist some peers to disk right away. 514 // NOTE: integration tests depend on this 515 sw.addrBook.Save() 516 } 517 518 // permute the list, dial them in random order. 519 perm := sw.rng.Perm(len(netAddrs)) 520 for i := 0; i < len(perm); i++ { 521 go func(i int) { 522 j := perm[i] 523 addr := netAddrs[j] 524 525 if addr.Same(ourAddr) { 526 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 527 return 528 } 529 530 sw.randomSleep(0) 531 532 err := sw.DialPeerWithAddress(addr) 533 if err != nil { 534 switch err.(type) { 535 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 536 sw.Logger.Debug("Error dialing peer", "err", err) 537 default: 538 sw.Logger.Error("Error dialing peer", "err", err) 539 } 540 } 541 }(i) 542 } 543 } 544 545 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 546 // and authenticates successfully. 547 // If we're currently dialing this address or it belongs to an existing peer, 548 // ErrCurrentlyDialingOrExistingAddress is returned. 549 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 550 if sw.IsDialingOrExistingAddress(addr) { 551 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 552 } 553 554 sw.dialing.Set(string(addr.ID), addr) 555 defer sw.dialing.Delete(string(addr.ID)) 556 557 return sw.addOutboundPeerWithConfig(addr, sw.config) 558 } 559 560 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 561 func (sw *Switch) randomSleep(interval time.Duration) { 562 r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 563 time.Sleep(r + interval) 564 } 565 566 // IsDialingOrExistingAddress returns true if switch has a peer with the given 567 // address or dialing it at the moment. 568 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 569 return sw.dialing.Has(string(addr.ID)) || 570 sw.peers.Has(addr.ID) || 571 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 572 } 573 574 // AddPersistentPeers allows you to set persistent peers. It ignores 575 // ErrNetAddressLookup. However, if there are other errors, first encounter is 576 // returned. 577 func (sw *Switch) AddPersistentPeers(addrs []string) error { 578 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 579 netAddrs, errs := NewNetAddressStrings(addrs) 580 // report all the errors 581 for _, err := range errs { 582 sw.Logger.Error("Error in peer's address", "err", err) 583 } 584 // return first non-ErrNetAddressLookup error 585 for _, err := range errs { 586 if _, ok := err.(ErrNetAddressLookup); ok { 587 continue 588 } 589 return err 590 } 591 sw.persistentPeersAddrs = netAddrs 592 return nil 593 } 594 595 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 596 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 597 for i, id := range ids { 598 err := validateID(ID(id)) 599 if err != nil { 600 return fmt.Errorf("wrong ID #%d: %w", i, err) 601 } 602 sw.unconditionalPeerIDs[ID(id)] = struct{}{} 603 } 604 return nil 605 } 606 607 func (sw *Switch) AddPrivatePeerIDs(ids []string) error { 608 validIDs := make([]string, 0, len(ids)) 609 for i, id := range ids { 610 err := validateID(ID(id)) 611 if err != nil { 612 return fmt.Errorf("wrong ID #%d: %w", i, err) 613 } 614 validIDs = append(validIDs, id) 615 } 616 617 sw.addrBook.AddPrivateIDs(validIDs) 618 619 return nil 620 } 621 622 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 623 for _, pa := range sw.persistentPeersAddrs { 624 if pa.Equals(na) { 625 return true 626 } 627 } 628 return false 629 } 630 631 func (sw *Switch) acceptRoutine() { 632 for { 633 p, err := sw.transport.Accept(peerConfig{ 634 chDescs: sw.chDescs, 635 onPeerError: sw.StopPeerForError, 636 reactorsByCh: sw.reactorsByCh, 637 msgTypeByChID: sw.msgTypeByChID, 638 metrics: sw.metrics, 639 mlc: sw.mlc, 640 isPersistent: sw.IsPeerPersistent, 641 }) 642 if err != nil { 643 switch err := err.(type) { 644 case ErrRejected: 645 if err.IsSelf() { 646 // Remove the given address from the address book and add to our addresses 647 // to avoid dialing in the future. 648 addr := err.Addr() 649 sw.addrBook.RemoveAddress(&addr) 650 sw.addrBook.AddOurAddress(&addr) 651 } 652 653 sw.Logger.Info( 654 "Inbound Peer rejected", 655 "err", err, 656 "numPeers", sw.peers.Size(), 657 ) 658 659 continue 660 case ErrFilterTimeout: 661 sw.Logger.Error( 662 "Peer filter timed out", 663 "err", err, 664 ) 665 666 continue 667 case ErrTransportClosed: 668 sw.Logger.Error( 669 "Stopped accept routine, as transport is closed", 670 "numPeers", sw.peers.Size(), 671 ) 672 default: 673 sw.Logger.Error( 674 "Accept on transport errored", 675 "err", err, 676 "numPeers", sw.peers.Size(), 677 ) 678 // We could instead have a retry loop around the acceptRoutine, 679 // but that would need to stop and let the node shutdown eventually. 680 // So might as well panic and let process managers restart the node. 681 // There's no point in letting the node run without the acceptRoutine, 682 // since it won't be able to accept new connections. 683 panic(fmt.Errorf("accept routine exited: %v", err)) 684 } 685 686 break 687 } 688 689 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 690 // Ignore connection if we already have enough peers. 691 _, in, _ := sw.NumPeers() 692 if in >= sw.config.MaxNumInboundPeers { 693 sw.Logger.Info( 694 "Ignoring inbound connection: already have enough inbound peers", 695 "address", p.SocketAddr(), 696 "have", in, 697 "max", sw.config.MaxNumInboundPeers, 698 ) 699 700 sw.transport.Cleanup(p) 701 702 continue 703 } 704 705 } 706 707 if err := sw.addPeer(p); err != nil { 708 sw.transport.Cleanup(p) 709 if p.IsRunning() { 710 _ = p.Stop() 711 } 712 sw.Logger.Info( 713 "Ignoring inbound connection: error while adding peer", 714 "err", err, 715 "id", p.ID(), 716 ) 717 } 718 } 719 } 720 721 // dial the peer; make secret connection; authenticate against the dialed ID; 722 // add the peer. 723 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 724 // If peer is started successfully, reconnectLoop will start when 725 // StopPeerForError is called. 726 func (sw *Switch) addOutboundPeerWithConfig( 727 addr *NetAddress, 728 cfg *config.P2PConfig, 729 ) error { 730 sw.Logger.Debug("Dialing peer", "address", addr) 731 732 // XXX(xla): Remove the leakage of test concerns in implementation. 733 if cfg.TestDialFail { 734 go sw.reconnectToPeer(addr) 735 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 736 } 737 738 p, err := sw.transport.Dial(*addr, peerConfig{ 739 chDescs: sw.chDescs, 740 onPeerError: sw.StopPeerForError, 741 isPersistent: sw.IsPeerPersistent, 742 reactorsByCh: sw.reactorsByCh, 743 msgTypeByChID: sw.msgTypeByChID, 744 metrics: sw.metrics, 745 mlc: sw.mlc, 746 }) 747 if err != nil { 748 if e, ok := err.(ErrRejected); ok { 749 if e.IsSelf() { 750 // Remove the given address from the address book and add to our addresses 751 // to avoid dialing in the future. 752 sw.addrBook.RemoveAddress(addr) 753 sw.addrBook.AddOurAddress(addr) 754 755 return err 756 } 757 } 758 759 // retry persistent peers after 760 // any dial error besides IsSelf() 761 if sw.IsPeerPersistent(addr) { 762 go sw.reconnectToPeer(addr) 763 } 764 765 return err 766 } 767 768 if err := sw.addPeer(p); err != nil { 769 sw.transport.Cleanup(p) 770 if p.IsRunning() { 771 _ = p.Stop() 772 } 773 return err 774 } 775 776 return nil 777 } 778 779 func (sw *Switch) filterPeer(p Peer) error { 780 // Avoid duplicate 781 if sw.peers.Has(p.ID()) { 782 return ErrRejected{id: p.ID(), isDuplicate: true} 783 } 784 785 errc := make(chan error, len(sw.peerFilters)) 786 787 for _, f := range sw.peerFilters { 788 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 789 errc <- f(sw.peers, p) 790 }(f, p, errc) 791 } 792 793 for i := 0; i < cap(errc); i++ { 794 select { 795 case err := <-errc: 796 if err != nil { 797 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 798 } 799 case <-time.After(sw.filterTimeout): 800 return ErrFilterTimeout{} 801 } 802 } 803 804 return nil 805 } 806 807 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 808 // the peer is filtered out or failed to start or can't be added. 809 func (sw *Switch) addPeer(p Peer) error { 810 if err := sw.filterPeer(p); err != nil { 811 return err 812 } 813 814 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 815 816 // Handle the shut down case where the switch has stopped but we're 817 // concurrently trying to add a peer. 818 if !sw.IsRunning() { 819 // XXX should this return an error or just log and terminate? 820 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 821 return nil 822 } 823 824 // Add some data to the peer, which is required by reactors. 825 for _, reactor := range sw.reactors { 826 p = reactor.InitPeer(p) 827 } 828 829 // Start the peer's send/recv routines. 830 // Must start it before adding it to the peer set 831 // to prevent Start and Stop from being called concurrently. 832 err := p.Start() 833 if err != nil { 834 // Should never happen 835 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 836 return err 837 } 838 839 // Add the peer to PeerSet. Do this before starting the reactors 840 // so that if Receive errors, we will find the peer and remove it. 841 // Add should not err since we already checked peers.Has(). 842 if err := sw.peers.Add(p); err != nil { 843 switch err.(type) { 844 case ErrPeerRemoval: 845 sw.Logger.Error("Error starting peer ", 846 " err ", "Peer has already errored and removal was attempted.", 847 "peer", p.ID()) 848 } 849 return err 850 } 851 sw.metrics.Peers.Add(float64(1)) 852 853 // Start all the reactor protocols on the peer. 854 for _, reactor := range sw.reactors { 855 reactor.AddPeer(p) 856 } 857 858 sw.Logger.Debug("Added peer", "peer", p) 859 860 return nil 861 }