github.com/MagHErmit/tendermint@v0.282.1/p2p/switch.go (about) 1 package p2p 2 3 import ( 4 "fmt" 5 "math" 6 "sync" 7 "time" 8 9 "github.com/MagHErmit/tendermint/config" 10 "github.com/MagHErmit/tendermint/libs/cmap" 11 "github.com/MagHErmit/tendermint/libs/log" 12 "github.com/MagHErmit/tendermint/libs/rand" 13 "github.com/MagHErmit/tendermint/libs/service" 14 "github.com/MagHErmit/tendermint/p2p/conn" 15 ) 16 17 const ( 18 // wait a random amount of time from this interval 19 // before dialing peers or reconnecting to help prevent DoS 20 dialRandomizerIntervalMilliseconds = 3000 21 22 // repeatedly try to reconnect for a few minutes 23 // ie. 5 * 20 = 100s 24 reconnectAttempts = 20 25 reconnectInterval = 5 * time.Second 26 27 // then move into exponential backoff mode for ~1day 28 // ie. 3**10 = 16hrs 29 reconnectBackOffAttempts = 10 30 reconnectBackOffBaseSeconds = 3 31 ) 32 33 // MConnConfig returns an MConnConfig with fields updated 34 // from the P2PConfig. 35 func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig { 36 mConfig := conn.DefaultMConnConfig() 37 mConfig.FlushThrottle = cfg.FlushThrottleTimeout 38 mConfig.SendRate = cfg.SendRate 39 mConfig.RecvRate = cfg.RecvRate 40 mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize 41 return mConfig 42 } 43 44 //----------------------------------------------------------------------------- 45 46 // An AddrBook represents an address book from the pex package, which is used 47 // to store peer addresses. 48 type AddrBook interface { 49 AddAddress(addr *NetAddress, src *NetAddress) error 50 AddPrivateIDs([]string) 51 AddOurAddress(*NetAddress) 52 OurAddress(*NetAddress) bool 53 MarkGood(ID) 54 RemoveAddress(*NetAddress) 55 HasAddress(*NetAddress) bool 56 Save() 57 } 58 59 // PeerFilterFunc to be implemented by filter hooks after a new Peer has been 60 // fully setup. 61 type PeerFilterFunc func(IPeerSet, Peer) error 62 63 //----------------------------------------------------------------------------- 64 65 // Switch handles peer connections and exposes an API to receive incoming messages 66 // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one 67 // or more `Channels`. So while sending outgoing messages is typically performed on the peer, 68 // incoming messages are received on the reactor. 69 type Switch struct { 70 service.BaseService 71 72 config *config.P2PConfig 73 reactors map[string]Reactor 74 chDescs []*conn.ChannelDescriptor 75 reactorsByCh map[byte]Reactor 76 peers *PeerSet 77 dialing *cmap.CMap 78 reconnecting *cmap.CMap 79 nodeInfo NodeInfo // our node info 80 nodeKey *NodeKey // our node privkey 81 addrBook AddrBook 82 // peers addresses with whom we'll maintain constant connection 83 persistentPeersAddrs []*NetAddress 84 unconditionalPeerIDs map[ID]struct{} 85 86 transport Transport 87 88 filterTimeout time.Duration 89 peerFilters []PeerFilterFunc 90 91 rng *rand.Rand // seed for randomizing dial times and orders 92 93 metrics *Metrics 94 } 95 96 // NetAddress returns the address the switch is listening on. 97 func (sw *Switch) NetAddress() *NetAddress { 98 addr := sw.transport.NetAddress() 99 return &addr 100 } 101 102 // SwitchOption sets an optional parameter on the Switch. 103 type SwitchOption func(*Switch) 104 105 // NewSwitch creates a new Switch with the given config. 106 func NewSwitch( 107 cfg *config.P2PConfig, 108 transport Transport, 109 options ...SwitchOption, 110 ) *Switch { 111 sw := &Switch{ 112 config: cfg, 113 reactors: make(map[string]Reactor), 114 chDescs: make([]*conn.ChannelDescriptor, 0), 115 reactorsByCh: make(map[byte]Reactor), 116 peers: NewPeerSet(), 117 dialing: cmap.NewCMap(), 118 reconnecting: cmap.NewCMap(), 119 metrics: NopMetrics(), 120 transport: transport, 121 filterTimeout: defaultFilterTimeout, 122 persistentPeersAddrs: make([]*NetAddress, 0), 123 unconditionalPeerIDs: make(map[ID]struct{}), 124 } 125 126 // Ensure we have a completely undeterministic PRNG. 127 sw.rng = rand.NewRand() 128 129 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 130 131 for _, option := range options { 132 option(sw) 133 } 134 135 return sw 136 } 137 138 // SwitchFilterTimeout sets the timeout used for peer filters. 139 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 140 return func(sw *Switch) { sw.filterTimeout = timeout } 141 } 142 143 // SwitchPeerFilters sets the filters for rejection of new peers. 144 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 145 return func(sw *Switch) { sw.peerFilters = filters } 146 } 147 148 // WithMetrics sets the metrics. 149 func WithMetrics(metrics *Metrics) SwitchOption { 150 return func(sw *Switch) { sw.metrics = metrics } 151 } 152 153 //--------------------------------------------------------------------- 154 // Switch setup 155 156 // AddReactor adds the given reactor to the switch. 157 // NOTE: Not goroutine safe. 158 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 159 for _, chDesc := range reactor.GetChannels() { 160 chID := chDesc.ID 161 // No two reactors can share the same channel. 162 if sw.reactorsByCh[chID] != nil { 163 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 164 } 165 sw.chDescs = append(sw.chDescs, chDesc) 166 sw.reactorsByCh[chID] = reactor 167 } 168 sw.reactors[name] = reactor 169 reactor.SetSwitch(sw) 170 return reactor 171 } 172 173 // RemoveReactor removes the given Reactor from the Switch. 174 // NOTE: Not goroutine safe. 175 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 176 for _, chDesc := range reactor.GetChannels() { 177 // remove channel description 178 for i := 0; i < len(sw.chDescs); i++ { 179 if chDesc.ID == sw.chDescs[i].ID { 180 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 181 break 182 } 183 } 184 delete(sw.reactorsByCh, chDesc.ID) 185 } 186 delete(sw.reactors, name) 187 reactor.SetSwitch(nil) 188 } 189 190 // Reactors returns a map of reactors registered on the switch. 191 // NOTE: Not goroutine safe. 192 func (sw *Switch) Reactors() map[string]Reactor { 193 return sw.reactors 194 } 195 196 // Reactor returns the reactor with the given name. 197 // NOTE: Not goroutine safe. 198 func (sw *Switch) Reactor(name string) Reactor { 199 return sw.reactors[name] 200 } 201 202 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 203 // NOTE: Not goroutine safe. 204 func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) { 205 sw.nodeInfo = nodeInfo 206 } 207 208 // NodeInfo returns the switch's NodeInfo. 209 // NOTE: Not goroutine safe. 210 func (sw *Switch) NodeInfo() NodeInfo { 211 return sw.nodeInfo 212 } 213 214 // SetNodeKey sets the switch's private key for authenticated encryption. 215 // NOTE: Not goroutine safe. 216 func (sw *Switch) SetNodeKey(nodeKey *NodeKey) { 217 sw.nodeKey = nodeKey 218 } 219 220 //--------------------------------------------------------------------- 221 // Service start/stop 222 223 // OnStart implements BaseService. It starts all the reactors and peers. 224 func (sw *Switch) OnStart() error { 225 // Start reactors 226 for _, reactor := range sw.reactors { 227 err := reactor.Start() 228 if err != nil { 229 return fmt.Errorf("failed to start %v: %w", reactor, err) 230 } 231 } 232 233 // Start accepting Peers. 234 go sw.acceptRoutine() 235 236 return nil 237 } 238 239 // OnStop implements BaseService. It stops all peers and reactors. 240 func (sw *Switch) OnStop() { 241 // Stop peers 242 for _, p := range sw.peers.List() { 243 sw.stopAndRemovePeer(p, nil) 244 } 245 246 // Stop reactors 247 sw.Logger.Debug("Switch: Stopping reactors") 248 for _, reactor := range sw.reactors { 249 if err := reactor.Stop(); err != nil { 250 sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err) 251 } 252 } 253 } 254 255 //--------------------------------------------------------------------- 256 // Peers 257 258 // Broadcast runs a go routine for each attempted send, which will block trying 259 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 260 // success values for each attempted send (false if times out). Channel will be 261 // closed once msg bytes are sent to all peers (or time out). 262 // 263 // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved. 264 func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool { 265 sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", log.NewLazySprintf("%X", msgBytes)) 266 267 peers := sw.peers.List() 268 var wg sync.WaitGroup 269 wg.Add(len(peers)) 270 successChan := make(chan bool, len(peers)) 271 272 for _, peer := range peers { 273 go func(p Peer) { 274 defer wg.Done() 275 success := p.Send(chID, msgBytes) 276 successChan <- success 277 }(peer) 278 } 279 280 go func() { 281 wg.Wait() 282 close(successChan) 283 }() 284 285 return successChan 286 } 287 288 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 289 // unconditional peers are not counted here. 290 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 291 peers := sw.peers.List() 292 for _, peer := range peers { 293 if peer.IsOutbound() { 294 if !sw.IsPeerUnconditional(peer.ID()) { 295 outbound++ 296 } 297 } else { 298 if !sw.IsPeerUnconditional(peer.ID()) { 299 inbound++ 300 } 301 } 302 } 303 dialing = sw.dialing.Size() 304 return 305 } 306 307 func (sw *Switch) IsPeerUnconditional(id ID) bool { 308 _, ok := sw.unconditionalPeerIDs[id] 309 return ok 310 } 311 312 // MaxNumOutboundPeers returns a maximum number of outbound peers. 313 func (sw *Switch) MaxNumOutboundPeers() int { 314 return sw.config.MaxNumOutboundPeers 315 } 316 317 // Peers returns the set of peers that are connected to the switch. 318 func (sw *Switch) Peers() IPeerSet { 319 return sw.peers 320 } 321 322 // StopPeerForError disconnects from a peer due to external error. 323 // If the peer is persistent, it will attempt to reconnect. 324 // TODO: make record depending on reason. 325 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 326 if !peer.IsRunning() { 327 return 328 } 329 330 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 331 sw.stopAndRemovePeer(peer, reason) 332 333 if peer.IsPersistent() { 334 var addr *NetAddress 335 if peer.IsOutbound() { // socket address for outbound peers 336 addr = peer.SocketAddr() 337 } else { // self-reported address for inbound peers 338 var err error 339 addr, err = peer.NodeInfo().NetAddress() 340 if err != nil { 341 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 342 "peer", peer, "err", err) 343 return 344 } 345 } 346 go sw.reconnectToPeer(addr) 347 } 348 } 349 350 // StopPeerGracefully disconnects from a peer gracefully. 351 // TODO: handle graceful disconnects. 352 func (sw *Switch) StopPeerGracefully(peer Peer) { 353 sw.Logger.Info("Stopping peer gracefully") 354 sw.stopAndRemovePeer(peer, nil) 355 } 356 357 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 358 sw.transport.Cleanup(peer) 359 if err := peer.Stop(); err != nil { 360 sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly 361 } 362 363 for _, reactor := range sw.reactors { 364 reactor.RemovePeer(peer, reason) 365 } 366 367 // Removing a peer should go last to avoid a situation where a peer 368 // reconnect to our node and the switch calls InitPeer before 369 // RemovePeer is finished. 370 // https://github.com/MagHErmit/tendermint/issues/3338 371 if sw.peers.Remove(peer) { 372 sw.metrics.Peers.Add(float64(-1)) 373 } else { 374 // Removal of the peer has failed. The function above sets a flag within the peer to mark this. 375 // We keep this message here as information to the developer. 376 sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID()) 377 } 378 } 379 380 // reconnectToPeer tries to reconnect to the addr, first repeatedly 381 // with a fixed interval, then with exponential backoff. 382 // If no success after all that, it stops trying, and leaves it 383 // to the PEX/Addrbook to find the peer with the addr again 384 // NOTE: this will keep trying even if the handshake or auth fails. 385 // TODO: be more explicit with error types so we only retry on certain failures 386 // - ie. if we're getting ErrDuplicatePeer we can stop 387 // because the addrbook got us the peer back already 388 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 389 if sw.reconnecting.Has(string(addr.ID)) { 390 return 391 } 392 sw.reconnecting.Set(string(addr.ID), addr) 393 defer sw.reconnecting.Delete(string(addr.ID)) 394 395 start := time.Now() 396 sw.Logger.Info("Reconnecting to peer", "addr", addr) 397 for i := 0; i < reconnectAttempts; i++ { 398 if !sw.IsRunning() { 399 return 400 } 401 402 err := sw.DialPeerWithAddress(addr) 403 if err == nil { 404 return // success 405 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 406 return 407 } 408 409 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 410 // sleep a set amount 411 sw.randomSleep(reconnectInterval) 412 continue 413 } 414 415 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 416 "addr", addr, "elapsed", time.Since(start)) 417 for i := 0; i < reconnectBackOffAttempts; i++ { 418 if !sw.IsRunning() { 419 return 420 } 421 422 // sleep an exponentially increasing amount 423 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 424 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 425 426 err := sw.DialPeerWithAddress(addr) 427 if err == nil { 428 return // success 429 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 430 return 431 } 432 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 433 } 434 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 435 } 436 437 // SetAddrBook allows to set address book on Switch. 438 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 439 sw.addrBook = addrBook 440 } 441 442 // MarkPeerAsGood marks the given peer as good when it did something useful 443 // like contributed to consensus. 444 func (sw *Switch) MarkPeerAsGood(peer Peer) { 445 if sw.addrBook != nil { 446 sw.addrBook.MarkGood(peer.ID()) 447 } 448 } 449 450 //--------------------------------------------------------------------- 451 // Dialing 452 453 type privateAddr interface { 454 PrivateAddr() bool 455 } 456 457 func isPrivateAddr(err error) bool { 458 te, ok := err.(privateAddr) 459 return ok && te.PrivateAddr() 460 } 461 462 // DialPeersAsync dials a list of peers asynchronously in random order. 463 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 464 // It ignores ErrNetAddressLookup. However, if there are other errors, first 465 // encounter is returned. 466 // Nop if there are no peers. 467 func (sw *Switch) DialPeersAsync(peers []string) error { 468 netAddrs, errs := NewNetAddressStrings(peers) 469 // report all the errors 470 for _, err := range errs { 471 sw.Logger.Error("Error in peer's address", "err", err) 472 } 473 // return first non-ErrNetAddressLookup error 474 for _, err := range errs { 475 if _, ok := err.(ErrNetAddressLookup); ok { 476 continue 477 } 478 return err 479 } 480 sw.dialPeersAsync(netAddrs) 481 return nil 482 } 483 484 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 485 ourAddr := sw.NetAddress() 486 487 // TODO: this code feels like it's in the wrong place. 488 // The integration tests depend on the addrBook being saved 489 // right away but maybe we can change that. Recall that 490 // the addrBook is only written to disk every 2min 491 if sw.addrBook != nil { 492 // add peers to `addrBook` 493 for _, netAddr := range netAddrs { 494 // do not add our address or ID 495 if !netAddr.Same(ourAddr) { 496 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 497 if isPrivateAddr(err) { 498 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 499 } else { 500 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 501 } 502 } 503 } 504 } 505 // Persist some peers to disk right away. 506 // NOTE: integration tests depend on this 507 sw.addrBook.Save() 508 } 509 510 // permute the list, dial them in random order. 511 perm := sw.rng.Perm(len(netAddrs)) 512 for i := 0; i < len(perm); i++ { 513 go func(i int) { 514 j := perm[i] 515 addr := netAddrs[j] 516 517 if addr.Same(ourAddr) { 518 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 519 return 520 } 521 522 sw.randomSleep(0) 523 524 err := sw.DialPeerWithAddress(addr) 525 if err != nil { 526 switch err.(type) { 527 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 528 sw.Logger.Debug("Error dialing peer", "err", err) 529 default: 530 sw.Logger.Error("Error dialing peer", "err", err) 531 } 532 } 533 }(i) 534 } 535 } 536 537 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 538 // and authenticates successfully. 539 // If we're currently dialing this address or it belongs to an existing peer, 540 // ErrCurrentlyDialingOrExistingAddress is returned. 541 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 542 if sw.IsDialingOrExistingAddress(addr) { 543 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 544 } 545 546 sw.dialing.Set(string(addr.ID), addr) 547 defer sw.dialing.Delete(string(addr.ID)) 548 549 return sw.addOutboundPeerWithConfig(addr, sw.config) 550 } 551 552 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 553 func (sw *Switch) randomSleep(interval time.Duration) { 554 r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 555 time.Sleep(r + interval) 556 } 557 558 // IsDialingOrExistingAddress returns true if switch has a peer with the given 559 // address or dialing it at the moment. 560 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 561 return sw.dialing.Has(string(addr.ID)) || 562 sw.peers.Has(addr.ID) || 563 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 564 } 565 566 // AddPersistentPeers allows you to set persistent peers. It ignores 567 // ErrNetAddressLookup. However, if there are other errors, first encounter is 568 // returned. 569 func (sw *Switch) AddPersistentPeers(addrs []string) error { 570 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 571 netAddrs, errs := NewNetAddressStrings(addrs) 572 // report all the errors 573 for _, err := range errs { 574 sw.Logger.Error("Error in peer's address", "err", err) 575 } 576 // return first non-ErrNetAddressLookup error 577 for _, err := range errs { 578 if _, ok := err.(ErrNetAddressLookup); ok { 579 continue 580 } 581 return err 582 } 583 sw.persistentPeersAddrs = netAddrs 584 return nil 585 } 586 587 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 588 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 589 for i, id := range ids { 590 err := validateID(ID(id)) 591 if err != nil { 592 return fmt.Errorf("wrong ID #%d: %w", i, err) 593 } 594 sw.unconditionalPeerIDs[ID(id)] = struct{}{} 595 } 596 return nil 597 } 598 599 func (sw *Switch) AddPrivatePeerIDs(ids []string) error { 600 validIDs := make([]string, 0, len(ids)) 601 for i, id := range ids { 602 err := validateID(ID(id)) 603 if err != nil { 604 return fmt.Errorf("wrong ID #%d: %w", i, err) 605 } 606 validIDs = append(validIDs, id) 607 } 608 609 sw.addrBook.AddPrivateIDs(validIDs) 610 611 return nil 612 } 613 614 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 615 for _, pa := range sw.persistentPeersAddrs { 616 if pa.Equals(na) { 617 return true 618 } 619 } 620 return false 621 } 622 623 func (sw *Switch) acceptRoutine() { 624 for { 625 p, err := sw.transport.Accept(peerConfig{ 626 chDescs: sw.chDescs, 627 onPeerError: sw.StopPeerForError, 628 reactorsByCh: sw.reactorsByCh, 629 metrics: sw.metrics, 630 isPersistent: sw.IsPeerPersistent, 631 }) 632 if err != nil { 633 switch err := err.(type) { 634 case ErrRejected: 635 if err.IsSelf() { 636 // Remove the given address from the address book and add to our addresses 637 // to avoid dialing in the future. 638 addr := err.Addr() 639 sw.addrBook.RemoveAddress(&addr) 640 sw.addrBook.AddOurAddress(&addr) 641 } 642 643 sw.Logger.Info( 644 "Inbound Peer rejected", 645 "err", err, 646 "numPeers", sw.peers.Size(), 647 ) 648 649 continue 650 case ErrFilterTimeout: 651 sw.Logger.Error( 652 "Peer filter timed out", 653 "err", err, 654 ) 655 656 continue 657 case ErrTransportClosed: 658 sw.Logger.Error( 659 "Stopped accept routine, as transport is closed", 660 "numPeers", sw.peers.Size(), 661 ) 662 default: 663 sw.Logger.Error( 664 "Accept on transport errored", 665 "err", err, 666 "numPeers", sw.peers.Size(), 667 ) 668 // We could instead have a retry loop around the acceptRoutine, 669 // but that would need to stop and let the node shutdown eventually. 670 // So might as well panic and let process managers restart the node. 671 // There's no point in letting the node run without the acceptRoutine, 672 // since it won't be able to accept new connections. 673 panic(fmt.Errorf("accept routine exited: %v", err)) 674 } 675 676 break 677 } 678 679 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 680 // Ignore connection if we already have enough peers. 681 _, in, _ := sw.NumPeers() 682 if in >= sw.config.MaxNumInboundPeers { 683 sw.Logger.Info( 684 "Ignoring inbound connection: already have enough inbound peers", 685 "address", p.SocketAddr(), 686 "have", in, 687 "max", sw.config.MaxNumInboundPeers, 688 ) 689 690 sw.transport.Cleanup(p) 691 692 continue 693 } 694 695 } 696 697 if err := sw.addPeer(p); err != nil { 698 sw.transport.Cleanup(p) 699 if p.IsRunning() { 700 _ = p.Stop() 701 } 702 sw.Logger.Info( 703 "Ignoring inbound connection: error while adding peer", 704 "err", err, 705 "id", p.ID(), 706 ) 707 } 708 } 709 } 710 711 // dial the peer; make secret connection; authenticate against the dialed ID; 712 // add the peer. 713 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 714 // If peer is started successfully, reconnectLoop will start when 715 // StopPeerForError is called. 716 func (sw *Switch) addOutboundPeerWithConfig( 717 addr *NetAddress, 718 cfg *config.P2PConfig, 719 ) error { 720 sw.Logger.Info("Dialing peer", "address", addr) 721 722 // XXX(xla): Remove the leakage of test concerns in implementation. 723 if cfg.TestDialFail { 724 go sw.reconnectToPeer(addr) 725 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 726 } 727 728 p, err := sw.transport.Dial(*addr, peerConfig{ 729 chDescs: sw.chDescs, 730 onPeerError: sw.StopPeerForError, 731 isPersistent: sw.IsPeerPersistent, 732 reactorsByCh: sw.reactorsByCh, 733 metrics: sw.metrics, 734 }) 735 if err != nil { 736 if e, ok := err.(ErrRejected); ok { 737 if e.IsSelf() { 738 // Remove the given address from the address book and add to our addresses 739 // to avoid dialing in the future. 740 sw.addrBook.RemoveAddress(addr) 741 sw.addrBook.AddOurAddress(addr) 742 743 return err 744 } 745 } 746 747 // retry persistent peers after 748 // any dial error besides IsSelf() 749 if sw.IsPeerPersistent(addr) { 750 go sw.reconnectToPeer(addr) 751 } 752 753 return err 754 } 755 756 if err := sw.addPeer(p); err != nil { 757 sw.transport.Cleanup(p) 758 if p.IsRunning() { 759 _ = p.Stop() 760 } 761 return err 762 } 763 764 return nil 765 } 766 767 func (sw *Switch) filterPeer(p Peer) error { 768 // Avoid duplicate 769 if sw.peers.Has(p.ID()) { 770 return ErrRejected{id: p.ID(), isDuplicate: true} 771 } 772 773 errc := make(chan error, len(sw.peerFilters)) 774 775 for _, f := range sw.peerFilters { 776 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 777 errc <- f(sw.peers, p) 778 }(f, p, errc) 779 } 780 781 for i := 0; i < cap(errc); i++ { 782 select { 783 case err := <-errc: 784 if err != nil { 785 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 786 } 787 case <-time.After(sw.filterTimeout): 788 return ErrFilterTimeout{} 789 } 790 } 791 792 return nil 793 } 794 795 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 796 // the peer is filtered out or failed to start or can't be added. 797 func (sw *Switch) addPeer(p Peer) error { 798 if err := sw.filterPeer(p); err != nil { 799 return err 800 } 801 802 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 803 804 // Handle the shut down case where the switch has stopped but we're 805 // concurrently trying to add a peer. 806 if !sw.IsRunning() { 807 // XXX should this return an error or just log and terminate? 808 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 809 return nil 810 } 811 812 // Add some data to the peer, which is required by reactors. 813 for _, reactor := range sw.reactors { 814 p = reactor.InitPeer(p) 815 } 816 817 // Start the peer's send/recv routines. 818 // Must start it before adding it to the peer set 819 // to prevent Start and Stop from being called concurrently. 820 err := p.Start() 821 if err != nil { 822 // Should never happen 823 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 824 return err 825 } 826 827 // Add the peer to PeerSet. Do this before starting the reactors 828 // so that if Receive errors, we will find the peer and remove it. 829 // Add should not err since we already checked peers.Has(). 830 if err := sw.peers.Add(p); err != nil { 831 switch err.(type) { 832 case ErrPeerRemoval: 833 sw.Logger.Error("Error starting peer ", 834 " err ", "Peer has already errored and removal was attempted.", 835 "peer", p.ID()) 836 } 837 return err 838 } 839 sw.metrics.Peers.Add(float64(1)) 840 841 // Start all the reactor protocols on the peer. 842 for _, reactor := range sw.reactors { 843 reactor.AddPeer(p) 844 } 845 846 sw.Logger.Info("Added peer", "peer", p) 847 848 return nil 849 }