github.com/okex/exchain@v1.8.0/libs/tendermint/p2p/switch.go (about) 1 package p2p 2 3 import ( 4 "fmt" 5 "math" 6 "sync" 7 "time" 8 9 "github.com/pkg/errors" 10 11 "github.com/okex/exchain/libs/tendermint/config" 12 "github.com/okex/exchain/libs/tendermint/libs/cmap" 13 "github.com/okex/exchain/libs/tendermint/libs/rand" 14 "github.com/okex/exchain/libs/tendermint/libs/service" 15 "github.com/okex/exchain/libs/tendermint/p2p/conn" 16 ) 17 18 const ( 19 // wait a random amount of time from this interval 20 // before dialing peers or reconnecting to help prevent DoS 21 dialRandomizerIntervalMilliseconds = 3000 22 23 // repeatedly try to reconnect for a few minutes 24 // ie. 5 * 20 = 100s 25 reconnectAttempts = 20 26 reconnectInterval = 5 * time.Second 27 28 // then move into exponential backoff mode for ~1day 29 // ie. 3**10 = 16hrs 30 reconnectBackOffAttempts = 10 31 reconnectBackOffBaseSeconds = 3 32 ) 33 34 // MConnConfig returns an MConnConfig with fields updated 35 // from the P2PConfig. 36 func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig { 37 mConfig := conn.DefaultMConnConfig() 38 mConfig.FlushThrottle = cfg.FlushThrottleTimeout 39 mConfig.SendRate = cfg.SendRate 40 mConfig.RecvRate = cfg.RecvRate 41 mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize 42 return mConfig 43 } 44 45 //----------------------------------------------------------------------------- 46 47 // An AddrBook represents an address book from the pex package, which is used 48 // to store peer addresses. 49 type AddrBook interface { 50 AddAddress(addr *NetAddress, src *NetAddress) error 51 AddOurAddress(*NetAddress) 52 OurAddress(*NetAddress) bool 53 MarkGood(ID) 54 RemoveAddress(*NetAddress) 55 HasAddress(*NetAddress) bool 56 Save() 57 } 58 59 // PeerFilterFunc to be implemented by filter hooks after a new Peer has been 60 // fully setup. 61 type PeerFilterFunc func(IPeerSet, Peer) error 62 63 //----------------------------------------------------------------------------- 64 65 // Switch handles peer connections and exposes an API to receive incoming messages 66 // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one 67 // or more `Channels`. So while sending outgoing messages is typically performed on the peer, 68 // incoming messages are received on the reactor. 69 type Switch struct { 70 service.BaseService 71 72 config *config.P2PConfig 73 reactors map[string]Reactor 74 chDescs []*conn.ChannelDescriptor 75 reactorsByCh map[byte]Reactor 76 peers *PeerSet 77 dialing *cmap.CMap 78 reconnecting *cmap.CMap 79 nodeInfo NodeInfo // our node info 80 nodeKey *NodeKey // our node privkey 81 addrBook AddrBook 82 // peers addresses with whom we'll maintain constant connection 83 persistentPeersAddrs []*NetAddress 84 unconditionalPeerIDs map[ID]struct{} 85 86 transport Transport 87 88 filterTimeout time.Duration 89 peerFilters []PeerFilterFunc 90 91 rng *rand.Rand // seed for randomizing dial times and orders 92 93 metrics *Metrics 94 } 95 96 // NetAddress returns the address the switch is listening on. 97 func (sw *Switch) NetAddress() *NetAddress { 98 addr := sw.transport.NetAddress() 99 return &addr 100 } 101 102 func (sw *Switch) ListenAddress() string { 103 return sw.config.ListenAddress 104 } 105 106 // SwitchOption sets an optional parameter on the Switch. 107 type SwitchOption func(*Switch) 108 109 // NewSwitch creates a new Switch with the given config. 110 func NewSwitch( 111 cfg *config.P2PConfig, 112 transport Transport, 113 options ...SwitchOption, 114 ) *Switch { 115 sw := &Switch{ 116 config: cfg, 117 reactors: make(map[string]Reactor), 118 chDescs: make([]*conn.ChannelDescriptor, 0), 119 reactorsByCh: make(map[byte]Reactor), 120 peers: NewPeerSet(), 121 dialing: cmap.NewCMap(), 122 reconnecting: cmap.NewCMap(), 123 metrics: NopMetrics(), 124 transport: transport, 125 filterTimeout: defaultFilterTimeout, 126 persistentPeersAddrs: make([]*NetAddress, 0), 127 unconditionalPeerIDs: make(map[ID]struct{}), 128 } 129 130 // Ensure we have a completely undeterministic PRNG. 131 sw.rng = rand.NewRand() 132 133 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 134 135 for _, option := range options { 136 option(sw) 137 } 138 139 return sw 140 } 141 142 // SwitchFilterTimeout sets the timeout used for peer filters. 143 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 144 return func(sw *Switch) { sw.filterTimeout = timeout } 145 } 146 147 // SwitchPeerFilters sets the filters for rejection of new peers. 148 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 149 return func(sw *Switch) { sw.peerFilters = filters } 150 } 151 152 // WithMetrics sets the metrics. 153 func WithMetrics(metrics *Metrics) SwitchOption { 154 return func(sw *Switch) { sw.metrics = metrics } 155 } 156 157 //--------------------------------------------------------------------- 158 // Switch setup 159 160 // AddReactor adds the given reactor to the switch. 161 // NOTE: Not goroutine safe. 162 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 163 for _, chDesc := range reactor.GetChannels() { 164 chID := chDesc.ID 165 // No two reactors can share the same channel. 166 if sw.reactorsByCh[chID] != nil { 167 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 168 } 169 sw.chDescs = append(sw.chDescs, chDesc) 170 sw.reactorsByCh[chID] = reactor 171 } 172 sw.reactors[name] = reactor 173 reactor.SetSwitch(sw) 174 return reactor 175 } 176 177 // RemoveReactor removes the given Reactor from the Switch. 178 // NOTE: Not goroutine safe. 179 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 180 for _, chDesc := range reactor.GetChannels() { 181 // remove channel description 182 for i := 0; i < len(sw.chDescs); i++ { 183 if chDesc.ID == sw.chDescs[i].ID { 184 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 185 break 186 } 187 } 188 delete(sw.reactorsByCh, chDesc.ID) 189 } 190 delete(sw.reactors, name) 191 reactor.SetSwitch(nil) 192 } 193 194 // Reactors returns a map of reactors registered on the switch. 195 // NOTE: Not goroutine safe. 196 func (sw *Switch) Reactors() map[string]Reactor { 197 return sw.reactors 198 } 199 200 // Reactor returns the reactor with the given name. 201 // NOTE: Not goroutine safe. 202 func (sw *Switch) Reactor(name string) Reactor { 203 return sw.reactors[name] 204 } 205 206 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 207 // NOTE: Not goroutine safe. 208 func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) { 209 sw.nodeInfo = nodeInfo 210 } 211 212 // NodeInfo returns the switch's NodeInfo. 213 // NOTE: Not goroutine safe. 214 func (sw *Switch) NodeInfo() NodeInfo { 215 return sw.nodeInfo 216 } 217 218 // SetNodeKey sets the switch's private key for authenticated encryption. 219 // NOTE: Not goroutine safe. 220 func (sw *Switch) SetNodeKey(nodeKey *NodeKey) { 221 sw.nodeKey = nodeKey 222 } 223 224 //--------------------------------------------------------------------- 225 // Service start/stop 226 227 // OnStart implements BaseService. It starts all the reactors and peers. 228 func (sw *Switch) OnStart() error { 229 // Start reactors 230 for _, reactor := range sw.reactors { 231 err := reactor.Start() 232 if err != nil { 233 return errors.Wrapf(err, "failed to start %v", reactor) 234 } 235 } 236 237 // Start accepting Peers. 238 go sw.acceptRoutine() 239 240 return nil 241 } 242 243 // OnStop implements BaseService. It stops all peers and reactors. 244 func (sw *Switch) OnStop() { 245 // Stop peers 246 for _, p := range sw.peers.List() { 247 sw.stopAndRemovePeer(p, nil) 248 } 249 250 // Stop reactors 251 sw.Logger.Debug("Switch: Stopping reactors") 252 for _, reactor := range sw.reactors { 253 reactor.Stop() 254 } 255 } 256 257 //--------------------------------------------------------------------- 258 // Peers 259 260 // Broadcast runs a go routine for each attempted send, which will block trying 261 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 262 // success values for each attempted send (false if times out). Channel will be 263 // closed once msg bytes are sent to all peers (or time out). 264 // 265 // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved. 266 func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool { 267 sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", fmt.Sprintf("%X", msgBytes)) 268 269 peers := sw.peers.List() 270 var wg sync.WaitGroup 271 wg.Add(len(peers)) 272 successChan := make(chan bool, len(peers)) 273 274 for _, peer := range peers { 275 go func(p Peer) { 276 defer wg.Done() 277 success := p.Send(chID, msgBytes) 278 successChan <- success 279 }(peer) 280 } 281 282 go func() { 283 wg.Wait() 284 close(successChan) 285 }() 286 287 return successChan 288 } 289 290 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 291 // unconditional peers are not counted here. 292 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 293 peers := sw.peers.List() 294 for _, peer := range peers { 295 if peer.IsOutbound() { 296 if !sw.IsPeerUnconditional(peer.ID()) { 297 outbound++ 298 } 299 } else { 300 if !sw.IsPeerUnconditional(peer.ID()) { 301 inbound++ 302 } 303 } 304 } 305 dialing = sw.dialing.Size() 306 return 307 } 308 309 func (sw *Switch) IsPeerUnconditional(id ID) bool { 310 _, ok := sw.unconditionalPeerIDs[id] 311 return ok 312 } 313 314 // MaxNumOutboundPeers returns a maximum number of outbound peers. 315 func (sw *Switch) MaxNumOutboundPeers() int { 316 return sw.config.MaxNumOutboundPeers 317 } 318 319 // Peers returns the set of peers that are connected to the switch. 320 func (sw *Switch) Peers() IPeerSet { 321 return sw.peers 322 } 323 324 // StopPeerForError disconnects from a peer due to external error. 325 // If the peer is persistent, it will attempt to reconnect. 326 // TODO: make record depending on reason. 327 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 328 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 329 sw.stopAndRemovePeer(peer, reason) 330 331 if peer.IsPersistent() { 332 var addr *NetAddress 333 if peer.IsOutbound() { // socket address for outbound peers 334 addr = peer.SocketAddr() 335 } else { // self-reported address for inbound peers 336 var err error 337 addr, err = peer.NodeInfo().NetAddress() 338 if err != nil { 339 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 340 "peer", peer, "err", err) 341 return 342 } 343 } 344 go sw.reconnectToPeer(addr) 345 } 346 } 347 348 // StopPeerGracefully disconnects from a peer gracefully. 349 // TODO: handle graceful disconnects. 350 func (sw *Switch) StopPeerGracefully(peer Peer) { 351 sw.Logger.Info("Stopping peer gracefully") 352 sw.stopAndRemovePeer(peer, nil) 353 } 354 355 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 356 sw.transport.Cleanup(peer) 357 peer.Stop() 358 359 for _, reactor := range sw.reactors { 360 reactor.RemovePeer(peer, reason) 361 } 362 363 // Removing a peer should go last to avoid a situation where a peer 364 // reconnect to our node and the switch calls InitPeer before 365 // RemovePeer is finished. 366 // https://github.com/tendermint/tendermint/issues/3338 367 if sw.peers.Remove(peer) { 368 sw.metrics.Peers.Add(float64(-1)) 369 } 370 } 371 372 // reconnectToPeer tries to reconnect to the addr, first repeatedly 373 // with a fixed interval, then with exponential backoff. 374 // If no success after all that, it stops trying, and leaves it 375 // to the PEX/Addrbook to find the peer with the addr again 376 // NOTE: this will keep trying even if the handshake or auth fails. 377 // TODO: be more explicit with error types so we only retry on certain failures 378 // - ie. if we're getting ErrDuplicatePeer we can stop 379 // because the addrbook got us the peer back already 380 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 381 if sw.reconnecting.Has(string(addr.ID)) { 382 return 383 } 384 sw.reconnecting.Set(string(addr.ID), addr) 385 defer sw.reconnecting.Delete(string(addr.ID)) 386 387 start := time.Now() 388 sw.Logger.Info("Reconnecting to peer", "addr", addr) 389 for i := 0; i < reconnectAttempts; i++ { 390 if !sw.IsRunning() { 391 return 392 } 393 394 err := sw.DialPeerWithAddress(addr) 395 if err == nil { 396 return // success 397 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 398 return 399 } 400 401 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 402 // sleep a set amount 403 sw.randomSleep(reconnectInterval) 404 continue 405 } 406 407 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 408 "addr", addr, "elapsed", time.Since(start)) 409 for i := 0; i < reconnectBackOffAttempts; i++ { 410 if !sw.IsRunning() { 411 return 412 } 413 414 // sleep an exponentially increasing amount 415 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 416 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 417 418 err := sw.DialPeerWithAddress(addr) 419 if err == nil { 420 return // success 421 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 422 return 423 } 424 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 425 } 426 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 427 } 428 429 // SetAddrBook allows to set address book on Switch. 430 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 431 sw.addrBook = addrBook 432 } 433 434 // MarkPeerAsGood marks the given peer as good when it did something useful 435 // like contributed to consensus. 436 func (sw *Switch) MarkPeerAsGood(peer Peer) { 437 if sw.addrBook != nil { 438 sw.addrBook.MarkGood(peer.ID()) 439 } 440 } 441 442 //--------------------------------------------------------------------- 443 // Dialing 444 445 type privateAddr interface { 446 PrivateAddr() bool 447 } 448 449 func isPrivateAddr(err error) bool { 450 te, ok := errors.Cause(err).(privateAddr) 451 return ok && te.PrivateAddr() 452 } 453 454 // DialPeersAsync dials a list of peers asynchronously in random order. 455 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 456 // It ignores ErrNetAddressLookup. However, if there are other errors, first 457 // encounter is returned. 458 // Nop if there are no peers. 459 func (sw *Switch) DialPeersAsync(peers []string) error { 460 netAddrs, errs := NewNetAddressStrings(peers) 461 // report all the errors 462 for _, err := range errs { 463 sw.Logger.Error("Error in peer's address", "err", err) 464 } 465 // return first non-ErrNetAddressLookup error 466 for _, err := range errs { 467 if _, ok := err.(ErrNetAddressLookup); ok { 468 continue 469 } 470 return err 471 } 472 sw.dialPeersAsync(netAddrs) 473 return nil 474 } 475 476 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 477 ourAddr := sw.NetAddress() 478 479 // TODO: this code feels like it's in the wrong place. 480 // The integration tests depend on the addrBook being saved 481 // right away but maybe we can change that. Recall that 482 // the addrBook is only written to disk every 2min 483 if sw.addrBook != nil { 484 // add peers to `addrBook` 485 for _, netAddr := range netAddrs { 486 // do not add our address or ID 487 if !netAddr.Same(ourAddr) { 488 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 489 if isPrivateAddr(err) { 490 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 491 } else { 492 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 493 } 494 } 495 } 496 } 497 // Persist some peers to disk right away. 498 // NOTE: integration tests depend on this 499 sw.addrBook.Save() 500 } 501 502 // permute the list, dial them in random order. 503 perm := sw.rng.Perm(len(netAddrs)) 504 for i := 0; i < len(perm); i++ { 505 go func(i int) { 506 j := perm[i] 507 addr := netAddrs[j] 508 509 if addr.Same(ourAddr) { 510 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 511 return 512 } 513 514 sw.randomSleep(0) 515 516 err := sw.DialPeerWithAddress(addr) 517 if err != nil { 518 switch err.(type) { 519 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 520 sw.Logger.Debug("Error dialing peer", "err", err) 521 default: 522 sw.Logger.Error("Error dialing peer", "err", err) 523 } 524 } 525 }(i) 526 } 527 } 528 529 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 530 // and authenticates successfully. 531 // If we're currently dialing this address or it belongs to an existing peer, 532 // ErrCurrentlyDialingOrExistingAddress is returned. 533 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 534 if sw.IsDialingOrExistingAddress(addr) { 535 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 536 } 537 538 sw.dialing.Set(string(addr.ID), addr) 539 defer sw.dialing.Delete(string(addr.ID)) 540 541 return sw.addOutboundPeerWithConfig(addr, sw.config) 542 } 543 544 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 545 func (sw *Switch) randomSleep(interval time.Duration) { 546 r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 547 time.Sleep(r + interval) 548 } 549 550 // IsDialingOrExistingAddress returns true if switch has a peer with the given 551 // address or dialing it at the moment. 552 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 553 return sw.dialing.Has(string(addr.ID)) || 554 sw.peers.Has(addr.ID) || 555 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 556 } 557 558 // AddPersistentPeers allows you to set persistent peers. It ignores 559 // ErrNetAddressLookup. However, if there are other errors, first encounter is 560 // returned. 561 func (sw *Switch) AddPersistentPeers(addrs []string) error { 562 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 563 netAddrs, errs := NewNetAddressStrings(addrs) 564 // report all the errors 565 for _, err := range errs { 566 sw.Logger.Error("Error in peer's address", "err", err) 567 } 568 // return first non-ErrNetAddressLookup error 569 for _, err := range errs { 570 if _, ok := err.(ErrNetAddressLookup); ok { 571 continue 572 } 573 return err 574 } 575 sw.persistentPeersAddrs = netAddrs 576 return nil 577 } 578 579 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 580 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 581 for i, id := range ids { 582 err := validateID(ID(id)) 583 if err != nil { 584 return errors.Wrapf(err, "wrong ID #%d", i) 585 } 586 sw.unconditionalPeerIDs[ID(id)] = struct{}{} 587 } 588 return nil 589 } 590 591 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 592 for _, pa := range sw.persistentPeersAddrs { 593 if pa.Equals(na) { 594 return true 595 } 596 } 597 return false 598 } 599 600 func (sw *Switch) acceptRoutine() { 601 for { 602 p, err := sw.transport.Accept(peerConfig{ 603 chDescs: sw.chDescs, 604 onPeerError: sw.StopPeerForError, 605 reactorsByCh: sw.reactorsByCh, 606 metrics: sw.metrics, 607 isPersistent: sw.IsPeerPersistent, 608 }) 609 if err != nil { 610 switch err := err.(type) { 611 case ErrRejected: 612 if err.IsSelf() { 613 // Remove the given address from the address book and add to our addresses 614 // to avoid dialing in the future. 615 addr := err.Addr() 616 sw.addrBook.RemoveAddress(&addr) 617 sw.addrBook.AddOurAddress(&addr) 618 } 619 620 sw.Logger.Info( 621 "Inbound Peer rejected", 622 "err", err, 623 "numPeers", sw.peers.Size(), 624 ) 625 626 continue 627 case ErrFilterTimeout: 628 sw.Logger.Error( 629 "Peer filter timed out", 630 "err", err, 631 ) 632 633 continue 634 case ErrTransportClosed: 635 sw.Logger.Error( 636 "Stopped accept routine, as transport is closed", 637 "numPeers", sw.peers.Size(), 638 ) 639 default: 640 sw.Logger.Error( 641 "Accept on transport errored", 642 "err", err, 643 "numPeers", sw.peers.Size(), 644 ) 645 // We could instead have a retry loop around the acceptRoutine, 646 // but that would need to stop and let the node shutdown eventually. 647 // So might as well panic and let process managers restart the node. 648 // There's no point in letting the node run without the acceptRoutine, 649 // since it won't be able to accept new connections. 650 panic(fmt.Errorf("accept routine exited: %v", err)) 651 } 652 653 break 654 } 655 656 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 657 // Ignore connection if we already have enough peers. 658 _, in, _ := sw.NumPeers() 659 if in >= sw.config.MaxNumInboundPeers { 660 sw.Logger.Info( 661 "Ignoring inbound connection: already have enough inbound peers", 662 "address", p.SocketAddr(), 663 "have", in, 664 "max", sw.config.MaxNumInboundPeers, 665 ) 666 667 sw.transport.Cleanup(p) 668 669 continue 670 } 671 672 } 673 674 if err := sw.addPeer(p); err != nil { 675 sw.transport.Cleanup(p) 676 if p.IsRunning() { 677 _ = p.Stop() 678 } 679 sw.Logger.Info( 680 "Ignoring inbound connection: error while adding peer", 681 "err", err, 682 "id", p.ID(), 683 ) 684 } 685 } 686 } 687 688 // dial the peer; make secret connection; authenticate against the dialed ID; 689 // add the peer. 690 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 691 // If peer is started successfully, reconnectLoop will start when 692 // StopPeerForError is called. 693 func (sw *Switch) addOutboundPeerWithConfig( 694 addr *NetAddress, 695 cfg *config.P2PConfig, 696 ) error { 697 sw.Logger.Info("Dialing peer", "address", addr) 698 699 // XXX(xla): Remove the leakage of test concerns in implementation. 700 if cfg.TestDialFail { 701 go sw.reconnectToPeer(addr) 702 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 703 } 704 705 p, err := sw.transport.Dial(*addr, peerConfig{ 706 chDescs: sw.chDescs, 707 onPeerError: sw.StopPeerForError, 708 isPersistent: sw.IsPeerPersistent, 709 reactorsByCh: sw.reactorsByCh, 710 metrics: sw.metrics, 711 }) 712 if err != nil { 713 if e, ok := err.(ErrRejected); ok { 714 if e.IsSelf() { 715 // Remove the given address from the address book and add to our addresses 716 // to avoid dialing in the future. 717 sw.addrBook.RemoveAddress(addr) 718 sw.addrBook.AddOurAddress(addr) 719 720 return err 721 } 722 } 723 724 // retry persistent peers after 725 // any dial error besides IsSelf() 726 if sw.IsPeerPersistent(addr) { 727 go sw.reconnectToPeer(addr) 728 } 729 730 return err 731 } 732 733 if err := sw.addPeer(p); err != nil { 734 sw.transport.Cleanup(p) 735 if p.IsRunning() { 736 _ = p.Stop() 737 } 738 return err 739 } 740 741 return nil 742 } 743 744 func (sw *Switch) filterPeer(p Peer) error { 745 // Avoid duplicate 746 if sw.peers.Has(p.ID()) { 747 return ErrRejected{id: p.ID(), isDuplicate: true} 748 } 749 750 errc := make(chan error, len(sw.peerFilters)) 751 752 for _, f := range sw.peerFilters { 753 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 754 errc <- f(sw.peers, p) 755 }(f, p, errc) 756 } 757 758 for i := 0; i < cap(errc); i++ { 759 select { 760 case err := <-errc: 761 if err != nil { 762 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 763 } 764 case <-time.After(sw.filterTimeout): 765 return ErrFilterTimeout{} 766 } 767 } 768 769 return nil 770 } 771 772 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 773 // the peer is filtered out or failed to start or can't be added. 774 func (sw *Switch) addPeer(p Peer) error { 775 if err := sw.filterPeer(p); err != nil { 776 return err 777 } 778 779 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 780 781 // Handle the shut down case where the switch has stopped but we're 782 // concurrently trying to add a peer. 783 if !sw.IsRunning() { 784 // XXX should this return an error or just log and terminate? 785 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 786 return nil 787 } 788 789 // Add some data to the peer, which is required by reactors. 790 for _, reactor := range sw.reactors { 791 p = reactor.InitPeer(p) 792 } 793 794 // Start the peer's send/recv routines. 795 // Must start it before adding it to the peer set 796 // to prevent Start and Stop from being called concurrently. 797 err := p.Start() 798 if err != nil { 799 // Should never happen 800 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 801 return err 802 } 803 804 // Add the peer to PeerSet. Do this before starting the reactors 805 // so that if Receive errors, we will find the peer and remove it. 806 // Add should not err since we already checked peers.Has(). 807 if err := sw.peers.Add(p); err != nil { 808 return err 809 } 810 sw.metrics.Peers.Add(float64(1)) 811 812 // Start all the reactor protocols on the peer. 813 for _, reactor := range sw.reactors { 814 reactor.AddPeer(p) 815 } 816 817 sw.Logger.Info("Added peer", "peer", p) 818 819 return nil 820 }