github.com/line/ostracon@v1.0.10-0.20230328032236-7f20145f065d/p2p/switch.go (about) 1 package p2p 2 3 import ( 4 "fmt" 5 "math" 6 "sync" 7 "time" 8 9 "github.com/line/ostracon/config" 10 "github.com/line/ostracon/libs/cmap" 11 "github.com/line/ostracon/libs/rand" 12 "github.com/line/ostracon/libs/service" 13 "github.com/line/ostracon/p2p/conn" 14 ) 15 16 const ( 17 // wait a random amount of time from this interval 18 // before dialing peers or reconnecting to help prevent DoS 19 dialRandomizerIntervalMilliseconds = 3000 20 21 // repeatedly try to reconnect for a few minutes 22 // ie. 5 * 20 = 100s 23 reconnectAttempts = 20 24 reconnectInterval = 5 * time.Second 25 26 // then move into exponential backoff mode for ~1day 27 // ie. 3**10 = 16hrs 28 reconnectBackOffAttempts = 10 29 reconnectBackOffBaseSeconds = 3 30 ) 31 32 // MConnConfig returns an MConnConfig with fields updated 33 // from the P2PConfig. 34 func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig { 35 mConfig := conn.DefaultMConnConfig() 36 mConfig.FlushThrottle = cfg.FlushThrottleTimeout 37 mConfig.SendRate = cfg.SendRate 38 mConfig.RecvRate = cfg.RecvRate 39 mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize 40 mConfig.RecvAsync = cfg.RecvAsync 41 return mConfig 42 } 43 44 //----------------------------------------------------------------------------- 45 46 // An AddrBook represents an address book from the pex package, which is used 47 // to store peer addresses. 48 type AddrBook interface { 49 AddAddress(addr *NetAddress, src *NetAddress) error 50 AddPrivateIDs([]string) 51 AddOurAddress(*NetAddress) 52 OurAddress(*NetAddress) bool 53 MarkGood(ID) 54 RemoveAddress(*NetAddress) 55 HasAddress(*NetAddress) bool 56 Save() 57 } 58 59 // PeerFilterFunc to be implemented by filter hooks after a new Peer has been 60 // fully setup. 61 type PeerFilterFunc func(IPeerSet, Peer) error 62 63 //----------------------------------------------------------------------------- 64 65 // Switch handles peer connections and exposes an API to receive incoming messages 66 // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one 67 // or more `Channels`. So while sending outgoing messages is typically performed on the peer, 68 // incoming messages are received on the reactor. 69 type Switch struct { 70 service.BaseService 71 72 config *config.P2PConfig 73 reactors map[string]Reactor 74 chDescs []*conn.ChannelDescriptor 75 reactorsByCh map[byte]Reactor 76 peers *PeerSet 77 dialing *cmap.CMap 78 reconnecting *cmap.CMap 79 nodeInfo NodeInfo // our node info 80 nodeKey *NodeKey // our node privkey 81 addrBook AddrBook 82 // peers addresses with whom we'll maintain constant connection 83 persistentPeersAddrs []*NetAddress 84 unconditionalPeerIDs map[ID]struct{} 85 86 transport Transport 87 88 filterTimeout time.Duration 89 peerFilters []PeerFilterFunc 90 91 rng *rand.Rand // seed for randomizing dial times and orders 92 93 metrics *Metrics 94 } 95 96 // NetAddress returns the address the switch is listening on. 97 func (sw *Switch) NetAddress() *NetAddress { 98 addr := sw.transport.NetAddress() 99 return &addr 100 } 101 102 // SwitchOption sets an optional parameter on the Switch. 103 type SwitchOption func(*Switch) 104 105 // NewSwitch creates a new Switch with the given config. 106 func NewSwitch( 107 cfg *config.P2PConfig, 108 transport Transport, 109 options ...SwitchOption, 110 ) *Switch { 111 sw := &Switch{ 112 config: cfg, 113 reactors: make(map[string]Reactor), 114 chDescs: make([]*conn.ChannelDescriptor, 0), 115 reactorsByCh: make(map[byte]Reactor), 116 peers: NewPeerSet(), 117 dialing: cmap.NewCMap(), 118 reconnecting: cmap.NewCMap(), 119 metrics: NopMetrics(), 120 transport: transport, 121 filterTimeout: defaultFilterTimeout, 122 persistentPeersAddrs: make([]*NetAddress, 0), 123 unconditionalPeerIDs: make(map[ID]struct{}), 124 } 125 126 // Ensure we have a completely undeterministic PRNG. 127 sw.rng = rand.NewRand() 128 129 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 130 131 for _, option := range options { 132 option(sw) 133 } 134 135 return sw 136 } 137 138 // SwitchFilterTimeout sets the timeout used for peer filters. 139 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 140 return func(sw *Switch) { sw.filterTimeout = timeout } 141 } 142 143 // SwitchPeerFilters sets the filters for rejection of new peers. 144 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 145 return func(sw *Switch) { sw.peerFilters = filters } 146 } 147 148 // WithMetrics sets the metrics. 149 func WithMetrics(metrics *Metrics) SwitchOption { 150 return func(sw *Switch) { sw.metrics = metrics } 151 } 152 153 //--------------------------------------------------------------------- 154 // Switch setup 155 156 // AddReactor adds the given reactor to the switch. 157 // NOTE: Not goroutine safe. 158 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 159 for _, chDesc := range reactor.GetChannels() { 160 chID := chDesc.ID 161 // No two reactors can share the same channel. 162 if sw.reactorsByCh[chID] != nil { 163 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 164 } 165 sw.chDescs = append(sw.chDescs, chDesc) 166 sw.reactorsByCh[chID] = reactor 167 } 168 sw.reactors[name] = reactor 169 reactor.SetSwitch(sw) 170 return reactor 171 } 172 173 // RemoveReactor removes the given Reactor from the Switch. 174 // NOTE: Not goroutine safe. 175 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 176 for _, chDesc := range reactor.GetChannels() { 177 // remove channel description 178 for i := 0; i < len(sw.chDescs); i++ { 179 if chDesc.ID == sw.chDescs[i].ID { 180 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 181 break 182 } 183 } 184 delete(sw.reactorsByCh, chDesc.ID) 185 } 186 delete(sw.reactors, name) 187 reactor.SetSwitch(nil) 188 } 189 190 // Reactors returns a map of reactors registered on the switch. 191 // NOTE: Not goroutine safe. 192 func (sw *Switch) Reactors() map[string]Reactor { 193 return sw.reactors 194 } 195 196 // Reactor returns the reactor with the given name. 197 // NOTE: Not goroutine safe. 198 func (sw *Switch) Reactor(name string) Reactor { 199 return sw.reactors[name] 200 } 201 202 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 203 // NOTE: Not goroutine safe. 204 func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) { 205 sw.nodeInfo = nodeInfo 206 } 207 208 // NodeInfo returns the switch's NodeInfo. 209 // NOTE: Not goroutine safe. 210 func (sw *Switch) NodeInfo() NodeInfo { 211 return sw.nodeInfo 212 } 213 214 // SetNodeKey sets the switch's private key for authenticated encryption. 215 // NOTE: Not goroutine safe. 216 func (sw *Switch) SetNodeKey(nodeKey *NodeKey) { 217 sw.nodeKey = nodeKey 218 } 219 220 //--------------------------------------------------------------------- 221 // Service start/stop 222 223 // OnStart implements BaseService. It starts all the reactors and peers. 224 func (sw *Switch) OnStart() error { 225 // Start reactors 226 for _, reactor := range sw.reactors { 227 err := reactor.Start() 228 if err != nil { 229 return fmt.Errorf("failed to start %v: %w", reactor, err) 230 } 231 } 232 233 // Start accepting Peers. 234 go sw.acceptRoutine() 235 236 return nil 237 } 238 239 // OnStop implements BaseService. It stops all peers and reactors. 240 func (sw *Switch) OnStop() { 241 // Stop peers 242 for _, p := range sw.peers.List() { 243 sw.stopAndRemovePeer(p, nil) 244 } 245 246 // Stop reactors 247 sw.Logger.Debug("Switch: Stopping reactors") 248 for _, reactor := range sw.reactors { 249 if err := reactor.Stop(); err != nil { 250 sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err) 251 } 252 } 253 } 254 255 //--------------------------------------------------------------------- 256 // Peers 257 258 // Broadcast runs a go routine for each attempted send, which will block trying 259 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 260 // success values for each attempted send (false if times out). Channel will be 261 // closed once msg bytes are sent to all peers (or time out). 262 // 263 // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved. 264 func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool { 265 sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", fmt.Sprintf("%X", msgBytes)) 266 267 peers := sw.peers.List() 268 var wg sync.WaitGroup 269 wg.Add(len(peers)) 270 successChan := make(chan bool, len(peers)) 271 272 for _, peer := range peers { 273 go func(p Peer) { 274 defer wg.Done() 275 success := p.Send(chID, msgBytes) 276 successChan <- success 277 }(peer) 278 } 279 280 go func() { 281 wg.Wait() 282 close(successChan) 283 }() 284 285 return successChan 286 } 287 288 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 289 // unconditional peers are not counted here. 290 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 291 peers := sw.peers.List() 292 for _, peer := range peers { 293 if peer.IsOutbound() { 294 if !sw.IsPeerUnconditional(peer.ID()) { 295 outbound++ 296 } 297 } else { 298 if !sw.IsPeerUnconditional(peer.ID()) { 299 inbound++ 300 } 301 } 302 } 303 dialing = sw.dialing.Size() 304 return 305 } 306 307 func (sw *Switch) IsPeerUnconditional(id ID) bool { 308 _, ok := sw.unconditionalPeerIDs[id] 309 return ok 310 } 311 312 // MaxNumOutboundPeers returns a maximum number of outbound peers. 313 func (sw *Switch) MaxNumOutboundPeers() int { 314 return sw.config.MaxNumOutboundPeers 315 } 316 317 // Peers returns the set of peers that are connected to the switch. 318 func (sw *Switch) Peers() IPeerSet { 319 return sw.peers 320 } 321 322 // StopPeerForError disconnects from a peer due to external error. 323 // If the peer is persistent, it will attempt to reconnect. 324 // TODO: make record depending on reason. 325 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 326 if !peer.IsRunning() { 327 return 328 } 329 330 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 331 sw.stopAndRemovePeer(peer, reason) 332 333 if peer.IsPersistent() { 334 var addr *NetAddress 335 if peer.IsOutbound() { // socket address for outbound peers 336 addr = peer.SocketAddr() 337 } else { // self-reported address for inbound peers 338 var err error 339 addr, err = peer.NodeInfo().NetAddress() 340 if err != nil { 341 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 342 "peer", peer, "err", err) 343 return 344 } 345 } 346 go sw.reconnectToPeer(addr) 347 } 348 } 349 350 // StopPeerGracefully disconnects from a peer gracefully. 351 // TODO: handle graceful disconnects. 352 func (sw *Switch) StopPeerGracefully(peer Peer) { 353 sw.Logger.Info("Stopping peer gracefully") 354 sw.stopAndRemovePeer(peer, nil) 355 } 356 357 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 358 sw.transport.Cleanup(peer) 359 if err := peer.Stop(); err != nil { 360 sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly 361 } 362 363 for _, reactor := range sw.reactors { 364 reactor.RemovePeer(peer, reason) 365 } 366 367 // Removing a peer should go last to avoid a situation where a peer 368 // reconnect to our node and the switch calls InitPeer before 369 // RemovePeer is finished. 370 // https://github.com/tendermint/tendermint/issues/3338 371 if sw.peers.Remove(peer) { 372 sw.metrics.Peers.Add(float64(-1)) 373 } 374 } 375 376 // reconnectToPeer tries to reconnect to the addr, first repeatedly 377 // with a fixed interval, then with exponential backoff. 378 // If no success after all that, it stops trying, and leaves it 379 // to the PEX/Addrbook to find the peer with the addr again 380 // NOTE: this will keep trying even if the handshake or auth fails. 381 // TODO: be more explicit with error types so we only retry on certain failures 382 // - ie. if we're getting ErrDuplicatePeer we can stop 383 // because the addrbook got us the peer back already 384 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 385 if sw.reconnecting.Has(string(addr.ID)) { 386 return 387 } 388 sw.reconnecting.Set(string(addr.ID), addr) 389 defer sw.reconnecting.Delete(string(addr.ID)) 390 391 start := time.Now() 392 sw.Logger.Info("Reconnecting to peer", "addr", addr) 393 for i := 0; i < reconnectAttempts; i++ { 394 if !sw.IsRunning() { 395 return 396 } 397 398 err := sw.DialPeerWithAddress(addr) 399 if err == nil { 400 return // success 401 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 402 return 403 } 404 405 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 406 // sleep a set amount 407 sw.randomSleep(reconnectInterval) 408 continue 409 } 410 411 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 412 "addr", addr, "elapsed", time.Since(start)) 413 for i := 0; i < reconnectBackOffAttempts; i++ { 414 if !sw.IsRunning() { 415 return 416 } 417 418 // sleep an exponentially increasing amount 419 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 420 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 421 422 err := sw.DialPeerWithAddress(addr) 423 if err == nil { 424 return // success 425 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 426 return 427 } 428 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 429 } 430 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 431 } 432 433 // SetAddrBook allows to set address book on Switch. 434 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 435 sw.addrBook = addrBook 436 } 437 438 // MarkPeerAsGood marks the given peer as good when it did something useful 439 // like contributed to consensus. 440 func (sw *Switch) MarkPeerAsGood(peer Peer) { 441 if sw.addrBook != nil { 442 sw.addrBook.MarkGood(peer.ID()) 443 } 444 } 445 446 //--------------------------------------------------------------------- 447 // Dialing 448 449 type privateAddr interface { 450 PrivateAddr() bool 451 } 452 453 func isPrivateAddr(err error) bool { 454 te, ok := err.(privateAddr) 455 return ok && te.PrivateAddr() 456 } 457 458 // DialPeersAsync dials a list of peers asynchronously in random order. 459 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 460 // It ignores ErrNetAddressLookup. However, if there are other errors, first 461 // encounter is returned. 462 // Nop if there are no peers. 463 func (sw *Switch) DialPeersAsync(peers []string) error { 464 netAddrs, errs := NewNetAddressStrings(peers) 465 // report all the errors 466 for _, err := range errs { 467 sw.Logger.Error("Error in peer's address", "err", err) 468 } 469 // return first non-ErrNetAddressLookup error 470 for _, err := range errs { 471 if _, ok := err.(ErrNetAddressLookup); ok { 472 continue 473 } 474 return err 475 } 476 sw.dialPeersAsync(netAddrs) 477 return nil 478 } 479 480 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 481 ourAddr := sw.NetAddress() 482 483 // TODO: this code feels like it's in the wrong place. 484 // The integration tests depend on the addrBook being saved 485 // right away but maybe we can change that. Recall that 486 // the addrBook is only written to disk every 2min 487 if sw.addrBook != nil { 488 // add peers to `addrBook` 489 for _, netAddr := range netAddrs { 490 // do not add our address or ID 491 if !netAddr.Same(ourAddr) { 492 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 493 if isPrivateAddr(err) { 494 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 495 } else { 496 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 497 } 498 } 499 } 500 } 501 // Persist some peers to disk right away. 502 // NOTE: integration tests depend on this 503 sw.addrBook.Save() 504 } 505 506 // permute the list, dial them in random order. 507 perm := sw.rng.Perm(len(netAddrs)) 508 for i := 0; i < len(perm); i++ { 509 go func(i int) { 510 j := perm[i] 511 addr := netAddrs[j] 512 513 if addr.Same(ourAddr) { 514 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 515 return 516 } 517 518 sw.randomSleep(0) 519 520 err := sw.DialPeerWithAddress(addr) 521 if err != nil { 522 switch err.(type) { 523 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 524 sw.Logger.Debug("Error dialing peer", "err", err) 525 default: 526 sw.Logger.Error("Error dialing peer", "err", err) 527 } 528 } 529 }(i) 530 } 531 } 532 533 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 534 // and authenticates successfully. 535 // If we're currently dialing this address or it belongs to an existing peer, 536 // ErrCurrentlyDialingOrExistingAddress is returned. 537 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 538 if sw.IsDialingOrExistingAddress(addr) { 539 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 540 } 541 542 sw.dialing.Set(string(addr.ID), addr) 543 defer sw.dialing.Delete(string(addr.ID)) 544 545 return sw.addOutboundPeerWithConfig(addr, sw.config) 546 } 547 548 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 549 func (sw *Switch) randomSleep(interval time.Duration) { 550 r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 551 time.Sleep(r + interval) 552 } 553 554 // IsDialingOrExistingAddress returns true if switch has a peer with the given 555 // address or dialing it at the moment. 556 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 557 return sw.dialing.Has(string(addr.ID)) || 558 sw.peers.Has(addr.ID) || 559 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 560 } 561 562 // AddPersistentPeers allows you to set persistent peers. It ignores 563 // ErrNetAddressLookup. However, if there are other errors, first encounter is 564 // returned. 565 func (sw *Switch) AddPersistentPeers(addrs []string) error { 566 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 567 netAddrs, errs := NewNetAddressStrings(addrs) 568 // report all the errors 569 for _, err := range errs { 570 sw.Logger.Error("Error in peer's address", "err", err) 571 } 572 // return first non-ErrNetAddressLookup error 573 for _, err := range errs { 574 if _, ok := err.(ErrNetAddressLookup); ok { 575 continue 576 } 577 return err 578 } 579 sw.persistentPeersAddrs = netAddrs 580 return nil 581 } 582 583 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 584 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 585 for i, id := range ids { 586 err := validateID(ID(id)) 587 if err != nil { 588 return fmt.Errorf("wrong ID #%d: %w", i, err) 589 } 590 sw.unconditionalPeerIDs[ID(id)] = struct{}{} 591 } 592 return nil 593 } 594 595 func (sw *Switch) AddPrivatePeerIDs(ids []string) error { 596 validIDs := make([]string, 0, len(ids)) 597 for i, id := range ids { 598 err := validateID(ID(id)) 599 if err != nil { 600 return fmt.Errorf("wrong ID #%d: %w", i, err) 601 } 602 validIDs = append(validIDs, id) 603 } 604 605 sw.addrBook.AddPrivateIDs(validIDs) 606 607 return nil 608 } 609 610 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 611 for _, pa := range sw.persistentPeersAddrs { 612 if pa.Equals(na) { 613 return true 614 } 615 } 616 return false 617 } 618 619 func (sw *Switch) acceptRoutine() { 620 for { 621 p, err := sw.transport.Accept(peerConfig{ 622 chDescs: sw.chDescs, 623 onPeerError: sw.StopPeerForError, 624 reactorsByCh: sw.reactorsByCh, 625 metrics: sw.metrics, 626 isPersistent: sw.IsPeerPersistent, 627 }) 628 if err != nil { 629 switch err := err.(type) { 630 case ErrRejected: 631 if err.IsSelf() { 632 // Remove the given address from the address book and add to our addresses 633 // to avoid dialing in the future. 634 addr := err.Addr() 635 sw.addrBook.RemoveAddress(&addr) 636 sw.addrBook.AddOurAddress(&addr) 637 } 638 639 sw.Logger.Info( 640 "Inbound Peer rejected", 641 "err", err, 642 "numPeers", sw.peers.Size(), 643 ) 644 645 continue 646 case ErrFilterTimeout: 647 sw.Logger.Error( 648 "Peer filter timed out", 649 "err", err, 650 ) 651 652 continue 653 case ErrTransportClosed: 654 sw.Logger.Error( 655 "Stopped accept routine, as transport is closed", 656 "numPeers", sw.peers.Size(), 657 ) 658 default: 659 sw.Logger.Error( 660 "Accept on transport errored", 661 "err", err, 662 "numPeers", sw.peers.Size(), 663 ) 664 // We could instead have a retry loop around the acceptRoutine, 665 // but that would need to stop and let the node shutdown eventually. 666 // So might as well panic and let process managers restart the node. 667 // There's no point in letting the node run without the acceptRoutine, 668 // since it won't be able to accept new connections. 669 panic(fmt.Errorf("accept routine exited: %v", err)) 670 } 671 672 break 673 } 674 675 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 676 // Ignore connection if we already have enough peers. 677 _, in, _ := sw.NumPeers() 678 if in >= sw.config.MaxNumInboundPeers { 679 sw.Logger.Info( 680 "Ignoring inbound connection: already have enough inbound peers", 681 "address", p.SocketAddr(), 682 "have", in, 683 "max", sw.config.MaxNumInboundPeers, 684 ) 685 686 sw.transport.Cleanup(p) 687 688 continue 689 } 690 691 } 692 693 if err := sw.addPeer(p); err != nil { 694 sw.transport.Cleanup(p) 695 if p.IsRunning() { 696 _ = p.Stop() 697 } 698 sw.Logger.Info( 699 "Ignoring inbound connection: error while adding peer", 700 "err", err, 701 "id", p.ID(), 702 ) 703 } 704 } 705 } 706 707 // dial the peer; make secret connection; authenticate against the dialed ID; 708 // add the peer. 709 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 710 // If peer is started successfully, reconnectLoop will start when 711 // StopPeerForError is called. 712 func (sw *Switch) addOutboundPeerWithConfig( 713 addr *NetAddress, 714 cfg *config.P2PConfig, 715 ) error { 716 sw.Logger.Info("Dialing peer", "address", addr) 717 718 // XXX(xla): Remove the leakage of test concerns in implementation. 719 if cfg.TestDialFail { 720 go sw.reconnectToPeer(addr) 721 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 722 } 723 724 p, err := sw.transport.Dial(*addr, peerConfig{ 725 chDescs: sw.chDescs, 726 onPeerError: sw.StopPeerForError, 727 isPersistent: sw.IsPeerPersistent, 728 reactorsByCh: sw.reactorsByCh, 729 metrics: sw.metrics, 730 }) 731 if err != nil { 732 if e, ok := err.(ErrRejected); ok { 733 if e.IsSelf() { 734 // Remove the given address from the address book and add to our addresses 735 // to avoid dialing in the future. 736 sw.addrBook.RemoveAddress(addr) 737 sw.addrBook.AddOurAddress(addr) 738 739 return err 740 } 741 } 742 743 // retry persistent peers after 744 // any dial error besides IsSelf() 745 if sw.IsPeerPersistent(addr) { 746 go sw.reconnectToPeer(addr) 747 } 748 749 return err 750 } 751 752 if err := sw.addPeer(p); err != nil { 753 sw.transport.Cleanup(p) 754 if p.IsRunning() { 755 _ = p.Stop() 756 } 757 return err 758 } 759 760 return nil 761 } 762 763 func (sw *Switch) filterPeer(p Peer) error { 764 // Avoid duplicate 765 if sw.peers.Has(p.ID()) { 766 return ErrRejected{id: p.ID(), isDuplicate: true} 767 } 768 769 errc := make(chan error, len(sw.peerFilters)) 770 771 for _, f := range sw.peerFilters { 772 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 773 errc <- f(sw.peers, p) 774 }(f, p, errc) 775 } 776 777 for i := 0; i < cap(errc); i++ { 778 select { 779 case err := <-errc: 780 if err != nil { 781 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 782 } 783 case <-time.After(sw.filterTimeout): 784 return ErrFilterTimeout{} 785 } 786 } 787 788 return nil 789 } 790 791 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 792 // the peer is filtered out or failed to start or can't be added. 793 func (sw *Switch) addPeer(p Peer) error { 794 if err := sw.filterPeer(p); err != nil { 795 return err 796 } 797 798 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 799 800 // Handle the shut down case where the switch has stopped but we're 801 // concurrently trying to add a peer. 802 if !sw.IsRunning() { 803 // XXX should this return an error or just log and terminate? 804 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 805 return nil 806 } 807 808 // Add some data to the peer, which is required by reactors. 809 for _, reactor := range sw.reactors { 810 p = reactor.InitPeer(p) 811 } 812 813 // Start the peer's send/recv routines. 814 // Must start it before adding it to the peer set 815 // to prevent Start and Stop from being called concurrently. 816 err := p.Start() 817 if err != nil { 818 // Should never happen 819 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 820 return err 821 } 822 823 // Add the peer to PeerSet. Do this before starting the reactors 824 // so that if Receive errors, we will find the peer and remove it. 825 // Add should not err since we already checked peers.Has(). 826 if err := sw.peers.Add(p); err != nil { 827 return err 828 } 829 sw.metrics.Peers.Add(float64(1)) 830 831 // Start all the reactor protocols on the peer. 832 for _, reactor := range sw.reactors { 833 reactor.AddPeer(p) 834 } 835 836 sw.Logger.Info("Added peer", "peer", p) 837 838 return nil 839 }