// Source: github.com/Finschia/ostracon@v1.1.5/p2p/switch.go

package p2p

import (
	"fmt"
	"math"
	"sync"
	"time"

	"github.com/gogo/protobuf/proto"

	"github.com/Finschia/ostracon/config"
	"github.com/Finschia/ostracon/libs/cmap"
	"github.com/Finschia/ostracon/libs/rand"
	"github.com/Finschia/ostracon/libs/service"
	"github.com/Finschia/ostracon/p2p/conn"
)

const (
	// Upper bound, in milliseconds, of the random delay added by randomSleep
	// before dialing peers or reconnecting. The jitter helps prevent DoS.
	dialRandomizerIntervalMilliseconds = 3000

	// First reconnect phase: up to reconnectAttempts dials, each failed one
	// followed by a sleep of reconnectInterval plus random jitter,
	// i.e. at least 20 * 5s = 100s in total.
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// Second reconnect phase: exponential backoff. Before attempt i
	// (i = 0..reconnectBackOffAttempts-1) reconnectToPeer sleeps
	// reconnectBackOffBaseSeconds**i seconds plus random jitter. The sleeps
	// add up to (3**10 - 1) / 2 = 29524s, roughly 8 hours; the longest single
	// sleep is 3**9 s, about 5.5 hours.
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)

// MConnConfig returns an MConnConfig with fields updated
// from the P2PConfig. It starts from conn.DefaultMConnConfig() and overrides
// only the fields assigned below; every other field keeps its default.
func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
	mConfig := conn.DefaultMConnConfig()
	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
	mConfig.SendRate = cfg.SendRate
	mConfig.RecvRate = cfg.RecvRate
	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
	mConfig.RecvAsync = cfg.RecvAsync
	return mConfig
}

//-----------------------------------------------------------------------------

// An AddrBook represents an address book from the pex package, which is used
// to store peer addresses.
//
// Within this file the Switch uses it as follows: AddAddress and Save when
// dialing a list of peers, MarkGood from MarkPeerAsGood, AddPrivateIDs from
// AddPrivatePeerIDs, and RemoveAddress followed by AddOurAddress when a
// connection turns out to be to ourselves. OurAddress and HasAddress are not
// called from this file.
type AddrBook interface {
	AddAddress(addr *NetAddress, src *NetAddress) error
	AddPrivateIDs([]string)
	AddOurAddress(*NetAddress)
	OurAddress(*NetAddress) bool
	MarkGood(ID)
	RemoveAddress(*NetAddress)
	HasAddress(*NetAddress) bool
	Save()
}

// PeerFilterFunc to be implemented by filter hooks after a new Peer has been
// fully setup. A non-nil error rejects the peer.
type PeerFilterFunc func(IPeerSet, Peer) error

//-----------------------------------------------------------------------------

// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`. So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
type Switch struct {
	service.BaseService

	config *config.P2PConfig
	// reactors maps the name given to AddReactor to the reactor.
	reactors map[string]Reactor
	// chDescs, reactorsByCh and msgTypeByChID are filled by AddReactor from
	// each reactor's channel descriptors and handed to the transport through
	// peerConfig when accepting or dialing.
	chDescs       []*conn.ChannelDescriptor
	reactorsByCh  map[byte]Reactor
	msgTypeByChID map[byte]proto.Message
	peers         *PeerSet
	// dialing and reconnecting are keyed by string(addr.ID) and hold the
	// addresses currently being dialed / reconnected to.
	dialing      *cmap.CMap
	reconnecting *cmap.CMap
	nodeInfo     NodeInfo // our node info
	nodeKey      *NodeKey // our node privkey
	// addrBook is nil until SetAddrBook is called.
	addrBook AddrBook
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress
	// unconditionalPeerIDs holds peers that NumPeers does not count and that
	// acceptRoutine admits regardless of the inbound peer limit.
	unconditionalPeerIDs map[ID]struct{}

	transport Transport

	// filterTimeout bounds each wait for a peer filter result in filterPeer.
	filterTimeout time.Duration
	peerFilters   []PeerFilterFunc

	rng *rand.Rand // seed for randomizing dial times and orders

	metrics *Metrics
	mlc     *metricsLabelCache
}

// NetAddress returns the address the switch is listening on.
// The result points at a copy of the value returned by the transport.
func (sw *Switch) NetAddress() *NetAddress {
	addr := sw.transport.NetAddress()
	return &addr
}

// SwitchOption sets an optional parameter on the Switch.
type SwitchOption func(*Switch)

// NewSwitch creates a new Switch with the given config.
110 func NewSwitch( 111 cfg *config.P2PConfig, 112 transport Transport, 113 options ...SwitchOption, 114 ) *Switch { 115 116 sw := &Switch{ 117 config: cfg, 118 reactors: make(map[string]Reactor), 119 chDescs: make([]*conn.ChannelDescriptor, 0), 120 reactorsByCh: make(map[byte]Reactor), 121 msgTypeByChID: make(map[byte]proto.Message), 122 peers: NewPeerSet(), 123 dialing: cmap.NewCMap(), 124 reconnecting: cmap.NewCMap(), 125 metrics: NopMetrics(), 126 transport: transport, 127 filterTimeout: defaultFilterTimeout, 128 persistentPeersAddrs: make([]*NetAddress, 0), 129 unconditionalPeerIDs: make(map[ID]struct{}), 130 mlc: newMetricsLabelCache(), 131 } 132 133 // Ensure we have a completely undeterministic PRNG. 134 sw.rng = rand.NewRand() 135 136 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 137 138 for _, option := range options { 139 option(sw) 140 } 141 142 return sw 143 } 144 145 // SwitchFilterTimeout sets the timeout used for peer filters. 146 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 147 return func(sw *Switch) { sw.filterTimeout = timeout } 148 } 149 150 // SwitchPeerFilters sets the filters for rejection of new peers. 151 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 152 return func(sw *Switch) { sw.peerFilters = filters } 153 } 154 155 // WithMetrics sets the metrics. 156 func WithMetrics(metrics *Metrics) SwitchOption { 157 return func(sw *Switch) { sw.metrics = metrics } 158 } 159 160 //--------------------------------------------------------------------- 161 // Switch setup 162 163 // AddReactor adds the given reactor to the switch. 164 // NOTE: Not goroutine safe. 165 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 166 for _, chDesc := range reactor.GetChannels() { 167 chID := chDesc.ID 168 // No two reactors can share the same channel. 
169 if sw.reactorsByCh[chID] != nil { 170 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 171 } 172 sw.chDescs = append(sw.chDescs, chDesc) 173 sw.reactorsByCh[chID] = reactor 174 sw.msgTypeByChID[chID] = chDesc.MessageType 175 } 176 sw.reactors[name] = reactor 177 reactor.SetSwitch(sw) 178 return reactor 179 } 180 181 // RemoveReactor removes the given Reactor from the Switch. 182 // NOTE: Not goroutine safe. 183 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 184 for _, chDesc := range reactor.GetChannels() { 185 // remove channel description 186 for i := 0; i < len(sw.chDescs); i++ { 187 if chDesc.ID == sw.chDescs[i].ID { 188 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 189 break 190 } 191 } 192 delete(sw.reactorsByCh, chDesc.ID) 193 delete(sw.msgTypeByChID, chDesc.ID) 194 } 195 delete(sw.reactors, name) 196 reactor.SetSwitch(nil) 197 } 198 199 // Reactors returns a map of reactors registered on the switch. 200 // NOTE: Not goroutine safe. 201 func (sw *Switch) Reactors() map[string]Reactor { 202 return sw.reactors 203 } 204 205 // Reactor returns the reactor with the given name. 206 // NOTE: Not goroutine safe. 207 func (sw *Switch) Reactor(name string) Reactor { 208 return sw.reactors[name] 209 } 210 211 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 212 // NOTE: Not goroutine safe. 213 func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) { 214 sw.nodeInfo = nodeInfo 215 } 216 217 // NodeInfo returns the switch's NodeInfo. 218 // NOTE: Not goroutine safe. 219 func (sw *Switch) NodeInfo() NodeInfo { 220 return sw.nodeInfo 221 } 222 223 // SetNodeKey sets the switch's private key for authenticated encryption. 224 // NOTE: Not goroutine safe. 
225 func (sw *Switch) SetNodeKey(nodeKey *NodeKey) { 226 sw.nodeKey = nodeKey 227 } 228 229 //--------------------------------------------------------------------- 230 // Service start/stop 231 232 // OnStart implements BaseService. It starts all the reactors and peers. 233 func (sw *Switch) OnStart() error { 234 // Start reactors 235 for _, reactor := range sw.reactors { 236 err := reactor.Start() 237 if err != nil { 238 return fmt.Errorf("failed to start %v: %w", reactor, err) 239 } 240 } 241 242 // Start accepting Peers. 243 go sw.acceptRoutine() 244 245 return nil 246 } 247 248 // OnStop implements BaseService. It stops all peers and reactors. 249 func (sw *Switch) OnStop() { 250 // Stop peers 251 for _, p := range sw.peers.List() { 252 sw.stopAndRemovePeer(p, nil) 253 } 254 255 // Stop reactors 256 sw.Logger.Debug("Switch: Stopping reactors") 257 for _, reactor := range sw.reactors { 258 if err := reactor.Stop(); err != nil { 259 sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err) 260 } 261 } 262 } 263 264 //--------------------------------------------------------------------- 265 // Peers 266 267 // BroadcastEnvelope runs a go routine for each attempted send, which will block trying 268 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 269 // success values for each attempted send (false if times out). Channel will be 270 // closed once msg bytes are sent to all peers (or time out). 271 // BroadcastEnvelope sends to the peers using the SendEnvelope method. 272 // 273 // NOTE: BroadcastEnvelope uses goroutines, so order of broadcast may not be preserved. 
274 func (sw *Switch) BroadcastEnvelope(e Envelope) chan bool { 275 sw.Logger.Debug("Broadcast", "channel", e.ChannelID) 276 277 peers := sw.peers.List() 278 var wg sync.WaitGroup 279 wg.Add(len(peers)) 280 successChan := make(chan bool, len(peers)) 281 282 for _, peer := range peers { 283 go func(p Peer) { 284 defer wg.Done() 285 success := SendEnvelopeShim(p, e, sw.Logger) 286 successChan <- success 287 }(peer) 288 } 289 290 go func() { 291 wg.Wait() 292 close(successChan) 293 }() 294 295 return successChan 296 } 297 298 // Broadcast runs a go routine for each attempted send, which will block trying 299 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 300 // success values for each attempted send (false if times out). Channel will be 301 // closed once msg bytes are sent to all peers (or time out). 302 // Broadcast sends to the peers using the Send method. 303 // 304 // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved. 305 // 306 // Deprecated: code looking to broadcast data to all peers should use BroadcastEnvelope. 307 // Broadcast will be removed in 0.37. 308 func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool { 309 sw.Logger.Debug("Broadcast", "channel", chID) 310 311 peers := sw.peers.List() 312 var wg sync.WaitGroup 313 wg.Add(len(peers)) 314 successChan := make(chan bool, len(peers)) 315 316 for _, peer := range peers { 317 go func(p Peer) { 318 defer wg.Done() 319 success := p.Send(chID, msgBytes) 320 successChan <- success 321 }(peer) 322 } 323 324 go func() { 325 wg.Wait() 326 close(successChan) 327 }() 328 329 return successChan 330 } 331 332 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 333 // unconditional peers are not counted here. 
334 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 335 peers := sw.peers.List() 336 for _, peer := range peers { 337 if peer.IsOutbound() { 338 if !sw.IsPeerUnconditional(peer.ID()) { 339 outbound++ 340 } 341 } else { 342 if !sw.IsPeerUnconditional(peer.ID()) { 343 inbound++ 344 } 345 } 346 } 347 dialing = sw.dialing.Size() 348 return 349 } 350 351 func (sw *Switch) IsPeerUnconditional(id ID) bool { 352 _, ok := sw.unconditionalPeerIDs[id] 353 return ok 354 } 355 356 // MaxNumOutboundPeers returns a maximum number of outbound peers. 357 func (sw *Switch) MaxNumOutboundPeers() int { 358 return sw.config.MaxNumOutboundPeers 359 } 360 361 // Peers returns the set of peers that are connected to the switch. 362 func (sw *Switch) Peers() IPeerSet { 363 return sw.peers 364 } 365 366 // StopPeerForError disconnects from a peer due to external error. 367 // If the peer is persistent, it will attempt to reconnect. 368 // TODO: make record depending on reason. 369 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 370 if !peer.IsRunning() { 371 return 372 } 373 374 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 375 sw.stopAndRemovePeer(peer, reason) 376 377 if peer.IsPersistent() { 378 var addr *NetAddress 379 if peer.IsOutbound() { // socket address for outbound peers 380 addr = peer.SocketAddr() 381 } else { // self-reported address for inbound peers 382 var err error 383 addr, err = peer.NodeInfo().NetAddress() 384 if err != nil { 385 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 386 "peer", peer, "err", err) 387 return 388 } 389 } 390 go sw.reconnectToPeer(addr) 391 } 392 } 393 394 // StopPeerGracefully disconnects from a peer gracefully. 395 // TODO: handle graceful disconnects. 
396 func (sw *Switch) StopPeerGracefully(peer Peer) { 397 sw.Logger.Info("Stopping peer gracefully") 398 sw.stopAndRemovePeer(peer, nil) 399 } 400 401 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 402 sw.transport.Cleanup(peer) 403 if err := peer.Stop(); err != nil { 404 sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly 405 } 406 407 for _, reactor := range sw.reactors { 408 reactor.RemovePeer(peer, reason) 409 } 410 411 // Removing a peer should go last to avoid a situation where a peer 412 // reconnect to our node and the switch calls InitPeer before 413 // RemovePeer is finished. 414 // https://github.com/tendermint/tendermint/issues/3338 415 if sw.peers.Remove(peer) { 416 sw.metrics.Peers.Add(float64(-1)) 417 } else { 418 // Removal of the peer has failed. The function above sets a flag within the peer to mark this. 419 // We keep this message here as information to the developer. 420 sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID()) 421 } 422 } 423 424 // reconnectToPeer tries to reconnect to the addr, first repeatedly 425 // with a fixed interval, then with exponential backoff. 426 // If no success after all that, it stops trying, and leaves it 427 // to the PEX/Addrbook to find the peer with the addr again 428 // NOTE: this will keep trying even if the handshake or auth fails. 429 // TODO: be more explicit with error types so we only retry on certain failures 430 // - ie. 
if we're getting ErrDuplicatePeer we can stop 431 // because the addrbook got us the peer back already 432 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 433 if sw.reconnecting.Has(string(addr.ID)) { 434 return 435 } 436 sw.reconnecting.Set(string(addr.ID), addr) 437 defer sw.reconnecting.Delete(string(addr.ID)) 438 439 start := time.Now() 440 sw.Logger.Info("Reconnecting to peer", "addr", addr) 441 for i := 0; i < reconnectAttempts; i++ { 442 if !sw.IsRunning() { 443 return 444 } 445 446 err := sw.DialPeerWithAddress(addr) 447 if err == nil { 448 return // success 449 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 450 return 451 } 452 453 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 454 // sleep a set amount 455 sw.randomSleep(reconnectInterval) 456 continue 457 } 458 459 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 460 "addr", addr, "elapsed", time.Since(start)) 461 for i := 0; i < reconnectBackOffAttempts; i++ { 462 if !sw.IsRunning() { 463 return 464 } 465 466 // sleep an exponentially increasing amount 467 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 468 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 469 470 err := sw.DialPeerWithAddress(addr) 471 if err == nil { 472 return // success 473 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 474 return 475 } 476 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 477 } 478 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 479 } 480 481 // SetAddrBook allows to set address book on Switch. 482 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 483 sw.addrBook = addrBook 484 } 485 486 // MarkPeerAsGood marks the given peer as good when it did something useful 487 // like contributed to consensus. 
488 func (sw *Switch) MarkPeerAsGood(peer Peer) { 489 if sw.addrBook != nil { 490 sw.addrBook.MarkGood(peer.ID()) 491 } 492 } 493 494 //--------------------------------------------------------------------- 495 // Dialing 496 497 type privateAddr interface { 498 PrivateAddr() bool 499 } 500 501 func isPrivateAddr(err error) bool { 502 te, ok := err.(privateAddr) 503 return ok && te.PrivateAddr() 504 } 505 506 // DialPeersAsync dials a list of peers asynchronously in random order. 507 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 508 // It ignores ErrNetAddressLookup. However, if there are other errors, first 509 // encounter is returned. 510 // Nop if there are no peers. 511 func (sw *Switch) DialPeersAsync(peers []string) error { 512 netAddrs, errs := NewNetAddressStrings(peers) 513 // report all the errors 514 for _, err := range errs { 515 sw.Logger.Error("Error in peer's address", "err", err) 516 } 517 // return first non-ErrNetAddressLookup error 518 for _, err := range errs { 519 if _, ok := err.(ErrNetAddressLookup); ok { 520 continue 521 } 522 return err 523 } 524 sw.dialPeersAsync(netAddrs) 525 return nil 526 } 527 528 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 529 ourAddr := sw.NetAddress() 530 531 // TODO: this code feels like it's in the wrong place. 532 // The integration tests depend on the addrBook being saved 533 // right away but maybe we can change that. 
Recall that 534 // the addrBook is only written to disk every 2min 535 if sw.addrBook != nil { 536 // add peers to `addrBook` 537 for _, netAddr := range netAddrs { 538 // do not add our address or ID 539 if !netAddr.Same(ourAddr) { 540 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 541 if isPrivateAddr(err) { 542 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 543 } else { 544 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 545 } 546 } 547 } 548 } 549 // Persist some peers to disk right away. 550 // NOTE: integration tests depend on this 551 sw.addrBook.Save() 552 } 553 554 // permute the list, dial them in random order. 555 perm := sw.rng.Perm(len(netAddrs)) 556 for i := 0; i < len(perm); i++ { 557 go func(i int) { 558 j := perm[i] 559 addr := netAddrs[j] 560 561 if addr.Same(ourAddr) { 562 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 563 return 564 } 565 566 sw.randomSleep(0) 567 568 err := sw.DialPeerWithAddress(addr) 569 if err != nil { 570 switch err.(type) { 571 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 572 sw.Logger.Debug("Error dialing peer", "err", err) 573 default: 574 sw.Logger.Error("Error dialing peer", "err", err) 575 } 576 } 577 }(i) 578 } 579 } 580 581 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 582 // and authenticates successfully. 583 // If we're currently dialing this address or it belongs to an existing peer, 584 // ErrCurrentlyDialingOrExistingAddress is returned. 
585 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 586 if sw.IsDialingOrExistingAddress(addr) { 587 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 588 } 589 590 sw.dialing.Set(string(addr.ID), addr) 591 defer sw.dialing.Delete(string(addr.ID)) 592 593 return sw.addOutboundPeerWithConfig(addr, sw.config) 594 } 595 596 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 597 func (sw *Switch) randomSleep(interval time.Duration) { 598 r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 599 time.Sleep(r + interval) 600 } 601 602 // IsDialingOrExistingAddress returns true if switch has a peer with the given 603 // address or dialing it at the moment. 604 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 605 return sw.dialing.Has(string(addr.ID)) || 606 sw.peers.Has(addr.ID) || 607 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 608 } 609 610 // AddPersistentPeers allows you to set persistent peers. It ignores 611 // ErrNetAddressLookup. However, if there are other errors, first encounter is 612 // returned. 
613 func (sw *Switch) AddPersistentPeers(addrs []string) error { 614 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 615 netAddrs, errs := NewNetAddressStrings(addrs) 616 // report all the errors 617 for _, err := range errs { 618 sw.Logger.Error("Error in peer's address", "err", err) 619 } 620 // return first non-ErrNetAddressLookup error 621 for _, err := range errs { 622 if _, ok := err.(ErrNetAddressLookup); ok { 623 continue 624 } 625 return err 626 } 627 sw.persistentPeersAddrs = netAddrs 628 return nil 629 } 630 631 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 632 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 633 for i, id := range ids { 634 err := validateID(ID(id)) 635 if err != nil { 636 return fmt.Errorf("wrong ID #%d: %w", i, err) 637 } 638 sw.unconditionalPeerIDs[ID(id)] = struct{}{} 639 } 640 return nil 641 } 642 643 func (sw *Switch) AddPrivatePeerIDs(ids []string) error { 644 validIDs := make([]string, 0, len(ids)) 645 for i, id := range ids { 646 err := validateID(ID(id)) 647 if err != nil { 648 return fmt.Errorf("wrong ID #%d: %w", i, err) 649 } 650 validIDs = append(validIDs, id) 651 } 652 653 sw.addrBook.AddPrivateIDs(validIDs) 654 655 return nil 656 } 657 658 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 659 for _, pa := range sw.persistentPeersAddrs { 660 if pa.Equals(na) { 661 return true 662 } 663 } 664 return false 665 } 666 667 func (sw *Switch) acceptRoutine() { 668 for { 669 p, err := sw.transport.Accept(peerConfig{ 670 chDescs: sw.chDescs, 671 onPeerError: sw.StopPeerForError, 672 reactorsByCh: sw.reactorsByCh, 673 msgTypeByChID: sw.msgTypeByChID, 674 metrics: sw.metrics, 675 mlc: sw.mlc, 676 isPersistent: sw.IsPeerPersistent, 677 }) 678 if err != nil { 679 switch err := err.(type) { 680 case ErrRejected: 681 if err.IsSelf() { 682 // Remove the given address from the address book and add to our addresses 683 // to avoid dialing in the future. 
684 addr := err.Addr() 685 sw.addrBook.RemoveAddress(&addr) 686 sw.addrBook.AddOurAddress(&addr) 687 } 688 689 sw.Logger.Info( 690 "Inbound Peer rejected", 691 "err", err, 692 "numPeers", sw.peers.Size(), 693 ) 694 695 continue 696 case ErrFilterTimeout: 697 sw.Logger.Error( 698 "Peer filter timed out", 699 "err", err, 700 ) 701 702 continue 703 case ErrTransportClosed: 704 sw.Logger.Error( 705 "Stopped accept routine, as transport is closed", 706 "numPeers", sw.peers.Size(), 707 ) 708 default: 709 sw.Logger.Error( 710 "Accept on transport errored", 711 "err", err, 712 "numPeers", sw.peers.Size(), 713 ) 714 // We could instead have a retry loop around the acceptRoutine, 715 // but that would need to stop and let the node shutdown eventually. 716 // So might as well panic and let process managers restart the node. 717 // There's no point in letting the node run without the acceptRoutine, 718 // since it won't be able to accept new connections. 719 panic(fmt.Errorf("accept routine exited: %v", err)) 720 } 721 722 break 723 } 724 725 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 726 // Ignore connection if we already have enough peers. 727 _, in, _ := sw.NumPeers() 728 if in >= sw.config.MaxNumInboundPeers { 729 sw.Logger.Info( 730 "Ignoring inbound connection: already have enough inbound peers", 731 "address", p.SocketAddr(), 732 "have", in, 733 "max", sw.config.MaxNumInboundPeers, 734 ) 735 736 sw.transport.Cleanup(p) 737 738 continue 739 } 740 741 } 742 743 if err := sw.addPeer(p); err != nil { 744 sw.transport.Cleanup(p) 745 if p.IsRunning() { 746 _ = p.Stop() 747 } 748 sw.Logger.Info( 749 "Ignoring inbound connection: error while adding peer", 750 "err", err, 751 "id", p.ID(), 752 ) 753 } 754 } 755 } 756 757 // dial the peer; make secret connection; authenticate against the dialed ID; 758 // add the peer. 759 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 
760 // If peer is started successfully, reconnectLoop will start when 761 // StopPeerForError is called. 762 func (sw *Switch) addOutboundPeerWithConfig( 763 addr *NetAddress, 764 cfg *config.P2PConfig, 765 ) error { 766 sw.Logger.Info("Dialing peer", "address", addr) 767 768 // XXX(xla): Remove the leakage of test concerns in implementation. 769 if cfg.TestDialFail { 770 go sw.reconnectToPeer(addr) 771 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 772 } 773 774 p, err := sw.transport.Dial(*addr, peerConfig{ 775 chDescs: sw.chDescs, 776 onPeerError: sw.StopPeerForError, 777 isPersistent: sw.IsPeerPersistent, 778 reactorsByCh: sw.reactorsByCh, 779 msgTypeByChID: sw.msgTypeByChID, 780 metrics: sw.metrics, 781 mlc: sw.mlc, 782 }) 783 if err != nil { 784 if e, ok := err.(ErrRejected); ok { 785 if e.IsSelf() { 786 // Remove the given address from the address book and add to our addresses 787 // to avoid dialing in the future. 788 sw.addrBook.RemoveAddress(addr) 789 sw.addrBook.AddOurAddress(addr) 790 791 return err 792 } 793 } 794 795 // retry persistent peers after 796 // any dial error besides IsSelf() 797 if sw.IsPeerPersistent(addr) { 798 go sw.reconnectToPeer(addr) 799 } 800 801 return err 802 } 803 804 if err := sw.addPeer(p); err != nil { 805 sw.transport.Cleanup(p) 806 if p.IsRunning() { 807 _ = p.Stop() 808 } 809 return err 810 } 811 812 return nil 813 } 814 815 func (sw *Switch) filterPeer(p Peer) error { 816 // Avoid duplicate 817 if sw.peers.Has(p.ID()) { 818 return ErrRejected{id: p.ID(), isDuplicate: true} 819 } 820 821 errc := make(chan error, len(sw.peerFilters)) 822 823 for _, f := range sw.peerFilters { 824 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 825 errc <- f(sw.peers, p) 826 }(f, p, errc) 827 } 828 829 for i := 0; i < cap(errc); i++ { 830 select { 831 case err := <-errc: 832 if err != nil { 833 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 834 } 835 case <-time.After(sw.filterTimeout): 836 return 
ErrFilterTimeout{} 837 } 838 } 839 840 return nil 841 } 842 843 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 844 // the peer is filtered out or failed to start or can't be added. 845 func (sw *Switch) addPeer(p Peer) error { 846 if err := sw.filterPeer(p); err != nil { 847 return err 848 } 849 850 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 851 852 // Handle the shut down case where the switch has stopped but we're 853 // concurrently trying to add a peer. 854 if !sw.IsRunning() { 855 // XXX should this return an error or just log and terminate? 856 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 857 return nil 858 } 859 860 // Add some data to the peer, which is required by reactors. 861 for _, reactor := range sw.reactors { 862 p = reactor.InitPeer(p) 863 } 864 865 // Start the peer's send/recv routines. 866 // Must start it before adding it to the peer set 867 // to prevent Start and Stop from being called concurrently. 868 err := p.Start() 869 if err != nil { 870 // Should never happen 871 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 872 return err 873 } 874 875 // Add the peer to PeerSet. Do this before starting the reactors 876 // so that if Receive errors, we will find the peer and remove it. 877 // Add should not err since we already checked peers.Has(). 878 if err := sw.peers.Add(p); err != nil { 879 switch err.(type) { 880 case ErrPeerRemoval: 881 sw.Logger.Error("Error starting peer ", 882 " err ", "Peer has already errored and removal was attempted.", 883 "peer", p.ID()) 884 } 885 return err 886 } 887 sw.metrics.Peers.Add(float64(1)) 888 889 // Start all the reactor protocols on the peer. 890 for _, reactor := range sw.reactors { 891 reactor.AddPeer(p) 892 } 893 894 sw.Logger.Info("Added peer", "peer", p) 895 896 return nil 897 }