github.com/pure-x-eth/consensus_tm@v0.0.0-20230502163723-e3c2ff987250/p2p/switch.go (about) 1 package p2p 2 3 import ( 4 "fmt" 5 "math" 6 "sync" 7 "time" 8 9 "github.com/gogo/protobuf/proto" 10 "github.com/pure-x-eth/consensus_tm/config" 11 "github.com/pure-x-eth/consensus_tm/libs/cmap" 12 "github.com/pure-x-eth/consensus_tm/libs/rand" 13 "github.com/pure-x-eth/consensus_tm/libs/service" 14 "github.com/pure-x-eth/consensus_tm/p2p/conn" 15 ) 16 17 const ( 18 // wait a random amount of time from this interval 19 // before dialing peers or reconnecting to help prevent DoS 20 dialRandomizerIntervalMilliseconds = 3000 21 22 // repeatedly try to reconnect for a few minutes 23 // ie. 5 * 20 = 100s 24 reconnectAttempts = 20 25 reconnectInterval = 5 * time.Second 26 27 // then move into exponential backoff mode for ~1day 28 // ie. 3**10 = 16hrs 29 reconnectBackOffAttempts = 10 30 reconnectBackOffBaseSeconds = 3 31 ) 32 33 // MConnConfig returns an MConnConfig with fields updated 34 // from the P2PConfig. 35 func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig { 36 mConfig := conn.DefaultMConnConfig() 37 mConfig.FlushThrottle = cfg.FlushThrottleTimeout 38 mConfig.SendRate = cfg.SendRate 39 mConfig.RecvRate = cfg.RecvRate 40 mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize 41 return mConfig 42 } 43 44 //----------------------------------------------------------------------------- 45 46 // An AddrBook represents an address book from the pex package, which is used 47 // to store peer addresses. 48 type AddrBook interface { 49 AddAddress(addr *NetAddress, src *NetAddress) error 50 AddPrivateIDs([]string) 51 AddOurAddress(*NetAddress) 52 OurAddress(*NetAddress) bool 53 MarkGood(ID) 54 RemoveAddress(*NetAddress) 55 HasAddress(*NetAddress) bool 56 Save() 57 } 58 59 // PeerFilterFunc to be implemented by filter hooks after a new Peer has been 60 // fully setup. 61 type PeerFilterFunc func(IPeerSet, Peer) error 62 63 //----------------------------------------------------------------------------- 64 65 // Switch handles peer connections and exposes an API to receive incoming messages 66 // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one 67 // or more `Channels`. So while sending outgoing messages is typically performed on the peer, 68 // incoming messages are received on the reactor. 69 type Switch struct { 70 service.BaseService 71 72 config *config.P2PConfig 73 reactors map[string]Reactor 74 chDescs []*conn.ChannelDescriptor 75 reactorsByCh map[byte]Reactor 76 msgTypeByChID map[byte]proto.Message 77 peers *PeerSet 78 dialing *cmap.CMap 79 reconnecting *cmap.CMap 80 nodeInfo NodeInfo // our node info 81 nodeKey *NodeKey // our node privkey 82 addrBook AddrBook 83 // peers addresses with whom we'll maintain constant connection 84 persistentPeersAddrs []*NetAddress 85 unconditionalPeerIDs map[ID]struct{} 86 87 transport Transport 88 89 filterTimeout time.Duration 90 peerFilters []PeerFilterFunc 91 92 rng *rand.Rand // seed for randomizing dial times and orders 93 94 metrics *Metrics 95 mlc *metricsLabelCache 96 } 97 98 // NetAddress returns the address the switch is listening on. 99 func (sw *Switch) NetAddress() *NetAddress { 100 addr := sw.transport.NetAddress() 101 return &addr 102 } 103 104 // SwitchOption sets an optional parameter on the Switch. 105 type SwitchOption func(*Switch) 106 107 // NewSwitch creates a new Switch with the given config. 108 func NewSwitch( 109 cfg *config.P2PConfig, 110 transport Transport, 111 options ...SwitchOption, 112 ) *Switch { 113 114 sw := &Switch{ 115 config: cfg, 116 reactors: make(map[string]Reactor), 117 chDescs: make([]*conn.ChannelDescriptor, 0), 118 reactorsByCh: make(map[byte]Reactor), 119 msgTypeByChID: make(map[byte]proto.Message), 120 peers: NewPeerSet(), 121 dialing: cmap.NewCMap(), 122 reconnecting: cmap.NewCMap(), 123 metrics: NopMetrics(), 124 transport: transport, 125 filterTimeout: defaultFilterTimeout, 126 persistentPeersAddrs: make([]*NetAddress, 0), 127 unconditionalPeerIDs: make(map[ID]struct{}), 128 mlc: newMetricsLabelCache(), 129 } 130 131 // Ensure we have a completely undeterministic PRNG. 132 sw.rng = rand.NewRand() 133 134 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 135 136 for _, option := range options { 137 option(sw) 138 } 139 140 return sw 141 } 142 143 // SwitchFilterTimeout sets the timeout used for peer filters. 144 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 145 return func(sw *Switch) { sw.filterTimeout = timeout } 146 } 147 148 // SwitchPeerFilters sets the filters for rejection of new peers. 149 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 150 return func(sw *Switch) { sw.peerFilters = filters } 151 } 152 153 // WithMetrics sets the metrics. 154 func WithMetrics(metrics *Metrics) SwitchOption { 155 return func(sw *Switch) { sw.metrics = metrics } 156 } 157 158 //--------------------------------------------------------------------- 159 // Switch setup 160 161 // AddReactor adds the given reactor to the switch. 162 // NOTE: Not goroutine safe. 163 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 164 for _, chDesc := range reactor.GetChannels() { 165 chID := chDesc.ID 166 // No two reactors can share the same channel. 167 if sw.reactorsByCh[chID] != nil { 168 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 169 } 170 sw.chDescs = append(sw.chDescs, chDesc) 171 sw.reactorsByCh[chID] = reactor 172 sw.msgTypeByChID[chID] = chDesc.MessageType 173 } 174 sw.reactors[name] = reactor 175 reactor.SetSwitch(sw) 176 return reactor 177 } 178 179 // RemoveReactor removes the given Reactor from the Switch. 180 // NOTE: Not goroutine safe. 181 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 182 for _, chDesc := range reactor.GetChannels() { 183 // remove channel description 184 for i := 0; i < len(sw.chDescs); i++ { 185 if chDesc.ID == sw.chDescs[i].ID { 186 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 187 break 188 } 189 } 190 delete(sw.reactorsByCh, chDesc.ID) 191 delete(sw.msgTypeByChID, chDesc.ID) 192 } 193 delete(sw.reactors, name) 194 reactor.SetSwitch(nil) 195 } 196 197 // Reactors returns a map of reactors registered on the switch. 198 // NOTE: Not goroutine safe. 199 func (sw *Switch) Reactors() map[string]Reactor { 200 return sw.reactors 201 } 202 203 // Reactor returns the reactor with the given name. 204 // NOTE: Not goroutine safe. 205 func (sw *Switch) Reactor(name string) Reactor { 206 return sw.reactors[name] 207 } 208 209 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 210 // NOTE: Not goroutine safe. 211 func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) { 212 sw.nodeInfo = nodeInfo 213 } 214 215 // NodeInfo returns the switch's NodeInfo. 216 // NOTE: Not goroutine safe. 217 func (sw *Switch) NodeInfo() NodeInfo { 218 return sw.nodeInfo 219 } 220 221 // SetNodeKey sets the switch's private key for authenticated encryption. 222 // NOTE: Not goroutine safe. 223 func (sw *Switch) SetNodeKey(nodeKey *NodeKey) { 224 sw.nodeKey = nodeKey 225 } 226 227 //--------------------------------------------------------------------- 228 // Service start/stop 229 230 // OnStart implements BaseService. It starts all the reactors and peers. 231 func (sw *Switch) OnStart() error { 232 // Start reactors 233 for _, reactor := range sw.reactors { 234 err := reactor.Start() 235 if err != nil { 236 return fmt.Errorf("failed to start %v: %w", reactor, err) 237 } 238 } 239 240 // Start accepting Peers. 241 go sw.acceptRoutine() 242 243 return nil 244 } 245 246 // OnStop implements BaseService. It stops all peers and reactors. 247 func (sw *Switch) OnStop() { 248 // Stop peers 249 for _, p := range sw.peers.List() { 250 sw.stopAndRemovePeer(p, nil) 251 } 252 253 // Stop reactors 254 sw.Logger.Debug("Switch: Stopping reactors") 255 for _, reactor := range sw.reactors { 256 if err := reactor.Stop(); err != nil { 257 sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err) 258 } 259 } 260 } 261 262 //--------------------------------------------------------------------- 263 // Peers 264 265 // BroadcastEnvelope runs a go routine for each attempted send, which will block trying 266 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 267 // success values for each attempted send (false if times out). Channel will be 268 // closed once msg bytes are sent to all peers (or time out). 269 // BroadcastEnvelope sends to the peers using the SendEnvelope method. 270 // 271 // NOTE: BroadcastEnvelope uses goroutines, so order of broadcast may not be preserved. 272 func (sw *Switch) BroadcastEnvelope(e Envelope) chan bool { 273 sw.Logger.Debug("Broadcast", "channel", e.ChannelID) 274 275 peers := sw.peers.List() 276 var wg sync.WaitGroup 277 wg.Add(len(peers)) 278 successChan := make(chan bool, len(peers)) 279 280 for _, peer := range peers { 281 go func(p Peer) { 282 defer wg.Done() 283 success := SendEnvelopeShim(p, e, sw.Logger) 284 successChan <- success 285 }(peer) 286 } 287 288 go func() { 289 wg.Wait() 290 close(successChan) 291 }() 292 293 return successChan 294 } 295 296 // Broadcast runs a go routine for each attempted send, which will block trying 297 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 298 // success values for each attempted send (false if times out). Channel will be 299 // closed once msg bytes are sent to all peers (or time out). 300 // Broadcast sends to the peers using the Send method. 301 // 302 // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved. 303 // 304 // Deprecated: code looking to broadcast data to all peers should use BroadcastEnvelope. 305 // Broadcast will be removed in 0.37. 306 func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool { 307 sw.Logger.Debug("Broadcast", "channel", chID) 308 309 peers := sw.peers.List() 310 var wg sync.WaitGroup 311 wg.Add(len(peers)) 312 successChan := make(chan bool, len(peers)) 313 314 for _, peer := range peers { 315 go func(p Peer) { 316 defer wg.Done() 317 success := p.Send(chID, msgBytes) 318 successChan <- success 319 }(peer) 320 } 321 322 go func() { 323 wg.Wait() 324 close(successChan) 325 }() 326 327 return successChan 328 } 329 330 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 331 // unconditional peers are not counted here. 332 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 333 peers := sw.peers.List() 334 for _, peer := range peers { 335 if peer.IsOutbound() { 336 if !sw.IsPeerUnconditional(peer.ID()) { 337 outbound++ 338 } 339 } else { 340 if !sw.IsPeerUnconditional(peer.ID()) { 341 inbound++ 342 } 343 } 344 } 345 dialing = sw.dialing.Size() 346 return 347 } 348 349 func (sw *Switch) IsPeerUnconditional(id ID) bool { 350 _, ok := sw.unconditionalPeerIDs[id] 351 return ok 352 } 353 354 // MaxNumOutboundPeers returns a maximum number of outbound peers. 355 func (sw *Switch) MaxNumOutboundPeers() int { 356 return sw.config.MaxNumOutboundPeers 357 } 358 359 // Peers returns the set of peers that are connected to the switch. 360 func (sw *Switch) Peers() IPeerSet { 361 return sw.peers 362 } 363 364 // StopPeerForError disconnects from a peer due to external error. 365 // If the peer is persistent, it will attempt to reconnect. 366 // TODO: make record depending on reason. 367 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 368 if !peer.IsRunning() { 369 return 370 } 371 372 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 373 sw.stopAndRemovePeer(peer, reason) 374 375 if peer.IsPersistent() { 376 var addr *NetAddress 377 if peer.IsOutbound() { // socket address for outbound peers 378 addr = peer.SocketAddr() 379 } else { // self-reported address for inbound peers 380 var err error 381 addr, err = peer.NodeInfo().NetAddress() 382 if err != nil { 383 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 384 "peer", peer, "err", err) 385 return 386 } 387 } 388 go sw.reconnectToPeer(addr) 389 } 390 } 391 392 // StopPeerGracefully disconnects from a peer gracefully. 393 // TODO: handle graceful disconnects. 394 func (sw *Switch) StopPeerGracefully(peer Peer) { 395 sw.Logger.Info("Stopping peer gracefully") 396 sw.stopAndRemovePeer(peer, nil) 397 } 398 399 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 400 sw.transport.Cleanup(peer) 401 if err := peer.Stop(); err != nil { 402 sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly 403 } 404 405 for _, reactor := range sw.reactors { 406 reactor.RemovePeer(peer, reason) 407 } 408 409 // Removing a peer should go last to avoid a situation where a peer 410 // reconnect to our node and the switch calls InitPeer before 411 // RemovePeer is finished. 412 // https://github.com/pure-x-eth/consensus_tm/issues/3338 413 if sw.peers.Remove(peer) { 414 sw.metrics.Peers.Add(float64(-1)) 415 } else { 416 // Removal of the peer has failed. The function above sets a flag within the peer to mark this. 417 // We keep this message here as information to the developer. 418 sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID()) 419 } 420 } 421 422 // reconnectToPeer tries to reconnect to the addr, first repeatedly 423 // with a fixed interval, then with exponential backoff. 424 // If no success after all that, it stops trying, and leaves it 425 // to the PEX/Addrbook to find the peer with the addr again 426 // NOTE: this will keep trying even if the handshake or auth fails. 427 // TODO: be more explicit with error types so we only retry on certain failures 428 // - ie. if we're getting ErrDuplicatePeer we can stop 429 // because the addrbook got us the peer back already 430 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 431 if sw.reconnecting.Has(string(addr.ID)) { 432 return 433 } 434 sw.reconnecting.Set(string(addr.ID), addr) 435 defer sw.reconnecting.Delete(string(addr.ID)) 436 437 start := time.Now() 438 sw.Logger.Info("Reconnecting to peer", "addr", addr) 439 for i := 0; i < reconnectAttempts; i++ { 440 if !sw.IsRunning() { 441 return 442 } 443 444 err := sw.DialPeerWithAddress(addr) 445 if err == nil { 446 return // success 447 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 448 return 449 } 450 451 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 452 // sleep a set amount 453 sw.randomSleep(reconnectInterval) 454 continue 455 } 456 457 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 458 "addr", addr, "elapsed", time.Since(start)) 459 for i := 0; i < reconnectBackOffAttempts; i++ { 460 if !sw.IsRunning() { 461 return 462 } 463 464 // sleep an exponentially increasing amount 465 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 466 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 467 468 err := sw.DialPeerWithAddress(addr) 469 if err == nil { 470 return // success 471 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 472 return 473 } 474 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 475 } 476 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 477 } 478 479 // SetAddrBook allows to set address book on Switch. 480 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 481 sw.addrBook = addrBook 482 } 483 484 // MarkPeerAsGood marks the given peer as good when it did something useful 485 // like contributed to consensus. 486 func (sw *Switch) MarkPeerAsGood(peer Peer) { 487 if sw.addrBook != nil { 488 sw.addrBook.MarkGood(peer.ID()) 489 } 490 } 491 492 //--------------------------------------------------------------------- 493 // Dialing 494 495 type privateAddr interface { 496 PrivateAddr() bool 497 } 498 499 func isPrivateAddr(err error) bool { 500 te, ok := err.(privateAddr) 501 return ok && te.PrivateAddr() 502 } 503 504 // DialPeersAsync dials a list of peers asynchronously in random order. 505 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 506 // It ignores ErrNetAddressLookup. However, if there are other errors, first 507 // encounter is returned. 508 // Nop if there are no peers. 509 func (sw *Switch) DialPeersAsync(peers []string) error { 510 netAddrs, errs := NewNetAddressStrings(peers) 511 // report all the errors 512 for _, err := range errs { 513 sw.Logger.Error("Error in peer's address", "err", err) 514 } 515 // return first non-ErrNetAddressLookup error 516 for _, err := range errs { 517 if _, ok := err.(ErrNetAddressLookup); ok { 518 continue 519 } 520 return err 521 } 522 sw.dialPeersAsync(netAddrs) 523 return nil 524 } 525 526 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 527 ourAddr := sw.NetAddress() 528 529 // TODO: this code feels like it's in the wrong place. 530 // The integration tests depend on the addrBook being saved 531 // right away but maybe we can change that. Recall that 532 // the addrBook is only written to disk every 2min 533 if sw.addrBook != nil { 534 // add peers to `addrBook` 535 for _, netAddr := range netAddrs { 536 // do not add our address or ID 537 if !netAddr.Same(ourAddr) { 538 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 539 if isPrivateAddr(err) { 540 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 541 } else { 542 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 543 } 544 } 545 } 546 } 547 // Persist some peers to disk right away. 548 // NOTE: integration tests depend on this 549 sw.addrBook.Save() 550 } 551 552 // permute the list, dial them in random order. 553 perm := sw.rng.Perm(len(netAddrs)) 554 for i := 0; i < len(perm); i++ { 555 go func(i int) { 556 j := perm[i] 557 addr := netAddrs[j] 558 559 if addr.Same(ourAddr) { 560 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 561 return 562 } 563 564 sw.randomSleep(0) 565 566 err := sw.DialPeerWithAddress(addr) 567 if err != nil { 568 switch err.(type) { 569 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 570 sw.Logger.Debug("Error dialing peer", "err", err) 571 default: 572 sw.Logger.Error("Error dialing peer", "err", err) 573 } 574 } 575 }(i) 576 } 577 } 578 579 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 580 // and authenticates successfully. 581 // If we're currently dialing this address or it belongs to an existing peer, 582 // ErrCurrentlyDialingOrExistingAddress is returned. 583 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 584 if sw.IsDialingOrExistingAddress(addr) { 585 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 586 } 587 588 sw.dialing.Set(string(addr.ID), addr) 589 defer sw.dialing.Delete(string(addr.ID)) 590 591 return sw.addOutboundPeerWithConfig(addr, sw.config) 592 } 593 594 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 595 func (sw *Switch) randomSleep(interval time.Duration) { 596 r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 597 time.Sleep(r + interval) 598 } 599 600 // IsDialingOrExistingAddress returns true if switch has a peer with the given 601 // address or dialing it at the moment. 602 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 603 return sw.dialing.Has(string(addr.ID)) || 604 sw.peers.Has(addr.ID) || 605 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 606 } 607 608 // AddPersistentPeers allows you to set persistent peers. It ignores 609 // ErrNetAddressLookup. However, if there are other errors, first encounter is 610 // returned. 611 func (sw *Switch) AddPersistentPeers(addrs []string) error { 612 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 613 netAddrs, errs := NewNetAddressStrings(addrs) 614 // report all the errors 615 for _, err := range errs { 616 sw.Logger.Error("Error in peer's address", "err", err) 617 } 618 // return first non-ErrNetAddressLookup error 619 for _, err := range errs { 620 if _, ok := err.(ErrNetAddressLookup); ok { 621 continue 622 } 623 return err 624 } 625 sw.persistentPeersAddrs = netAddrs 626 return nil 627 } 628 629 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 630 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 631 for i, id := range ids { 632 err := validateID(ID(id)) 633 if err != nil { 634 return fmt.Errorf("wrong ID #%d: %w", i, err) 635 } 636 sw.unconditionalPeerIDs[ID(id)] = struct{}{} 637 } 638 return nil 639 } 640 641 func (sw *Switch) AddPrivatePeerIDs(ids []string) error { 642 validIDs := make([]string, 0, len(ids)) 643 for i, id := range ids { 644 err := validateID(ID(id)) 645 if err != nil { 646 return fmt.Errorf("wrong ID #%d: %w", i, err) 647 } 648 validIDs = append(validIDs, id) 649 } 650 651 sw.addrBook.AddPrivateIDs(validIDs) 652 653 return nil 654 } 655 656 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 657 for _, pa := range sw.persistentPeersAddrs { 658 if pa.Equals(na) { 659 return true 660 } 661 } 662 return false 663 } 664 665 func (sw *Switch) acceptRoutine() { 666 for { 667 p, err := sw.transport.Accept(peerConfig{ 668 chDescs: sw.chDescs, 669 onPeerError: sw.StopPeerForError, 670 reactorsByCh: sw.reactorsByCh, 671 msgTypeByChID: sw.msgTypeByChID, 672 metrics: sw.metrics, 673 mlc: sw.mlc, 674 isPersistent: sw.IsPeerPersistent, 675 }) 676 if err != nil { 677 switch err := err.(type) { 678 case ErrRejected: 679 if err.IsSelf() { 680 // Remove the given address from the address book and add to our addresses 681 // to avoid dialing in the future. 682 addr := err.Addr() 683 sw.addrBook.RemoveAddress(&addr) 684 sw.addrBook.AddOurAddress(&addr) 685 } 686 687 sw.Logger.Info( 688 "Inbound Peer rejected", 689 "err", err, 690 "numPeers", sw.peers.Size(), 691 ) 692 693 continue 694 case ErrFilterTimeout: 695 sw.Logger.Error( 696 "Peer filter timed out", 697 "err", err, 698 ) 699 700 continue 701 case ErrTransportClosed: 702 sw.Logger.Error( 703 "Stopped accept routine, as transport is closed", 704 "numPeers", sw.peers.Size(), 705 ) 706 default: 707 sw.Logger.Error( 708 "Accept on transport errored", 709 "err", err, 710 "numPeers", sw.peers.Size(), 711 ) 712 // We could instead have a retry loop around the acceptRoutine, 713 // but that would need to stop and let the node shutdown eventually. 714 // So might as well panic and let process managers restart the node. 715 // There's no point in letting the node run without the acceptRoutine, 716 // since it won't be able to accept new connections. 717 panic(fmt.Errorf("accept routine exited: %v", err)) 718 } 719 720 break 721 } 722 723 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 724 // Ignore connection if we already have enough peers. 725 _, in, _ := sw.NumPeers() 726 if in >= sw.config.MaxNumInboundPeers { 727 sw.Logger.Info( 728 "Ignoring inbound connection: already have enough inbound peers", 729 "address", p.SocketAddr(), 730 "have", in, 731 "max", sw.config.MaxNumInboundPeers, 732 ) 733 734 sw.transport.Cleanup(p) 735 736 continue 737 } 738 739 } 740 741 if err := sw.addPeer(p); err != nil { 742 sw.transport.Cleanup(p) 743 if p.IsRunning() { 744 _ = p.Stop() 745 } 746 sw.Logger.Info( 747 "Ignoring inbound connection: error while adding peer", 748 "err", err, 749 "id", p.ID(), 750 ) 751 } 752 } 753 } 754 755 // dial the peer; make secret connection; authenticate against the dialed ID; 756 // add the peer. 757 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 758 // If peer is started successfully, reconnectLoop will start when 759 // StopPeerForError is called. 760 func (sw *Switch) addOutboundPeerWithConfig( 761 addr *NetAddress, 762 cfg *config.P2PConfig, 763 ) error { 764 sw.Logger.Info("Dialing peer", "address", addr) 765 766 // XXX(xla): Remove the leakage of test concerns in implementation. 767 if cfg.TestDialFail { 768 go sw.reconnectToPeer(addr) 769 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 770 } 771 772 p, err := sw.transport.Dial(*addr, peerConfig{ 773 chDescs: sw.chDescs, 774 onPeerError: sw.StopPeerForError, 775 isPersistent: sw.IsPeerPersistent, 776 reactorsByCh: sw.reactorsByCh, 777 msgTypeByChID: sw.msgTypeByChID, 778 metrics: sw.metrics, 779 mlc: sw.mlc, 780 }) 781 if err != nil { 782 if e, ok := err.(ErrRejected); ok { 783 if e.IsSelf() { 784 // Remove the given address from the address book and add to our addresses 785 // to avoid dialing in the future. 786 sw.addrBook.RemoveAddress(addr) 787 sw.addrBook.AddOurAddress(addr) 788 789 return err 790 } 791 } 792 793 // retry persistent peers after 794 // any dial error besides IsSelf() 795 if sw.IsPeerPersistent(addr) { 796 go sw.reconnectToPeer(addr) 797 } 798 799 return err 800 } 801 802 if err := sw.addPeer(p); err != nil { 803 sw.transport.Cleanup(p) 804 if p.IsRunning() { 805 _ = p.Stop() 806 } 807 return err 808 } 809 810 return nil 811 } 812 813 func (sw *Switch) filterPeer(p Peer) error { 814 // Avoid duplicate 815 if sw.peers.Has(p.ID()) { 816 return ErrRejected{id: p.ID(), isDuplicate: true} 817 } 818 819 errc := make(chan error, len(sw.peerFilters)) 820 821 for _, f := range sw.peerFilters { 822 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 823 errc <- f(sw.peers, p) 824 }(f, p, errc) 825 } 826 827 for i := 0; i < cap(errc); i++ { 828 select { 829 case err := <-errc: 830 if err != nil { 831 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 832 } 833 case <-time.After(sw.filterTimeout): 834 return ErrFilterTimeout{} 835 } 836 } 837 838 return nil 839 } 840 841 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 842 // the peer is filtered out or failed to start or can't be added. 843 func (sw *Switch) addPeer(p Peer) error { 844 if err := sw.filterPeer(p); err != nil { 845 return err 846 } 847 848 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 849 850 // Handle the shut down case where the switch has stopped but we're 851 // concurrently trying to add a peer. 852 if !sw.IsRunning() { 853 // XXX should this return an error or just log and terminate? 854 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 855 return nil 856 } 857 858 // Add some data to the peer, which is required by reactors. 859 for _, reactor := range sw.reactors { 860 p = reactor.InitPeer(p) 861 } 862 863 // Start the peer's send/recv routines. 864 // Must start it before adding it to the peer set 865 // to prevent Start and Stop from being called concurrently. 866 err := p.Start() 867 if err != nil { 868 // Should never happen 869 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 870 return err 871 } 872 873 // Add the peer to PeerSet. Do this before starting the reactors 874 // so that if Receive errors, we will find the peer and remove it. 875 // Add should not err since we already checked peers.Has(). 876 if err := sw.peers.Add(p); err != nil { 877 switch err.(type) { 878 case ErrPeerRemoval: 879 sw.Logger.Error("Error starting peer ", 880 " err ", "Peer has already errored and removal was attempted.", 881 "peer", p.ID()) 882 } 883 return err 884 } 885 sw.metrics.Peers.Add(float64(1)) 886 887 // Start all the reactor protocols on the peer. 888 for _, reactor := range sw.reactors { 889 reactor.AddPeer(p) 890 } 891 892 sw.Logger.Info("Added peer", "peer", p) 893 894 return nil 895 }