bitbucket.org/number571/tendermint@v0.8.14/internal/p2p/switch.go (about) 1 package p2p 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "math" 8 mrand "math/rand" 9 "net" 10 "sync" 11 "time" 12 13 "bitbucket.org/number571/tendermint/config" 14 "bitbucket.org/number571/tendermint/crypto" 15 "bitbucket.org/number571/tendermint/internal/p2p/conn" 16 "bitbucket.org/number571/tendermint/libs/cmap" 17 tmrand "bitbucket.org/number571/tendermint/libs/rand" 18 "bitbucket.org/number571/tendermint/libs/service" 19 "bitbucket.org/number571/tendermint/types" 20 ) 21 22 const ( 23 // wait a random amount of time from this interval 24 // before dialing peers or reconnecting to help prevent DoS 25 dialRandomizerIntervalMilliseconds = 3000 26 27 // repeatedly try to reconnect for a few minutes 28 // ie. 5 * 20 = 100s 29 reconnectAttempts = 20 30 reconnectInterval = 5 * time.Second 31 32 // then move into exponential backoff mode for ~1day 33 // ie. 3**10 = 16hrs 34 reconnectBackOffAttempts = 10 35 reconnectBackOffBaseSeconds = 3 36 37 defaultFilterTimeout = 5 * time.Second 38 ) 39 40 // MConnConfig returns an MConnConfig with fields updated 41 // from the P2PConfig. 42 func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig { 43 mConfig := conn.DefaultMConnConfig() 44 mConfig.FlushThrottle = cfg.FlushThrottleTimeout 45 mConfig.SendRate = cfg.SendRate 46 mConfig.RecvRate = cfg.RecvRate 47 mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize 48 return mConfig 49 } 50 51 //----------------------------------------------------------------------------- 52 53 // An AddrBook represents an address book from the pex package, which is used 54 // to store peer addresses. 55 type AddrBook interface { 56 AddAddress(addr *NetAddress, src *NetAddress) error 57 AddPrivateIDs([]string) 58 AddOurAddress(*NetAddress) 59 OurAddress(*NetAddress) bool 60 MarkGood(types.NodeID) 61 RemoveAddress(*NetAddress) 62 HasAddress(*NetAddress) bool 63 Save() 64 } 65 66 // ConnFilterFunc is a callback for connection filtering. If it returns an 67 // error, the connection is rejected. The set of existing connections is passed 68 // along with the new connection and all resolved IPs. 69 type ConnFilterFunc func(ConnSet, net.Conn, []net.IP) error 70 71 // PeerFilterFunc to be implemented by filter hooks after a new Peer has been 72 // fully setup. 73 type PeerFilterFunc func(IPeerSet, Peer) error 74 75 // ConnDuplicateIPFilter resolves and keeps all ips for an incoming connection 76 // and refuses new ones if they come from a known ip. 77 var ConnDuplicateIPFilter ConnFilterFunc = func(cs ConnSet, c net.Conn, ips []net.IP) error { 78 for _, ip := range ips { 79 if cs.HasIP(ip) { 80 return ErrRejected{ 81 conn: c, 82 err: fmt.Errorf("ip<%v> already connected", ip), 83 isDuplicate: true, 84 } 85 } 86 } 87 return nil 88 } 89 90 //----------------------------------------------------------------------------- 91 92 // Switch handles peer connections and exposes an API to receive incoming messages 93 // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one 94 // or more `Channels`. So while sending outgoing messages is typically performed on the peer, 95 // incoming messages are received on the reactor. 96 type Switch struct { 97 service.BaseService 98 99 config *config.P2PConfig 100 reactors map[string]Reactor 101 chDescs []*conn.ChannelDescriptor 102 reactorsByCh map[byte]Reactor 103 peers *PeerSet 104 dialing *cmap.CMap 105 reconnecting *cmap.CMap 106 nodeInfo types.NodeInfo // our node info 107 nodeKey types.NodeKey // our node privkey 108 addrBook AddrBook 109 // peers addresses with whom we'll maintain constant connection 110 persistentPeersAddrs []*NetAddress 111 unconditionalPeerIDs map[types.NodeID]struct{} 112 113 transport Transport 114 115 filterTimeout time.Duration 116 peerFilters []PeerFilterFunc 117 connFilters []ConnFilterFunc 118 conns ConnSet 119 120 metrics *Metrics 121 } 122 123 // NetAddress returns the first address the switch is listening on, 124 // or nil if no addresses are found. 125 func (sw *Switch) NetAddress() *NetAddress { 126 endpoints := sw.transport.Endpoints() 127 if len(endpoints) == 0 { 128 return nil 129 } 130 return &NetAddress{ 131 ID: sw.nodeInfo.NodeID, 132 IP: endpoints[0].IP, 133 Port: endpoints[0].Port, 134 } 135 } 136 137 // SwitchOption sets an optional parameter on the Switch. 138 type SwitchOption func(*Switch) 139 140 // NewSwitch creates a new Switch with the given config. 141 func NewSwitch( 142 cfg *config.P2PConfig, 143 transport Transport, 144 options ...SwitchOption, 145 ) *Switch { 146 sw := &Switch{ 147 config: cfg, 148 reactors: make(map[string]Reactor), 149 chDescs: make([]*conn.ChannelDescriptor, 0), 150 reactorsByCh: make(map[byte]Reactor), 151 peers: NewPeerSet(), 152 dialing: cmap.NewCMap(), 153 reconnecting: cmap.NewCMap(), 154 metrics: NopMetrics(), 155 transport: transport, 156 persistentPeersAddrs: make([]*NetAddress, 0), 157 unconditionalPeerIDs: make(map[types.NodeID]struct{}), 158 filterTimeout: defaultFilterTimeout, 159 conns: NewConnSet(), 160 } 161 162 // Ensure PRNG is reseeded. 163 tmrand.Reseed() 164 165 sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw) 166 167 for _, option := range options { 168 option(sw) 169 } 170 171 return sw 172 } 173 174 // SwitchFilterTimeout sets the timeout used for peer filters. 175 func SwitchFilterTimeout(timeout time.Duration) SwitchOption { 176 return func(sw *Switch) { sw.filterTimeout = timeout } 177 } 178 179 // SwitchPeerFilters sets the filters for rejection of new peers. 180 func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption { 181 return func(sw *Switch) { sw.peerFilters = filters } 182 } 183 184 // SwitchConnFilters sets the filters for rejection of connections. 185 func SwitchConnFilters(filters ...ConnFilterFunc) SwitchOption { 186 return func(sw *Switch) { sw.connFilters = filters } 187 } 188 189 // WithMetrics sets the metrics. 190 func WithMetrics(metrics *Metrics) SwitchOption { 191 return func(sw *Switch) { sw.metrics = metrics } 192 } 193 194 //--------------------------------------------------------------------- 195 // Switch setup 196 197 // AddReactor adds the given reactor to the switch. 198 // NOTE: Not goroutine safe. 199 func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor { 200 for _, chDesc := range reactor.GetChannels() { 201 chID := chDesc.ID 202 // No two reactors can share the same channel. 203 if sw.reactorsByCh[chID] != nil { 204 panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor)) 205 } 206 sw.chDescs = append(sw.chDescs, chDesc) 207 sw.reactorsByCh[chID] = reactor 208 } 209 sw.reactors[name] = reactor 210 reactor.SetSwitch(sw) 211 return reactor 212 } 213 214 // RemoveReactor removes the given Reactor from the Switch. 215 // NOTE: Not goroutine safe. 216 func (sw *Switch) RemoveReactor(name string, reactor Reactor) { 217 for _, chDesc := range reactor.GetChannels() { 218 // remove channel description 219 for i := 0; i < len(sw.chDescs); i++ { 220 if chDesc.ID == sw.chDescs[i].ID { 221 sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...) 222 break 223 } 224 } 225 delete(sw.reactorsByCh, chDesc.ID) 226 } 227 delete(sw.reactors, name) 228 reactor.SetSwitch(nil) 229 } 230 231 // Reactors returns a map of reactors registered on the switch. 232 // NOTE: Not goroutine safe. 233 func (sw *Switch) Reactors() map[string]Reactor { 234 return sw.reactors 235 } 236 237 // Reactor returns the reactor with the given name. 238 // NOTE: Not goroutine safe. 239 func (sw *Switch) Reactor(name string) Reactor { 240 return sw.reactors[name] 241 } 242 243 // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes. 244 // NOTE: Not goroutine safe. 245 func (sw *Switch) SetNodeInfo(nodeInfo types.NodeInfo) { 246 sw.nodeInfo = nodeInfo 247 } 248 249 // NodeInfo returns the switch's NodeInfo. 250 // NOTE: Not goroutine safe. 251 func (sw *Switch) NodeInfo() types.NodeInfo { 252 return sw.nodeInfo 253 } 254 255 // SetNodeKey sets the switch's private key for authenticated encryption. 256 // NOTE: Not goroutine safe. 257 func (sw *Switch) SetNodeKey(nodeKey types.NodeKey) { 258 sw.nodeKey = nodeKey 259 } 260 261 //--------------------------------------------------------------------- 262 // Service start/stop 263 264 // OnStart implements BaseService. It starts all the reactors and peers. 265 func (sw *Switch) OnStart() error { 266 267 // FIXME: Temporary hack to pass channel descriptors to MConn transport, 268 // since they are not available when it is constructed. This will be 269 // fixed when we implement the new router abstraction. 270 if t, ok := sw.transport.(*MConnTransport); ok { 271 t.channelDescs = sw.chDescs 272 } 273 274 // Start reactors 275 for _, reactor := range sw.reactors { 276 err := reactor.Start() 277 if err != nil { 278 return fmt.Errorf("failed to start %v: %w", reactor, err) 279 } 280 } 281 282 // Start accepting Peers. 283 go sw.acceptRoutine() 284 285 return nil 286 } 287 288 // OnStop implements BaseService. It stops all peers and reactors. 289 func (sw *Switch) OnStop() { 290 // Stop peers 291 for _, p := range sw.peers.List() { 292 sw.stopAndRemovePeer(p, nil) 293 } 294 295 // Stop reactors 296 sw.Logger.Debug("Switch: Stopping reactors") 297 for _, reactor := range sw.reactors { 298 if err := reactor.Stop(); err != nil { 299 sw.Logger.Error("error while stopping reactor", "reactor", reactor, "error", err) 300 } 301 } 302 } 303 304 //--------------------------------------------------------------------- 305 // Peers 306 307 // Broadcast runs a go routine for each attempted send, which will block trying 308 // to send for defaultSendTimeoutSeconds. Returns a channel which receives 309 // success values for each attempted send (false if times out). Channel will be 310 // closed once msg bytes are sent to all peers (or time out). 311 // 312 // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved. 313 func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool { 314 sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", msgBytes) 315 316 peers := sw.peers.List() 317 var wg sync.WaitGroup 318 wg.Add(len(peers)) 319 successChan := make(chan bool, len(peers)) 320 321 for _, peer := range peers { 322 go func(p Peer) { 323 defer wg.Done() 324 success := p.Send(chID, msgBytes) 325 successChan <- success 326 }(peer) 327 } 328 329 go func() { 330 wg.Wait() 331 close(successChan) 332 }() 333 334 return successChan 335 } 336 337 // NumPeers returns the count of outbound/inbound and outbound-dialing peers. 338 // unconditional peers are not counted here. 339 func (sw *Switch) NumPeers() (outbound, inbound, dialing int) { 340 peers := sw.peers.List() 341 for _, peer := range peers { 342 if peer.IsOutbound() { 343 if !sw.IsPeerUnconditional(peer.ID()) { 344 outbound++ 345 } 346 } else { 347 if !sw.IsPeerUnconditional(peer.ID()) { 348 inbound++ 349 } 350 } 351 } 352 dialing = sw.dialing.Size() 353 return 354 } 355 356 func (sw *Switch) IsPeerUnconditional(id types.NodeID) bool { 357 _, ok := sw.unconditionalPeerIDs[id] 358 return ok 359 } 360 361 // MaxNumOutboundPeers returns a maximum number of outbound peers. 362 func (sw *Switch) MaxNumOutboundPeers() int { 363 return sw.config.MaxNumOutboundPeers 364 } 365 366 // Peers returns the set of peers that are connected to the switch. 367 func (sw *Switch) Peers() IPeerSet { 368 return sw.peers 369 } 370 371 // StopPeerForError disconnects from a peer due to external error. 372 // If the peer is persistent, it will attempt to reconnect. 373 // TODO: make record depending on reason. 374 func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { 375 if !peer.IsRunning() { 376 return 377 } 378 379 sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason) 380 sw.stopAndRemovePeer(peer, reason) 381 382 if peer.IsPersistent() { 383 var addr *NetAddress 384 if peer.IsOutbound() { // socket address for outbound peers 385 addr = peer.SocketAddr() 386 } else { // self-reported address for inbound peers 387 var err error 388 addr, err = peer.NodeInfo().NetAddress() 389 if err != nil { 390 sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong", 391 "peer", peer, "err", err) 392 return 393 } 394 } 395 go sw.reconnectToPeer(addr) 396 } 397 } 398 399 // StopPeerGracefully disconnects from a peer gracefully. 400 // TODO: handle graceful disconnects. 401 func (sw *Switch) StopPeerGracefully(peer Peer) { 402 sw.Logger.Info("Stopping peer gracefully") 403 sw.stopAndRemovePeer(peer, nil) 404 } 405 406 func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { 407 if err := peer.Stop(); err != nil { 408 sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly 409 } 410 411 for _, reactor := range sw.reactors { 412 reactor.RemovePeer(peer, reason) 413 } 414 415 // Removing a peer should go last to avoid a situation where a peer 416 // reconnect to our node and the switch calls InitPeer before 417 // RemovePeer is finished. 418 // https://bitbucket.org/number571/tendermint/issues/3338 419 if sw.peers.Remove(peer) { 420 sw.metrics.Peers.Add(float64(-1)) 421 } 422 423 sw.conns.RemoveAddr(peer.RemoteAddr()) 424 } 425 426 // reconnectToPeer tries to reconnect to the addr, first repeatedly 427 // with a fixed interval, then with exponential backoff. 428 // If no success after all that, it stops trying, and leaves it 429 // to the PEX/Addrbook to find the peer with the addr again 430 // NOTE: this will keep trying even if the handshake or auth fails. 431 // TODO: be more explicit with error types so we only retry on certain failures 432 // - ie. if we're getting ErrDuplicatePeer we can stop 433 // because the addrbook got us the peer back already 434 func (sw *Switch) reconnectToPeer(addr *NetAddress) { 435 if sw.reconnecting.Has(string(addr.ID)) { 436 return 437 } 438 sw.reconnecting.Set(string(addr.ID), addr) 439 defer sw.reconnecting.Delete(string(addr.ID)) 440 441 start := time.Now() 442 sw.Logger.Info("Reconnecting to peer", "addr", addr) 443 for i := 0; i < reconnectAttempts; i++ { 444 if !sw.IsRunning() { 445 return 446 } 447 448 err := sw.DialPeerWithAddress(addr) 449 if err == nil { 450 return // success 451 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 452 return 453 } 454 455 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 456 // sleep a set amount 457 sw.randomSleep(reconnectInterval) 458 continue 459 } 460 461 sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", 462 "addr", addr, "elapsed", time.Since(start)) 463 for i := 0; i < reconnectBackOffAttempts; i++ { 464 if !sw.IsRunning() { 465 return 466 } 467 468 // sleep an exponentially increasing amount 469 sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) 470 sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) 471 472 err := sw.DialPeerWithAddress(addr) 473 if err == nil { 474 return // success 475 } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { 476 return 477 } 478 sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) 479 } 480 sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) 481 } 482 483 // SetAddrBook allows to set address book on Switch. 484 func (sw *Switch) SetAddrBook(addrBook AddrBook) { 485 sw.addrBook = addrBook 486 } 487 488 // MarkPeerAsGood marks the given peer as good when it did something useful 489 // like contributed to consensus. 490 func (sw *Switch) MarkPeerAsGood(peer Peer) { 491 if sw.addrBook != nil { 492 sw.addrBook.MarkGood(peer.ID()) 493 } 494 } 495 496 //--------------------------------------------------------------------- 497 // Dialing 498 499 type privateAddr interface { 500 PrivateAddr() bool 501 } 502 503 func isPrivateAddr(err error) bool { 504 te, ok := err.(privateAddr) 505 return ok && te.PrivateAddr() 506 } 507 508 // DialPeersAsync dials a list of peers asynchronously in random order. 509 // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). 510 // It ignores ErrNetAddressLookup. However, if there are other errors, first 511 // encounter is returned. 512 // Nop if there are no peers. 513 func (sw *Switch) DialPeersAsync(peers []string) error { 514 netAddrs, errs := NewNetAddressStrings(peers) 515 // report all the errors 516 for _, err := range errs { 517 sw.Logger.Error("Error in peer's address", "err", err) 518 } 519 // return first non-ErrNetAddressLookup error 520 for _, err := range errs { 521 if _, ok := err.(types.ErrNetAddressLookup); ok { 522 continue 523 } 524 return err 525 } 526 sw.dialPeersAsync(netAddrs) 527 return nil 528 } 529 530 func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) { 531 ourAddr := sw.NetAddress() 532 533 // TODO: this code feels like it's in the wrong place. 534 // The integration tests depend on the addrBook being saved 535 // right away but maybe we can change that. Recall that 536 // the addrBook is only written to disk every 2min 537 if sw.addrBook != nil { 538 // add peers to `addrBook` 539 for _, netAddr := range netAddrs { 540 // do not add our address or ID 541 if !netAddr.Same(ourAddr) { 542 if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil { 543 if isPrivateAddr(err) { 544 sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) 545 } else { 546 sw.Logger.Error("Can't add peer's address to addrbook", "err", err) 547 } 548 } 549 } 550 } 551 // Persist some peers to disk right away. 552 // NOTE: integration tests depend on this 553 sw.addrBook.Save() 554 } 555 556 // permute the list, dial them in random order. 557 perm := mrand.Perm(len(netAddrs)) 558 for i := 0; i < len(perm); i++ { 559 go func(i int) { 560 j := perm[i] 561 addr := netAddrs[j] 562 563 if addr.Same(ourAddr) { 564 sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr) 565 return 566 } 567 568 sw.randomSleep(0) 569 570 err := sw.DialPeerWithAddress(addr) 571 if err != nil { 572 switch err.(type) { 573 case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: 574 sw.Logger.Debug("Error dialing peer", "err", err) 575 default: 576 sw.Logger.Error("Error dialing peer", "err", err) 577 } 578 } 579 }(i) 580 } 581 } 582 583 // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects 584 // and authenticates successfully. 585 // If we're currently dialing this address or it belongs to an existing peer, 586 // ErrCurrentlyDialingOrExistingAddress is returned. 587 func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error { 588 if sw.IsDialingOrExistingAddress(addr) { 589 return ErrCurrentlyDialingOrExistingAddress{addr.String()} 590 } 591 592 sw.dialing.Set(string(addr.ID), addr) 593 defer sw.dialing.Delete(string(addr.ID)) 594 595 return sw.addOutboundPeerWithConfig(addr, sw.config) 596 } 597 598 // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds] 599 func (sw *Switch) randomSleep(interval time.Duration) { 600 // nolint:gosec // G404: Use of weak random number generator 601 r := time.Duration(mrand.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond 602 time.Sleep(r + interval) 603 } 604 605 // IsDialingOrExistingAddress returns true if switch has a peer with the given 606 // address or dialing it at the moment. 607 func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool { 608 return sw.dialing.Has(string(addr.ID)) || 609 sw.peers.Has(addr.ID) || 610 (!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP)) 611 } 612 613 // AddPersistentPeers allows you to set persistent peers. It ignores 614 // ErrNetAddressLookup. However, if there are other errors, first encounter is 615 // returned. 616 func (sw *Switch) AddPersistentPeers(addrs []string) error { 617 sw.Logger.Info("Adding persistent peers", "addrs", addrs) 618 netAddrs, errs := NewNetAddressStrings(addrs) 619 // report all the errors 620 for _, err := range errs { 621 sw.Logger.Error("Error in peer's address", "err", err) 622 } 623 // return first non-ErrNetAddressLookup error 624 for _, err := range errs { 625 if _, ok := err.(types.ErrNetAddressLookup); ok { 626 continue 627 } 628 return err 629 } 630 sw.persistentPeersAddrs = netAddrs 631 return nil 632 } 633 634 func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error { 635 sw.Logger.Info("Adding unconditional peer ids", "ids", ids) 636 for i, id := range ids { 637 err := types.NodeID(id).Validate() 638 if err != nil { 639 return fmt.Errorf("wrong ID #%d: %w", i, err) 640 } 641 sw.unconditionalPeerIDs[types.NodeID(id)] = struct{}{} 642 } 643 return nil 644 } 645 646 func (sw *Switch) AddPrivatePeerIDs(ids []string) error { 647 validIDs := make([]string, 0, len(ids)) 648 for i, id := range ids { 649 err := types.NodeID(id).Validate() 650 if err != nil { 651 return fmt.Errorf("wrong ID #%d: %w", i, err) 652 } 653 validIDs = append(validIDs, id) 654 } 655 656 sw.addrBook.AddPrivateIDs(validIDs) 657 658 return nil 659 } 660 661 func (sw *Switch) IsPeerPersistent(na *NetAddress) bool { 662 for _, pa := range sw.persistentPeersAddrs { 663 if pa.Equals(na) { 664 return true 665 } 666 } 667 return false 668 } 669 670 func (sw *Switch) acceptRoutine() { 671 for { 672 var peerNodeInfo types.NodeInfo 673 c, err := sw.transport.Accept() 674 if err == nil { 675 // NOTE: The legacy MConn transport did handshaking in Accept(), 676 // which was asynchronous and avoided head-of-line-blocking. 677 // However, as handshakes are being migrated out from the transport, 678 // we just do it synchronously here for now. 679 peerNodeInfo, _, err = sw.handshakePeer(c, "") 680 } 681 if err == nil { 682 err = sw.filterConn(c.(*mConnConnection).conn) 683 } 684 if err != nil { 685 if c != nil { 686 _ = c.Close() 687 } 688 if err == io.EOF { 689 err = ErrTransportClosed{} 690 } 691 switch err := err.(type) { 692 case ErrRejected: 693 addr := err.Addr() 694 if err.IsSelf() { 695 // Remove the given address from the address book and add to our addresses 696 // to avoid dialing in the future. 697 sw.addrBook.RemoveAddress(&addr) 698 sw.addrBook.AddOurAddress(&addr) 699 } 700 if err.IsIncompatible() { 701 sw.addrBook.RemoveAddress(&addr) 702 } 703 704 sw.Logger.Info( 705 "Inbound Peer rejected", 706 "err", err, 707 "numPeers", sw.peers.Size(), 708 ) 709 710 continue 711 case ErrFilterTimeout: 712 sw.Logger.Error( 713 "Peer filter timed out", 714 "err", err, 715 ) 716 717 continue 718 case ErrTransportClosed: 719 sw.Logger.Error( 720 "Stopped accept routine, as transport is closed", 721 "numPeers", sw.peers.Size(), 722 ) 723 default: 724 sw.Logger.Error( 725 "Accept on transport errored", 726 "err", err, 727 "numPeers", sw.peers.Size(), 728 ) 729 // We could instead have a retry loop around the acceptRoutine, 730 // but that would need to stop and let the node shutdown eventually. 731 // So might as well panic and let process managers restart the node. 732 // There's no point in letting the node run without the acceptRoutine, 733 // since it won't be able to accept new connections. 734 panic(fmt.Errorf("accept routine exited: %v", err)) 735 } 736 737 break 738 } 739 740 isPersistent := false 741 addr, err := peerNodeInfo.NetAddress() 742 if err == nil { 743 isPersistent = sw.IsPeerPersistent(addr) 744 } 745 746 p := newPeer( 747 peerNodeInfo, 748 newPeerConn(false, isPersistent, c), 749 sw.reactorsByCh, 750 sw.StopPeerForError, 751 PeerMetrics(sw.metrics), 752 ) 753 754 if !sw.IsPeerUnconditional(p.NodeInfo().ID()) { 755 // Ignore connection if we already have enough peers. 756 _, in, _ := sw.NumPeers() 757 if in >= sw.config.MaxNumInboundPeers { 758 sw.Logger.Info( 759 "Ignoring inbound connection: already have enough inbound peers", 760 "address", p.SocketAddr(), 761 "have", in, 762 "max", sw.config.MaxNumInboundPeers, 763 ) 764 _ = p.CloseConn() 765 continue 766 } 767 768 } 769 770 if err := sw.addPeer(p); err != nil { 771 _ = p.CloseConn() 772 if p.IsRunning() { 773 _ = p.Stop() 774 } 775 sw.conns.RemoveAddr(p.RemoteAddr()) 776 sw.Logger.Info( 777 "Ignoring inbound connection: error while adding peer", 778 "err", err, 779 "id", p.ID(), 780 ) 781 } 782 } 783 } 784 785 // dial the peer; make secret connection; authenticate against the dialed ID; 786 // add the peer. 787 // if dialing fails, start the reconnect loop. If handshake fails, it's over. 788 // If peer is started successfully, reconnectLoop will start when 789 // StopPeerForError is called. 790 func (sw *Switch) addOutboundPeerWithConfig( 791 addr *NetAddress, 792 cfg *config.P2PConfig, 793 ) error { 794 sw.Logger.Info("Dialing peer", "address", addr) 795 796 // XXX(xla): Remove the leakage of test concerns in implementation. 797 if cfg.TestDialFail { 798 go sw.reconnectToPeer(addr) 799 return fmt.Errorf("dial err (peerConfig.DialFail == true)") 800 } 801 802 // Hardcoded timeout moved from MConn transport during refactoring. 803 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 804 defer cancel() 805 806 var peerNodeInfo types.NodeInfo 807 c, err := sw.transport.Dial(ctx, Endpoint{ 808 Protocol: MConnProtocol, 809 IP: addr.IP, 810 Port: addr.Port, 811 }) 812 if err == nil { 813 peerNodeInfo, _, err = sw.handshakePeer(c, addr.ID) 814 } 815 if err == nil { 816 err = sw.filterConn(c.(*mConnConnection).conn) 817 } 818 if err != nil { 819 if c != nil { 820 _ = c.Close() 821 } 822 if e, ok := err.(ErrRejected); ok { 823 if e.IsSelf() { 824 // Remove the given address from the address book and add to our addresses 825 // to avoid dialing in the future. 826 sw.addrBook.RemoveAddress(addr) 827 sw.addrBook.AddOurAddress(addr) 828 } 829 if e.IsIncompatible() { 830 sw.addrBook.RemoveAddress(addr) 831 } 832 833 return err 834 } 835 836 // retry persistent peers after 837 // any dial error besides IsSelf() 838 if sw.IsPeerPersistent(addr) { 839 go sw.reconnectToPeer(addr) 840 } 841 842 return err 843 } 844 845 p := newPeer( 846 peerNodeInfo, 847 newPeerConn(true, sw.IsPeerPersistent(addr), c), 848 sw.reactorsByCh, 849 sw.StopPeerForError, 850 PeerMetrics(sw.metrics), 851 ) 852 853 if err := sw.addPeer(p); err != nil { 854 _ = p.CloseConn() 855 if p.IsRunning() { 856 _ = p.Stop() 857 } 858 sw.conns.RemoveAddr(p.RemoteAddr()) 859 return err 860 } 861 862 return nil 863 } 864 865 func (sw *Switch) handshakePeer( 866 c Connection, 867 expectPeerID types.NodeID, 868 ) (types.NodeInfo, crypto.PubKey, error) { 869 // Moved from transport and hardcoded until legacy P2P stack removal. 870 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 871 defer cancel() 872 873 peerInfo, peerKey, err := c.Handshake(ctx, sw.nodeInfo, sw.nodeKey.PrivKey) 874 if err != nil { 875 return peerInfo, peerKey, ErrRejected{ 876 conn: c.(*mConnConnection).conn, 877 err: fmt.Errorf("handshake failed: %v", err), 878 isAuthFailure: true, 879 } 880 } 881 882 if err = peerInfo.Validate(); err != nil { 883 return peerInfo, peerKey, ErrRejected{ 884 conn: c.(*mConnConnection).conn, 885 err: err, 886 isNodeInfoInvalid: true, 887 } 888 } 889 890 // For outgoing conns, ensure connection key matches dialed key. 891 if expectPeerID != "" { 892 peerID := types.NodeIDFromPubKey(peerKey) 893 if expectPeerID != peerID { 894 return peerInfo, peerKey, ErrRejected{ 895 conn: c.(*mConnConnection).conn, 896 id: peerID, 897 err: fmt.Errorf( 898 "conn.ID (%v) dialed ID (%v) mismatch", 899 peerID, 900 expectPeerID, 901 ), 902 isAuthFailure: true, 903 } 904 } 905 } 906 907 if sw.nodeInfo.ID() == peerInfo.ID() { 908 return peerInfo, peerKey, ErrRejected{ 909 addr: *types.NewNetAddress(peerInfo.ID(), c.(*mConnConnection).conn.RemoteAddr()), 910 conn: c.(*mConnConnection).conn, 911 id: peerInfo.ID(), 912 isSelf: true, 913 } 914 } 915 916 if err = sw.nodeInfo.CompatibleWith(peerInfo); err != nil { 917 return peerInfo, peerKey, ErrRejected{ 918 conn: c.(*mConnConnection).conn, 919 err: err, 920 id: peerInfo.ID(), 921 isIncompatible: true, 922 } 923 } 924 925 return peerInfo, peerKey, nil 926 } 927 928 func (sw *Switch) filterPeer(p Peer) error { 929 // Avoid duplicate 930 if sw.peers.Has(p.ID()) { 931 return ErrRejected{id: p.ID(), isDuplicate: true} 932 } 933 934 errc := make(chan error, len(sw.peerFilters)) 935 936 for _, f := range sw.peerFilters { 937 go func(f PeerFilterFunc, p Peer, errc chan<- error) { 938 errc <- f(sw.peers, p) 939 }(f, p, errc) 940 } 941 942 for i := 0; i < cap(errc); i++ { 943 select { 944 case err := <-errc: 945 if err != nil { 946 return ErrRejected{id: p.ID(), err: err, isFiltered: true} 947 } 948 case <-time.After(sw.filterTimeout): 949 return ErrFilterTimeout{} 950 } 951 } 952 953 return nil 954 } 955 956 // filterConn filters a connection, rejecting it if this function errors. 957 // 958 // FIXME: This is only here for compatibility with the current Switch code. In 959 // the new P2P stack, peer/connection filtering should be moved into the Router 960 // or PeerManager and removed from here. 961 func (sw *Switch) filterConn(conn net.Conn) error { 962 if sw.conns.Has(conn) { 963 return ErrRejected{conn: conn, isDuplicate: true} 964 } 965 966 host, _, err := net.SplitHostPort(conn.RemoteAddr().String()) 967 if err != nil { 968 return err 969 } 970 ip := net.ParseIP(host) 971 if ip == nil { 972 return fmt.Errorf("connection address has invalid IP address %q", host) 973 } 974 975 // Apply filter callbacks. 976 chErr := make(chan error, len(sw.connFilters)) 977 for _, connFilter := range sw.connFilters { 978 go func(connFilter ConnFilterFunc) { 979 chErr <- connFilter(sw.conns, conn, []net.IP{ip}) 980 }(connFilter) 981 } 982 983 for i := 0; i < cap(chErr); i++ { 984 select { 985 case err := <-chErr: 986 if err != nil { 987 return ErrRejected{conn: conn, err: err, isFiltered: true} 988 } 989 case <-time.After(sw.filterTimeout): 990 return ErrFilterTimeout{} 991 } 992 993 } 994 995 // FIXME: Doesn't really make sense to set this here, but we preserve the 996 // behavior from the previous P2P transport implementation. 997 sw.conns.Set(conn, []net.IP{ip}) 998 return nil 999 } 1000 1001 // addPeer starts up the Peer and adds it to the Switch. Error is returned if 1002 // the peer is filtered out or failed to start or can't be added. 1003 func (sw *Switch) addPeer(p Peer) error { 1004 if err := sw.filterPeer(p); err != nil { 1005 return err 1006 } 1007 1008 p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) 1009 1010 // Handle the shut down case where the switch has stopped but we're 1011 // concurrently trying to add a peer. 1012 if !sw.IsRunning() { 1013 // XXX should this return an error or just log and terminate? 1014 sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) 1015 return nil 1016 } 1017 1018 // Add some data to the peer, which is required by reactors. 1019 for _, reactor := range sw.reactors { 1020 p = reactor.InitPeer(p) 1021 } 1022 1023 // Start the peer's send/recv routines. 1024 // Must start it before adding it to the peer set 1025 // to prevent Start and Stop from being called concurrently. 1026 err := p.Start() 1027 if err != nil { 1028 // Should never happen 1029 sw.Logger.Error("Error starting peer", "err", err, "peer", p) 1030 return err 1031 } 1032 1033 // Add the peer to PeerSet. Do this before starting the reactors 1034 // so that if Receive errors, we will find the peer and remove it. 1035 // Add should not err since we already checked peers.Has(). 1036 if err := sw.peers.Add(p); err != nil { 1037 return err 1038 } 1039 sw.metrics.Peers.Add(float64(1)) 1040 1041 // Start all the reactor protocols on the peer. 1042 for _, reactor := range sw.reactors { 1043 reactor.AddPeer(p) 1044 } 1045 1046 sw.Logger.Info("Added peer", "peer", p) 1047 1048 return nil 1049 } 1050 1051 // NewNetAddressStrings returns an array of NetAddress'es build using 1052 // the provided strings. 1053 func NewNetAddressStrings(addrs []string) ([]*NetAddress, []error) { 1054 netAddrs := make([]*NetAddress, 0) 1055 errs := make([]error, 0) 1056 for _, addr := range addrs { 1057 netAddr, err := types.NewNetAddressString(addr) 1058 if err != nil { 1059 errs = append(errs, err) 1060 } else { 1061 netAddrs = append(netAddrs, netAddr) 1062 } 1063 } 1064 return netAddrs, errs 1065 }