github.com/evdatsion/aphelion-dpos-bft@v0.32.1/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/pkg/errors"
    10  
    11  	"github.com/evdatsion/aphelion-dpos-bft/config"
    12  	cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common"
    13  	"github.com/evdatsion/aphelion-dpos-bft/p2p/conn"
    14  )
    15  
    16  const (
    17  	// wait a random amount of time from this interval
    18  	// before dialing peers or reconnecting to help prevent DoS
    19  	dialRandomizerIntervalMilliseconds = 3000
    20  
    21  	// repeatedly try to reconnect for a few minutes
    22  	// ie. 5 * 20 = 100s
    23  	reconnectAttempts = 20
    24  	reconnectInterval = 5 * time.Second
    25  
    26  	// then move into exponential backoff mode for ~1day
    27  	// ie. 3**10 = 16hrs
    28  	reconnectBackOffAttempts    = 10
    29  	reconnectBackOffBaseSeconds = 3
    30  )
    31  
    32  // MConnConfig returns an MConnConfig with fields updated
    33  // from the P2PConfig.
    34  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    35  	mConfig := conn.DefaultMConnConfig()
    36  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    37  	mConfig.SendRate = cfg.SendRate
    38  	mConfig.RecvRate = cfg.RecvRate
    39  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    40  	return mConfig
    41  }
    42  
    43  //-----------------------------------------------------------------------------
    44  
    45  // An AddrBook represents an address book from the pex package, which is used
    46  // to store peer addresses.
    47  type AddrBook interface {
    48  	AddAddress(addr *NetAddress, src *NetAddress) error
    49  	AddOurAddress(*NetAddress)
    50  	OurAddress(*NetAddress) bool
    51  	MarkGood(ID)
    52  	RemoveAddress(*NetAddress)
    53  	HasAddress(*NetAddress) bool
    54  	Save()
    55  }
    56  
    57  // PeerFilterFunc to be implemented by filter hooks after a new Peer has been
    58  // fully setup.
    59  type PeerFilterFunc func(IPeerSet, Peer) error
    60  
    61  //-----------------------------------------------------------------------------
    62  
    63  // Switch handles peer connections and exposes an API to receive incoming messages
    64  // on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
    65  // or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
    66  // incoming messages are received on the reactor.
    67  type Switch struct {
    68  	cmn.BaseService
    69  
    70  	config       *config.P2PConfig
    71  	reactors     map[string]Reactor
    72  	chDescs      []*conn.ChannelDescriptor
    73  	reactorsByCh map[byte]Reactor
    74  	peers        *PeerSet
    75  	dialing      *cmn.CMap
    76  	reconnecting *cmn.CMap
    77  	nodeInfo     NodeInfo // our node info
    78  	nodeKey      *NodeKey // our node privkey
    79  	addrBook     AddrBook
    80  	// peers addresses with whom we'll maintain constant connection
    81  	persistentPeersAddrs []*NetAddress
    82  
    83  	transport Transport
    84  
    85  	filterTimeout time.Duration
    86  	peerFilters   []PeerFilterFunc
    87  
    88  	rng *cmn.Rand // seed for randomizing dial times and orders
    89  
    90  	metrics *Metrics
    91  }
    92  
    93  // NetAddress returns the address the switch is listening on.
    94  func (sw *Switch) NetAddress() *NetAddress {
    95  	addr := sw.transport.NetAddress()
    96  	return &addr
    97  }
    98  
    99  // SwitchOption sets an optional parameter on the Switch.
   100  type SwitchOption func(*Switch)
   101  
   102  // NewSwitch creates a new Switch with the given config.
   103  func NewSwitch(
   104  	cfg *config.P2PConfig,
   105  	transport Transport,
   106  	options ...SwitchOption,
   107  ) *Switch {
   108  	sw := &Switch{
   109  		config:               cfg,
   110  		reactors:             make(map[string]Reactor),
   111  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   112  		reactorsByCh:         make(map[byte]Reactor),
   113  		peers:                NewPeerSet(),
   114  		dialing:              cmn.NewCMap(),
   115  		reconnecting:         cmn.NewCMap(),
   116  		metrics:              NopMetrics(),
   117  		transport:            transport,
   118  		filterTimeout:        defaultFilterTimeout,
   119  		persistentPeersAddrs: make([]*NetAddress, 0),
   120  	}
   121  
   122  	// Ensure we have a completely undeterministic PRNG.
   123  	sw.rng = cmn.NewRand()
   124  
   125  	sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
   126  
   127  	for _, option := range options {
   128  		option(sw)
   129  	}
   130  
   131  	return sw
   132  }
   133  
   134  // SwitchFilterTimeout sets the timeout used for peer filters.
   135  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   136  	return func(sw *Switch) { sw.filterTimeout = timeout }
   137  }
   138  
   139  // SwitchPeerFilters sets the filters for rejection of new peers.
   140  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   141  	return func(sw *Switch) { sw.peerFilters = filters }
   142  }
   143  
   144  // WithMetrics sets the metrics.
   145  func WithMetrics(metrics *Metrics) SwitchOption {
   146  	return func(sw *Switch) { sw.metrics = metrics }
   147  }
   148  
   149  //---------------------------------------------------------------------
   150  // Switch setup
   151  
   152  // AddReactor adds the given reactor to the switch.
   153  // NOTE: Not goroutine safe.
   154  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   155  	// Validate the reactor.
   156  	// No two reactors can share the same channel.
   157  	reactorChannels := reactor.GetChannels()
   158  	for _, chDesc := range reactorChannels {
   159  		chID := chDesc.ID
   160  		if sw.reactorsByCh[chID] != nil {
   161  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   162  		}
   163  		sw.chDescs = append(sw.chDescs, chDesc)
   164  		sw.reactorsByCh[chID] = reactor
   165  	}
   166  	sw.reactors[name] = reactor
   167  	reactor.SetSwitch(sw)
   168  	return reactor
   169  }
   170  
   171  // Reactors returns a map of reactors registered on the switch.
   172  // NOTE: Not goroutine safe.
   173  func (sw *Switch) Reactors() map[string]Reactor {
   174  	return sw.reactors
   175  }
   176  
   177  // Reactor returns the reactor with the given name.
   178  // NOTE: Not goroutine safe.
   179  func (sw *Switch) Reactor(name string) Reactor {
   180  	return sw.reactors[name]
   181  }
   182  
   183  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   184  // NOTE: Not goroutine safe.
   185  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   186  	sw.nodeInfo = nodeInfo
   187  }
   188  
   189  // NodeInfo returns the switch's NodeInfo.
   190  // NOTE: Not goroutine safe.
   191  func (sw *Switch) NodeInfo() NodeInfo {
   192  	return sw.nodeInfo
   193  }
   194  
   195  // SetNodeKey sets the switch's private key for authenticated encryption.
   196  // NOTE: Not goroutine safe.
   197  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   198  	sw.nodeKey = nodeKey
   199  }
   200  
   201  //---------------------------------------------------------------------
   202  // Service start/stop
   203  
   204  // OnStart implements BaseService. It starts all the reactors and peers.
   205  func (sw *Switch) OnStart() error {
   206  	// Start reactors
   207  	for _, reactor := range sw.reactors {
   208  		err := reactor.Start()
   209  		if err != nil {
   210  			return cmn.ErrorWrap(err, "failed to start %v", reactor)
   211  		}
   212  	}
   213  
   214  	// Start accepting Peers.
   215  	go sw.acceptRoutine()
   216  
   217  	return nil
   218  }
   219  
   220  // OnStop implements BaseService. It stops all peers and reactors.
   221  func (sw *Switch) OnStop() {
   222  	// Stop peers
   223  	for _, p := range sw.peers.List() {
   224  		sw.stopAndRemovePeer(p, nil)
   225  	}
   226  
   227  	// Stop reactors
   228  	sw.Logger.Debug("Switch: Stopping reactors")
   229  	for _, reactor := range sw.reactors {
   230  		reactor.Stop()
   231  	}
   232  }
   233  
   234  //---------------------------------------------------------------------
   235  // Peers
   236  
   237  // Broadcast runs a go routine for each attempted send, which will block trying
   238  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   239  // success values for each attempted send (false if times out). Channel will be
   240  // closed once msg bytes are sent to all peers (or time out).
   241  //
   242  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   243  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   244  	sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", fmt.Sprintf("%X", msgBytes))
   245  
   246  	peers := sw.peers.List()
   247  	var wg sync.WaitGroup
   248  	wg.Add(len(peers))
   249  	successChan := make(chan bool, len(peers))
   250  
   251  	for _, peer := range peers {
   252  		go func(p Peer) {
   253  			defer wg.Done()
   254  			success := p.Send(chID, msgBytes)
   255  			successChan <- success
   256  		}(peer)
   257  	}
   258  
   259  	go func() {
   260  		wg.Wait()
   261  		close(successChan)
   262  	}()
   263  
   264  	return successChan
   265  }
   266  
   267  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   268  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   269  	peers := sw.peers.List()
   270  	for _, peer := range peers {
   271  		if peer.IsOutbound() {
   272  			outbound++
   273  		} else {
   274  			inbound++
   275  		}
   276  	}
   277  	dialing = sw.dialing.Size()
   278  	return
   279  }
   280  
   281  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   282  func (sw *Switch) MaxNumOutboundPeers() int {
   283  	return sw.config.MaxNumOutboundPeers
   284  }
   285  
   286  // Peers returns the set of peers that are connected to the switch.
   287  func (sw *Switch) Peers() IPeerSet {
   288  	return sw.peers
   289  }
   290  
   291  // StopPeerForError disconnects from a peer due to external error.
   292  // If the peer is persistent, it will attempt to reconnect.
   293  // TODO: make record depending on reason.
   294  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   295  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   296  	sw.stopAndRemovePeer(peer, reason)
   297  
   298  	if peer.IsPersistent() {
   299  		var addr *NetAddress
   300  		if peer.IsOutbound() { // socket address for outbound peers
   301  			addr = peer.SocketAddr()
   302  		} else { // self-reported address for inbound peers
   303  			var err error
   304  			addr, err = peer.NodeInfo().NetAddress()
   305  			if err != nil {
   306  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   307  					"peer", peer, "err", err)
   308  				return
   309  			}
   310  		}
   311  		go sw.reconnectToPeer(addr)
   312  	}
   313  }
   314  
   315  // StopPeerGracefully disconnects from a peer gracefully.
   316  // TODO: handle graceful disconnects.
   317  func (sw *Switch) StopPeerGracefully(peer Peer) {
   318  	sw.Logger.Info("Stopping peer gracefully")
   319  	sw.stopAndRemovePeer(peer, nil)
   320  }
   321  
   322  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   323  	sw.transport.Cleanup(peer)
   324  	peer.Stop()
   325  
   326  	for _, reactor := range sw.reactors {
   327  		reactor.RemovePeer(peer, reason)
   328  	}
   329  
   330  	// Removing a peer should go last to avoid a situation where a peer
   331  	// reconnect to our node and the switch calls InitPeer before
   332  	// RemovePeer is finished.
   333  	// https://github.com/evdatsion/aphelion-dpos-bft/issues/3338
   334  	if sw.peers.Remove(peer) {
   335  		sw.metrics.Peers.Add(float64(-1))
   336  	}
   337  }
   338  
   339  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   340  // with a fixed interval, then with exponential backoff.
   341  // If no success after all that, it stops trying, and leaves it
   342  // to the PEX/Addrbook to find the peer with the addr again
   343  // NOTE: this will keep trying even if the handshake or auth fails.
   344  // TODO: be more explicit with error types so we only retry on certain failures
   345  //  - ie. if we're getting ErrDuplicatePeer we can stop
   346  //  	because the addrbook got us the peer back already
   347  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   348  	if sw.reconnecting.Has(string(addr.ID)) {
   349  		return
   350  	}
   351  	sw.reconnecting.Set(string(addr.ID), addr)
   352  	defer sw.reconnecting.Delete(string(addr.ID))
   353  
   354  	start := time.Now()
   355  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   356  	for i := 0; i < reconnectAttempts; i++ {
   357  		if !sw.IsRunning() {
   358  			return
   359  		}
   360  
   361  		err := sw.DialPeerWithAddress(addr)
   362  		if err == nil {
   363  			return // success
   364  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   365  			return
   366  		}
   367  
   368  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   369  		// sleep a set amount
   370  		sw.randomSleep(reconnectInterval)
   371  		continue
   372  	}
   373  
   374  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   375  		"addr", addr, "elapsed", time.Since(start))
   376  	for i := 0; i < reconnectBackOffAttempts; i++ {
   377  		if !sw.IsRunning() {
   378  			return
   379  		}
   380  
   381  		// sleep an exponentially increasing amount
   382  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   383  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   384  
   385  		err := sw.DialPeerWithAddress(addr)
   386  		if err == nil {
   387  			return // success
   388  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   389  			return
   390  		}
   391  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   392  	}
   393  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   394  }
   395  
   396  // SetAddrBook allows to set address book on Switch.
   397  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   398  	sw.addrBook = addrBook
   399  }
   400  
   401  // MarkPeerAsGood marks the given peer as good when it did something useful
   402  // like contributed to consensus.
   403  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   404  	if sw.addrBook != nil {
   405  		sw.addrBook.MarkGood(peer.ID())
   406  	}
   407  }
   408  
   409  //---------------------------------------------------------------------
   410  // Dialing
   411  
   412  type privateAddr interface {
   413  	PrivateAddr() bool
   414  }
   415  
   416  func isPrivateAddr(err error) bool {
   417  	te, ok := errors.Cause(err).(privateAddr)
   418  	return ok && te.PrivateAddr()
   419  }
   420  
   421  // DialPeersAsync dials a list of peers asynchronously in random order.
   422  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   423  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   424  // encounter is returned.
   425  // Nop if there are no peers.
   426  func (sw *Switch) DialPeersAsync(peers []string) error {
   427  	netAddrs, errs := NewNetAddressStrings(peers)
   428  	// report all the errors
   429  	for _, err := range errs {
   430  		sw.Logger.Error("Error in peer's address", "err", err)
   431  	}
   432  	// return first non-ErrNetAddressLookup error
   433  	for _, err := range errs {
   434  		if _, ok := err.(ErrNetAddressLookup); ok {
   435  			continue
   436  		}
   437  		return err
   438  	}
   439  	sw.dialPeersAsync(netAddrs)
   440  	return nil
   441  }
   442  
   443  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   444  	ourAddr := sw.NetAddress()
   445  
   446  	// TODO: this code feels like it's in the wrong place.
   447  	// The integration tests depend on the addrBook being saved
   448  	// right away but maybe we can change that. Recall that
   449  	// the addrBook is only written to disk every 2min
   450  	if sw.addrBook != nil {
   451  		// add peers to `addrBook`
   452  		for _, netAddr := range netAddrs {
   453  			// do not add our address or ID
   454  			if !netAddr.Same(ourAddr) {
   455  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   456  					if isPrivateAddr(err) {
   457  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   458  					} else {
   459  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   460  					}
   461  				}
   462  			}
   463  		}
   464  		// Persist some peers to disk right away.
   465  		// NOTE: integration tests depend on this
   466  		sw.addrBook.Save()
   467  	}
   468  
   469  	// permute the list, dial them in random order.
   470  	perm := sw.rng.Perm(len(netAddrs))
   471  	for i := 0; i < len(perm); i++ {
   472  		go func(i int) {
   473  			j := perm[i]
   474  			addr := netAddrs[j]
   475  
   476  			if addr.Same(ourAddr) {
   477  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   478  				return
   479  			}
   480  
   481  			sw.randomSleep(0)
   482  
   483  			err := sw.DialPeerWithAddress(addr)
   484  			if err != nil {
   485  				switch err.(type) {
   486  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   487  					sw.Logger.Debug("Error dialing peer", "err", err)
   488  				default:
   489  					sw.Logger.Error("Error dialing peer", "err", err)
   490  				}
   491  			}
   492  		}(i)
   493  	}
   494  }
   495  
   496  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   497  // and authenticates successfully.
   498  // If we're currently dialing this address or it belongs to an existing peer,
   499  // ErrCurrentlyDialingOrExistingAddress is returned.
   500  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   501  	if sw.IsDialingOrExistingAddress(addr) {
   502  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   503  	}
   504  
   505  	sw.dialing.Set(string(addr.ID), addr)
   506  	defer sw.dialing.Delete(string(addr.ID))
   507  
   508  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   509  }
   510  
   511  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   512  func (sw *Switch) randomSleep(interval time.Duration) {
   513  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   514  	time.Sleep(r + interval)
   515  }
   516  
   517  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   518  // address or dialing it at the moment.
   519  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   520  	return sw.dialing.Has(string(addr.ID)) ||
   521  		sw.peers.Has(addr.ID) ||
   522  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   523  }
   524  
   525  // AddPersistentPeers allows you to set persistent peers. It ignores
   526  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   527  // returned.
   528  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   529  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   530  	netAddrs, errs := NewNetAddressStrings(addrs)
   531  	// report all the errors
   532  	for _, err := range errs {
   533  		sw.Logger.Error("Error in peer's address", "err", err)
   534  	}
   535  	// return first non-ErrNetAddressLookup error
   536  	for _, err := range errs {
   537  		if _, ok := err.(ErrNetAddressLookup); ok {
   538  			continue
   539  		}
   540  		return err
   541  	}
   542  	sw.persistentPeersAddrs = netAddrs
   543  	return nil
   544  }
   545  
   546  func (sw *Switch) isPeerPersistentFn() func(*NetAddress) bool {
   547  	return func(na *NetAddress) bool {
   548  		for _, pa := range sw.persistentPeersAddrs {
   549  			if pa.Equals(na) {
   550  				return true
   551  			}
   552  		}
   553  		return false
   554  	}
   555  }
   556  
   557  func (sw *Switch) acceptRoutine() {
   558  	for {
   559  		p, err := sw.transport.Accept(peerConfig{
   560  			chDescs:      sw.chDescs,
   561  			onPeerError:  sw.StopPeerForError,
   562  			reactorsByCh: sw.reactorsByCh,
   563  			metrics:      sw.metrics,
   564  			isPersistent: sw.isPeerPersistentFn(),
   565  		})
   566  		if err != nil {
   567  			switch err := err.(type) {
   568  			case ErrRejected:
   569  				if err.IsSelf() {
   570  					// Remove the given address from the address book and add to our addresses
   571  					// to avoid dialing in the future.
   572  					addr := err.Addr()
   573  					sw.addrBook.RemoveAddress(&addr)
   574  					sw.addrBook.AddOurAddress(&addr)
   575  				}
   576  
   577  				sw.Logger.Info(
   578  					"Inbound Peer rejected",
   579  					"err", err,
   580  					"numPeers", sw.peers.Size(),
   581  				)
   582  
   583  				continue
   584  			case ErrFilterTimeout:
   585  				sw.Logger.Error(
   586  					"Peer filter timed out",
   587  					"err", err,
   588  				)
   589  
   590  				continue
   591  			case ErrTransportClosed:
   592  				sw.Logger.Error(
   593  					"Stopped accept routine, as transport is closed",
   594  					"numPeers", sw.peers.Size(),
   595  				)
   596  			default:
   597  				sw.Logger.Error(
   598  					"Accept on transport errored",
   599  					"err", err,
   600  					"numPeers", sw.peers.Size(),
   601  				)
   602  				// We could instead have a retry loop around the acceptRoutine,
   603  				// but that would need to stop and let the node shutdown eventually.
   604  				// So might as well panic and let process managers restart the node.
   605  				// There's no point in letting the node run without the acceptRoutine,
   606  				// since it won't be able to accept new connections.
   607  				panic(fmt.Errorf("accept routine exited: %v", err))
   608  			}
   609  
   610  			break
   611  		}
   612  
   613  		// Ignore connection if we already have enough peers.
   614  		_, in, _ := sw.NumPeers()
   615  		if in >= sw.config.MaxNumInboundPeers {
   616  			sw.Logger.Info(
   617  				"Ignoring inbound connection: already have enough inbound peers",
   618  				"address", p.SocketAddr(),
   619  				"have", in,
   620  				"max", sw.config.MaxNumInboundPeers,
   621  			)
   622  
   623  			sw.transport.Cleanup(p)
   624  
   625  			continue
   626  		}
   627  
   628  		if err := sw.addPeer(p); err != nil {
   629  			sw.transport.Cleanup(p)
   630  			if p.IsRunning() {
   631  				_ = p.Stop()
   632  			}
   633  			sw.Logger.Info(
   634  				"Ignoring inbound connection: error while adding peer",
   635  				"err", err,
   636  				"id", p.ID(),
   637  			)
   638  		}
   639  	}
   640  }
   641  
   642  // dial the peer; make secret connection; authenticate against the dialed ID;
   643  // add the peer.
   644  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   645  // If peer is started successfully, reconnectLoop will start when
   646  // StopPeerForError is called.
   647  func (sw *Switch) addOutboundPeerWithConfig(
   648  	addr *NetAddress,
   649  	cfg *config.P2PConfig,
   650  ) error {
   651  	sw.Logger.Info("Dialing peer", "address", addr)
   652  
   653  	// XXX(xla): Remove the leakage of test concerns in implementation.
   654  	if cfg.TestDialFail {
   655  		go sw.reconnectToPeer(addr)
   656  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   657  	}
   658  
   659  	p, err := sw.transport.Dial(*addr, peerConfig{
   660  		chDescs:      sw.chDescs,
   661  		onPeerError:  sw.StopPeerForError,
   662  		isPersistent: sw.isPeerPersistentFn(),
   663  		reactorsByCh: sw.reactorsByCh,
   664  		metrics:      sw.metrics,
   665  	})
   666  	if err != nil {
   667  		switch e := err.(type) {
   668  		case ErrRejected:
   669  			if e.IsSelf() {
   670  				// Remove the given address from the address book and add to our addresses
   671  				// to avoid dialing in the future.
   672  				sw.addrBook.RemoveAddress(addr)
   673  				sw.addrBook.AddOurAddress(addr)
   674  
   675  				return err
   676  			}
   677  		}
   678  
   679  		// retry persistent peers after
   680  		// any dial error besides IsSelf()
   681  		if sw.isPeerPersistentFn()(addr) {
   682  			go sw.reconnectToPeer(addr)
   683  		}
   684  
   685  		return err
   686  	}
   687  
   688  	if err := sw.addPeer(p); err != nil {
   689  		sw.transport.Cleanup(p)
   690  		if p.IsRunning() {
   691  			_ = p.Stop()
   692  		}
   693  		return err
   694  	}
   695  
   696  	return nil
   697  }
   698  
   699  func (sw *Switch) filterPeer(p Peer) error {
   700  	// Avoid duplicate
   701  	if sw.peers.Has(p.ID()) {
   702  		return ErrRejected{id: p.ID(), isDuplicate: true}
   703  	}
   704  
   705  	errc := make(chan error, len(sw.peerFilters))
   706  
   707  	for _, f := range sw.peerFilters {
   708  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   709  			errc <- f(sw.peers, p)
   710  		}(f, p, errc)
   711  	}
   712  
   713  	for i := 0; i < cap(errc); i++ {
   714  		select {
   715  		case err := <-errc:
   716  			if err != nil {
   717  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   718  			}
   719  		case <-time.After(sw.filterTimeout):
   720  			return ErrFilterTimeout{}
   721  		}
   722  	}
   723  
   724  	return nil
   725  }
   726  
   727  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   728  // the peer is filtered out or failed to start or can't be added.
   729  func (sw *Switch) addPeer(p Peer) error {
   730  	if err := sw.filterPeer(p); err != nil {
   731  		return err
   732  	}
   733  
   734  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   735  
   736  	// Handle the shut down case where the switch has stopped but we're
   737  	// concurrently trying to add a peer.
   738  	if !sw.IsRunning() {
   739  		// XXX should this return an error or just log and terminate?
   740  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   741  		return nil
   742  	}
   743  
   744  	// Add some data to the peer, which is required by reactors.
   745  	for _, reactor := range sw.reactors {
   746  		p = reactor.InitPeer(p)
   747  	}
   748  
   749  	// Start the peer's send/recv routines.
   750  	// Must start it before adding it to the peer set
   751  	// to prevent Start and Stop from being called concurrently.
   752  	err := p.Start()
   753  	if err != nil {
   754  		// Should never happen
   755  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   756  		return err
   757  	}
   758  
   759  	// Add the peer to PeerSet. Do this before starting the reactors
   760  	// so that if Receive errors, we will find the peer and remove it.
   761  	// Add should not err since we already checked peers.Has().
   762  	if err := sw.peers.Add(p); err != nil {
   763  		return err
   764  	}
   765  	sw.metrics.Peers.Add(float64(1))
   766  
   767  	// Start all the reactor protocols on the peer.
   768  	for _, reactor := range sw.reactors {
   769  		reactor.AddPeer(p)
   770  	}
   771  
   772  	sw.Logger.Info("Added peer", "peer", p)
   773  
   774  	return nil
   775  }