github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/tm2/pkg/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/gnolang/gno/tm2/pkg/cmap"
    11  	"github.com/gnolang/gno/tm2/pkg/errors"
    12  	"github.com/gnolang/gno/tm2/pkg/p2p/config"
    13  	"github.com/gnolang/gno/tm2/pkg/p2p/conn"
    14  	"github.com/gnolang/gno/tm2/pkg/random"
    15  	"github.com/gnolang/gno/tm2/pkg/service"
    16  	"github.com/gnolang/gno/tm2/pkg/telemetry"
    17  	"github.com/gnolang/gno/tm2/pkg/telemetry/metrics"
    18  )
    19  
const (
	// Upper bound, in milliseconds, handed to the RNG by randomSleep. The
	// resulting random delay is added before dialing peers or reconnecting
	// to help prevent DoS.
	dialRandomizerIntervalMilliseconds = 3000

	// Phase one of reconnecting: repeatedly retry at a fixed interval,
	// ie. 20 attempts * 5s = 100s (plus the random delay of each sleep).
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// Phase two of reconnecting: exponential backoff. Attempt i (counted
	// from 0) sleeps reconnectBackOffBaseSeconds**i seconds before dialing,
	// so the last of the 10 sleeps is 3**9s (~5.5hrs) and all of them
	// together add up to (3**10 - 1) / 2 seconds (~8.2hrs), plus the random
	// delay of each sleep.
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)
    35  
    36  // MConnConfig returns an MConnConfig with fields updated
    37  // from the P2PConfig.
    38  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    39  	mConfig := conn.DefaultMConnConfig()
    40  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    41  	mConfig.SendRate = cfg.SendRate
    42  	mConfig.RecvRate = cfg.RecvRate
    43  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    44  	return mConfig
    45  }
    46  
// PeerFilterFunc is to be implemented by filter hooks that run after a new
// Peer has been fully set up. The Switch calls every registered filter with
// its current peer set and the candidate Peer; a non-nil error makes the
// Switch reject the candidate.
type PeerFilterFunc func(IPeerSet, Peer) error
    50  
    51  // -----------------------------------------------------------------------------
    52  
// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
type Switch struct {
	service.BaseService

	config       *config.P2PConfig         // P2P configuration handed to NewSwitch
	reactors     map[string]Reactor        // registered reactors, keyed by name
	chDescs      []*conn.ChannelDescriptor // channel descriptors of all registered reactors
	reactorsByCh map[byte]Reactor          // reactor owning each channel ID
	peers        *PeerSet                  // peers added through addPeer
	dialing      *cmap.CMap                // addresses being dialed, keyed by peer ID string
	reconnecting *cmap.CMap                // addresses in a reconnect loop, keyed by peer ID string
	nodeInfo     NodeInfo                  // our node info
	nodeKey      *NodeKey                  // our node privkey
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress

	transport Transport // dials outbound peers and accepts inbound ones

	filterTimeout time.Duration    // how long filterPeer waits for each filter result
	peerFilters   []PeerFilterFunc // hooks run by filterPeer on every candidate peer

	rng *random.Rand // seed for randomizing dial times and orders
}
    79  
    80  // NetAddress returns the address the switch is listening on.
    81  func (sw *Switch) NetAddress() *NetAddress {
    82  	addr := sw.transport.NetAddress()
    83  	return &addr
    84  }
    85  
// SwitchOption sets an optional parameter on the Switch. NewSwitch applies
// the given options, in order, after it has filled in its defaults.
type SwitchOption func(*Switch)
    88  
    89  // NewSwitch creates a new Switch with the given config.
    90  func NewSwitch(
    91  	cfg *config.P2PConfig,
    92  	transport Transport,
    93  	options ...SwitchOption,
    94  ) *Switch {
    95  	sw := &Switch{
    96  		config:               cfg,
    97  		reactors:             make(map[string]Reactor),
    98  		chDescs:              make([]*conn.ChannelDescriptor, 0),
    99  		reactorsByCh:         make(map[byte]Reactor),
   100  		peers:                NewPeerSet(),
   101  		dialing:              cmap.NewCMap(),
   102  		reconnecting:         cmap.NewCMap(),
   103  		transport:            transport,
   104  		filterTimeout:        defaultFilterTimeout,
   105  		persistentPeersAddrs: make([]*NetAddress, 0),
   106  	}
   107  
   108  	// Ensure we have a completely undeterministic PRNG.
   109  	sw.rng = random.NewRand()
   110  
   111  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   112  
   113  	for _, option := range options {
   114  		option(sw)
   115  	}
   116  
   117  	return sw
   118  }
   119  
   120  // SwitchFilterTimeout sets the timeout used for peer filters.
   121  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   122  	return func(sw *Switch) { sw.filterTimeout = timeout }
   123  }
   124  
   125  // SwitchPeerFilters sets the filters for rejection of new peers.
   126  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   127  	return func(sw *Switch) { sw.peerFilters = filters }
   128  }
   129  
   130  // ---------------------------------------------------------------------
   131  // Switch setup
   132  
   133  // AddReactor adds the given reactor to the switch.
   134  // NOTE: Not goroutine safe.
   135  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   136  	for _, chDesc := range reactor.GetChannels() {
   137  		chID := chDesc.ID
   138  		// No two reactors can share the same channel.
   139  		if sw.reactorsByCh[chID] != nil {
   140  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   141  		}
   142  		sw.chDescs = append(sw.chDescs, chDesc)
   143  		sw.reactorsByCh[chID] = reactor
   144  	}
   145  	sw.reactors[name] = reactor
   146  	reactor.SetSwitch(sw)
   147  	return reactor
   148  }
   149  
   150  // RemoveReactor removes the given Reactor from the Switch.
   151  // NOTE: Not goroutine safe.
   152  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   153  	for _, chDesc := range reactor.GetChannels() {
   154  		// remove channel description
   155  		for i := 0; i < len(sw.chDescs); i++ {
   156  			if chDesc.ID == sw.chDescs[i].ID {
   157  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   158  				break
   159  			}
   160  		}
   161  		delete(sw.reactorsByCh, chDesc.ID)
   162  	}
   163  	delete(sw.reactors, name)
   164  	reactor.SetSwitch(nil)
   165  }
   166  
// Reactors returns the map of reactors registered on the switch, keyed by
// name. The switch's own map is returned, not a copy.
// NOTE: Not goroutine safe.
func (sw *Switch) Reactors() map[string]Reactor {
	return sw.reactors
}
   172  
// Reactor returns the reactor registered under the given name, or nil when
// no reactor is stored under that name.
// NOTE: Not goroutine safe.
func (sw *Switch) Reactor(name string) Reactor {
	return sw.reactors[name]
}
   178  
// SetNodeInfo sets the switch's NodeInfo for checking compatibility and
// handshaking with other nodes. It replaces whatever was stored before.
// NOTE: Not goroutine safe.
func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
	sw.nodeInfo = nodeInfo
}
   184  
// NodeInfo returns the NodeInfo last stored on the switch with SetNodeInfo.
// NOTE: Not goroutine safe.
func (sw *Switch) NodeInfo() NodeInfo {
	return sw.nodeInfo
}
   190  
// SetNodeKey sets the switch's private key for authenticated encryption.
// The pointer is stored as given; no copy of the key is made.
// NOTE: Not goroutine safe.
func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
	sw.nodeKey = nodeKey
}
   196  
   197  // ---------------------------------------------------------------------
   198  // Service start/stop
   199  
   200  // OnStart implements BaseService. It starts all the reactors and peers.
   201  func (sw *Switch) OnStart() error {
   202  	// Start reactors
   203  	for _, reactor := range sw.reactors {
   204  		err := reactor.Start()
   205  		if err != nil {
   206  			return errors.Wrap(err, "failed to start %v", reactor)
   207  		}
   208  	}
   209  
   210  	// Start accepting Peers.
   211  	go sw.acceptRoutine()
   212  
   213  	return nil
   214  }
   215  
   216  // OnStop implements BaseService. It stops all peers and reactors.
   217  func (sw *Switch) OnStop() {
   218  	// Stop transport
   219  	if t, ok := sw.transport.(TransportLifecycle); ok {
   220  		err := t.Close()
   221  		if err != nil {
   222  			sw.Logger.Error("Error stopping transport on stop: ", err)
   223  		}
   224  	}
   225  
   226  	// Stop peers
   227  	for _, p := range sw.peers.List() {
   228  		sw.stopAndRemovePeer(p, nil)
   229  	}
   230  
   231  	// Stop reactors
   232  	sw.Logger.Debug("Switch: Stopping reactors")
   233  	for _, reactor := range sw.reactors {
   234  		reactor.Stop()
   235  	}
   236  }
   237  
   238  // ---------------------------------------------------------------------
   239  // Peers
   240  
   241  // Broadcast runs a go routine for each attempted send, which will block trying
   242  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   243  // success values for each attempted send (false if times out). Channel will be
   244  // closed once msg bytes are sent to all peers (or time out).
   245  //
   246  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   247  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   248  	startTime := time.Now()
   249  
   250  	sw.Logger.Debug(
   251  		"Broadcast",
   252  		"channel", chID,
   253  		"value", fmt.Sprintf("%X", msgBytes),
   254  	)
   255  
   256  	peers := sw.peers.List()
   257  	var wg sync.WaitGroup
   258  	wg.Add(len(peers))
   259  	successChan := make(chan bool, len(peers))
   260  
   261  	for _, peer := range peers {
   262  		go func(p Peer) {
   263  			defer wg.Done()
   264  			success := p.Send(chID, msgBytes)
   265  			successChan <- success
   266  		}(peer)
   267  	}
   268  
   269  	go func() {
   270  		wg.Wait()
   271  		close(successChan)
   272  		if telemetry.MetricsEnabled() {
   273  			metrics.BroadcastTxTimer.Record(context.Background(), time.Since(startTime).Milliseconds())
   274  		}
   275  	}()
   276  
   277  	return successChan
   278  }
   279  
   280  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   281  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   282  	peers := sw.peers.List()
   283  	for _, peer := range peers {
   284  		if peer.IsOutbound() {
   285  			outbound++
   286  		} else {
   287  			inbound++
   288  		}
   289  	}
   290  	dialing = sw.dialing.Size()
   291  	return
   292  }
   293  
// MaxNumOutboundPeers returns the maximum number of outbound peers, as set
// in the switch's P2P config.
func (sw *Switch) MaxNumOutboundPeers() int {
	return sw.config.MaxNumOutboundPeers
}
   298  
// Peers returns the set of peers that are connected to the switch. The
// switch's own peer set is returned behind the IPeerSet interface, not a
// snapshot of it.
func (sw *Switch) Peers() IPeerSet {
	return sw.peers
}
   303  
   304  // StopPeerForError disconnects from a peer due to external error.
   305  // If the peer is persistent, it will attempt to reconnect.
   306  // TODO: make record depending on reason.
   307  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   308  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   309  	sw.stopAndRemovePeer(peer, reason)
   310  
   311  	if peer.IsPersistent() {
   312  		var addr *NetAddress
   313  		if peer.IsOutbound() { // socket address for outbound peers
   314  			addr = peer.SocketAddr()
   315  		} else { // self-reported address for inbound peers
   316  			addr = peer.NodeInfo().NetAddress
   317  		}
   318  		go sw.reconnectToPeer(addr)
   319  	}
   320  }
   321  
// StopPeerGracefully disconnects from a peer gracefully: the peer is stopped
// and removed with a nil reason. Unlike StopPeerForError, it never starts a
// reconnect loop.
// TODO: handle graceful disconnects.
func (sw *Switch) StopPeerGracefully(peer Peer) {
	sw.Logger.Info("Stopping peer gracefully")
	sw.stopAndRemovePeer(peer, nil)
}
   328  
// stopAndRemovePeer tears a peer down: it lets the transport clean up after
// the peer, stops the peer, notifies every reactor through RemovePeer (passing
// reason along) and, as the very last step, removes the peer from the switch's
// peer set.
func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
	sw.transport.Cleanup(peer)
	peer.Stop()

	for _, reactor := range sw.reactors {
		reactor.RemovePeer(peer, reason)
	}

	// Removing a peer should go last to avoid a situation where a peer
	// reconnect to our node and the switch calls InitPeer before
	// RemovePeer is finished.
	// https://github.com/tendermint/classic/issues/3338
	sw.peers.Remove(peer)
}
   343  
   344  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   345  // with a fixed interval, then with exponential backoff.
   346  // If no success after all that, it stops trying.
   347  // NOTE: this will keep trying even if the handshake or auth fails.
   348  // TODO: be more explicit with error types so we only retry on certain failures
   349  //   - ie. if we're getting ErrDuplicatePeer we can stop
   350  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   351  	if sw.reconnecting.Has(addr.ID.String()) {
   352  		return
   353  	}
   354  	sw.reconnecting.Set(addr.ID.String(), addr)
   355  	defer sw.reconnecting.Delete(addr.ID.String())
   356  
   357  	start := time.Now()
   358  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   359  	for i := 0; i < reconnectAttempts; i++ {
   360  		if !sw.IsRunning() {
   361  			return
   362  		}
   363  
   364  		err := sw.DialPeerWithAddress(addr)
   365  		if err == nil {
   366  			return // success
   367  		} else if _, ok := err.(CurrentlyDialingOrExistingAddressError); ok {
   368  			return
   369  		}
   370  
   371  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   372  		// sleep a set amount
   373  		sw.randomSleep(reconnectInterval)
   374  		continue
   375  	}
   376  
   377  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   378  		"addr", addr, "elapsed", time.Since(start))
   379  	for i := 0; i < reconnectBackOffAttempts; i++ {
   380  		if !sw.IsRunning() {
   381  			return
   382  		}
   383  
   384  		// sleep an exponentially increasing amount
   385  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   386  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   387  
   388  		err := sw.DialPeerWithAddress(addr)
   389  		if err == nil {
   390  			return // success
   391  		} else if _, ok := err.(CurrentlyDialingOrExistingAddressError); ok {
   392  			return
   393  		}
   394  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   395  	}
   396  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   397  }
   398  
   399  // ---------------------------------------------------------------------
   400  // Dialing
   401  
   402  // DialPeersAsync dials a list of peers asynchronously in random order.
   403  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   404  // It ignores NetAddressLookupError. However, if there are other errors, first
   405  // encounter is returned.
   406  // Nop if there are no peers.
   407  func (sw *Switch) DialPeersAsync(peers []string) error {
   408  	netAddrs, errs := NewNetAddressFromStrings(peers)
   409  	// report all the errors
   410  	for _, err := range errs {
   411  		sw.Logger.Error("Error in peer's address", "err", err)
   412  	}
   413  	// return first non-NetAddressLookupError error
   414  	for _, err := range errs {
   415  		if _, ok := err.(NetAddressLookupError); ok {
   416  			continue
   417  		}
   418  		return err
   419  	}
   420  	sw.dialPeersAsync(netAddrs)
   421  	return nil
   422  }
   423  
   424  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   425  	ourAddr := sw.NetAddress()
   426  
   427  	// permute the list, dial them in random order.
   428  	perm := sw.rng.Perm(len(netAddrs))
   429  	for i := 0; i < len(perm); i++ {
   430  		go func(i int) {
   431  			j := perm[i]
   432  			addr := netAddrs[j]
   433  
   434  			if addr.Same(ourAddr) {
   435  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   436  				return
   437  			}
   438  
   439  			sw.randomSleep(0)
   440  
   441  			err := sw.DialPeerWithAddress(addr)
   442  			if err != nil {
   443  				switch err.(type) {
   444  				case SwitchConnectToSelfError, SwitchDuplicatePeerIDError, CurrentlyDialingOrExistingAddressError:
   445  					sw.Logger.Debug("Error dialing peer", "err", err)
   446  				default:
   447  					sw.Logger.Error("Error dialing peer", "err", err)
   448  				}
   449  			}
   450  		}(i)
   451  	}
   452  }
   453  
   454  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   455  // and authenticates successfully.
   456  // If we're currently dialing this address or it belongs to an existing peer,
   457  // CurrentlyDialingOrExistingAddressError is returned.
   458  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   459  	if sw.IsDialingOrExistingAddress(addr) {
   460  		return CurrentlyDialingOrExistingAddressError{addr.String()}
   461  	}
   462  
   463  	sw.dialing.Set(addr.ID.String(), addr)
   464  	defer sw.dialing.Delete(addr.ID.String())
   465  
   466  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   467  }
   468  
   469  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   470  func (sw *Switch) randomSleep(interval time.Duration) {
   471  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   472  	time.Sleep(r + interval)
   473  }
   474  
   475  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   476  // address or dialing it at the moment.
   477  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   478  	return sw.dialing.Has(addr.ID.String()) ||
   479  		sw.peers.Has(addr.ID) ||
   480  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   481  }
   482  
   483  // AddPersistentPeers allows you to set persistent peers. It ignores
   484  // NetAddressLookupError. However, if there are other errors, first encounter is
   485  // returned.
   486  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   487  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   488  	netAddrs, errs := NewNetAddressFromStrings(addrs)
   489  	// report all the errors
   490  	for _, err := range errs {
   491  		sw.Logger.Error("Error in peer's address", "err", err)
   492  	}
   493  	// return first non-NetAddressLookupError error
   494  	for _, err := range errs {
   495  		if _, ok := err.(NetAddressLookupError); ok {
   496  			continue
   497  		}
   498  		return err
   499  	}
   500  	sw.persistentPeersAddrs = netAddrs
   501  	return nil
   502  }
   503  
   504  func (sw *Switch) isPeerPersistentFn() func(*NetAddress) bool {
   505  	return func(na *NetAddress) bool {
   506  		for _, pa := range sw.persistentPeersAddrs {
   507  			if pa.Equals(na) {
   508  				return true
   509  			}
   510  		}
   511  		return false
   512  	}
   513  }
   514  
// acceptRoutine loops over transport.Accept and adds each inbound peer to the
// switch. Rejected peers and filter timeouts are logged and skipped; a closed
// transport ends the loop; any other Accept error panics. An accepted peer is
// dropped when the inbound peer limit from the config has been reached or
// when addPeer fails.
func (sw *Switch) acceptRoutine() {
	for {
		p, err := sw.transport.Accept(peerConfig{
			chDescs:      sw.chDescs,
			onPeerError:  sw.StopPeerForError,
			reactorsByCh: sw.reactorsByCh,
			isPersistent: sw.isPeerPersistentFn(),
		})
		if err != nil {
			switch err := err.(type) {
			case RejectedError:
				if err.IsSelf() {
					// TODO: warn?
				}

				sw.Logger.Info(
					"Inbound Peer rejected",
					"err", err,
					"numPeers", sw.peers.Size(),
				)

				continue
			case FilterTimeoutError:
				sw.Logger.Error(
					"Peer filter timed out",
					"err", err,
				)

				continue
			case TransportClosedError:
				// Falls through to the break below, which ends the routine.
				sw.Logger.Error(
					"Stopped accept routine, as transport is closed",
					"numPeers", sw.peers.Size(),
				)
			default:
				sw.Logger.Error(
					"Accept on transport errored",
					"err", err,
					"numPeers", sw.peers.Size(),
				)
				// We could instead have a retry loop around the acceptRoutine,
				// but that would need to stop and let the node shutdown eventually.
				// So might as well panic and let process managers restart the node.
				// There's no point in letting the node run without the acceptRoutine,
				// since it won't be able to accept new connections.
				panic(fmt.Errorf("accept routine exited: %w", err))
			}

			// This break sits outside the switch, so it leaves the for loop.
			break
		}

		// Ignore connection if we already have enough peers.
		_, in, _ := sw.NumPeers()
		if in >= sw.config.MaxNumInboundPeers {
			sw.Logger.Info(
				"Ignoring inbound connection: already have enough inbound peers",
				"address", p.SocketAddr(),
				"have", in,
				"max", sw.config.MaxNumInboundPeers,
			)

			sw.transport.Cleanup(p)

			continue
		}

		if err := sw.addPeer(p); err != nil {
			// Undo whatever addPeer got done before it failed.
			sw.transport.Cleanup(p)
			if p.IsRunning() {
				_ = p.Stop()
			}
			sw.Logger.Info(
				"Ignoring inbound connection: error while adding peer",
				"err", err,
				"id", p.ID(),
			)
		}
	}
}
   594  
   595  // dial the peer; make secret connection; authenticate against the dialed ID;
   596  // add the peer.
   597  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   598  // If peer is started successfully, reconnectLoop will start when
   599  // StopPeerForError is called.
   600  func (sw *Switch) addOutboundPeerWithConfig(
   601  	addr *NetAddress,
   602  	cfg *config.P2PConfig,
   603  ) error {
   604  	sw.Logger.Info("Dialing peer", "address", addr)
   605  
   606  	// XXX(xla): Remove the leakage of test concerns in implementation.
   607  	if cfg.TestDialFail {
   608  		go sw.reconnectToPeer(addr)
   609  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   610  	}
   611  
   612  	p, err := sw.transport.Dial(*addr, peerConfig{
   613  		chDescs:      sw.chDescs,
   614  		onPeerError:  sw.StopPeerForError,
   615  		isPersistent: sw.isPeerPersistentFn(),
   616  		reactorsByCh: sw.reactorsByCh,
   617  	})
   618  	if err != nil {
   619  		if e, ok := err.(RejectedError); ok {
   620  			if e.IsSelf() {
   621  				// TODO: warn?
   622  				return err
   623  			}
   624  		}
   625  
   626  		// retry persistent peers after
   627  		// any dial error besides IsSelf()
   628  		if sw.isPeerPersistentFn()(addr) {
   629  			go sw.reconnectToPeer(addr)
   630  		}
   631  
   632  		return err
   633  	}
   634  
   635  	if err := sw.addPeer(p); err != nil {
   636  		sw.transport.Cleanup(p)
   637  		if p.IsRunning() {
   638  			_ = p.Stop()
   639  		}
   640  		return err
   641  	}
   642  
   643  	return nil
   644  }
   645  
   646  func (sw *Switch) filterPeer(p Peer) error {
   647  	// Avoid duplicate
   648  	if sw.peers.Has(p.ID()) {
   649  		return RejectedError{id: p.ID(), isDuplicate: true}
   650  	}
   651  
   652  	errc := make(chan error, len(sw.peerFilters))
   653  
   654  	for _, f := range sw.peerFilters {
   655  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   656  			errc <- f(sw.peers, p)
   657  		}(f, p, errc)
   658  	}
   659  
   660  	for i := 0; i < cap(errc); i++ {
   661  		select {
   662  		case err := <-errc:
   663  			if err != nil {
   664  				return RejectedError{id: p.ID(), err: err, isFiltered: true}
   665  			}
   666  		case <-time.After(sw.filterTimeout):
   667  			return FilterTimeoutError{}
   668  		}
   669  	}
   670  
   671  	return nil
   672  }
   673  
// addPeer starts up the Peer and adds it to the Switch. Error is returned if
// the peer is filtered out or failed to start or can't be added.
// The steps run in a fixed order: filter the peer, give it a logger, let every
// reactor initialise it, start it, add it to the peer set and finally
// announce it to every reactor. When the switch is not running, the peer is
// neither started nor added, and nil is returned.
func (sw *Switch) addPeer(p Peer) error {
	if err := sw.filterPeer(p); err != nil {
		return err
	}

	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))

	// Handle the shut down case where the switch has stopped but we're
	// concurrently trying to add a peer.
	if !sw.IsRunning() {
		// XXX should this return an error or just log and terminate?
		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
		return nil
	}

	// Add some data to the peer, which is required by reactors.
	// Each reactor may hand back a different Peer value, which then replaces p.
	for _, reactor := range sw.reactors {
		p = reactor.InitPeer(p)
	}

	// Start the peer's send/recv routines.
	// Must start it before adding it to the peer set
	// to prevent Start and Stop from being called concurrently.
	err := p.Start()
	if err != nil {
		// Should never happen
		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
		return err
	}

	// Add the peer to PeerSet. Do this before starting the reactors
	// so that if Receive errors, we will find the peer and remove it.
	// Add should not err since we already checked peers.Has().
	if err := sw.peers.Add(p); err != nil {
		return err
	}

	// Start all the reactor protocols on the peer.
	for _, reactor := range sw.reactors {
		reactor.AddPeer(p)
	}

	sw.Logger.Info("Added peer", "peer", p)

	return nil
}