github.com/MagHErmit/tendermint@v0.282.1/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/MagHErmit/tendermint/config"
    10  	"github.com/MagHErmit/tendermint/libs/cmap"
    11  	"github.com/MagHErmit/tendermint/libs/log"
    12  	"github.com/MagHErmit/tendermint/libs/rand"
    13  	"github.com/MagHErmit/tendermint/libs/service"
    14  	"github.com/MagHErmit/tendermint/p2p/conn"
    15  )
    16  
    17  const (
    18  	// wait a random amount of time from this interval
    19  	// before dialing peers or reconnecting to help prevent DoS
    20  	dialRandomizerIntervalMilliseconds = 3000
    21  
    22  	// repeatedly try to reconnect for a few minutes
    23  	// ie. 5 * 20 = 100s
    24  	reconnectAttempts = 20
    25  	reconnectInterval = 5 * time.Second
    26  
    27  	// then move into exponential backoff mode for ~1day
    28  	// ie. 3**10 = 16hrs
    29  	reconnectBackOffAttempts    = 10
    30  	reconnectBackOffBaseSeconds = 3
    31  )
    32  
    33  // MConnConfig returns an MConnConfig with fields updated
    34  // from the P2PConfig.
    35  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    36  	mConfig := conn.DefaultMConnConfig()
    37  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    38  	mConfig.SendRate = cfg.SendRate
    39  	mConfig.RecvRate = cfg.RecvRate
    40  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    41  	return mConfig
    42  }
    43  
    44  //-----------------------------------------------------------------------------
    45  
    46  // An AddrBook represents an address book from the pex package, which is used
    47  // to store peer addresses.
    48  type AddrBook interface {
    49  	AddAddress(addr *NetAddress, src *NetAddress) error
    50  	AddPrivateIDs([]string)
    51  	AddOurAddress(*NetAddress)
    52  	OurAddress(*NetAddress) bool
    53  	MarkGood(ID)
    54  	RemoveAddress(*NetAddress)
    55  	HasAddress(*NetAddress) bool
    56  	Save()
    57  }
    58  
    59  // PeerFilterFunc to be implemented by filter hooks after a new Peer has been
    60  // fully setup.
    61  type PeerFilterFunc func(IPeerSet, Peer) error
    62  
    63  //-----------------------------------------------------------------------------
    64  
    65  // Switch handles peer connections and exposes an API to receive incoming messages
    66  // on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
    67  // or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
    68  // incoming messages are received on the reactor.
    69  type Switch struct {
    70  	service.BaseService
    71  
    72  	config       *config.P2PConfig
    73  	reactors     map[string]Reactor
    74  	chDescs      []*conn.ChannelDescriptor
    75  	reactorsByCh map[byte]Reactor
    76  	peers        *PeerSet
    77  	dialing      *cmap.CMap
    78  	reconnecting *cmap.CMap
    79  	nodeInfo     NodeInfo // our node info
    80  	nodeKey      *NodeKey // our node privkey
    81  	addrBook     AddrBook
    82  	// peers addresses with whom we'll maintain constant connection
    83  	persistentPeersAddrs []*NetAddress
    84  	unconditionalPeerIDs map[ID]struct{}
    85  
    86  	transport Transport
    87  
    88  	filterTimeout time.Duration
    89  	peerFilters   []PeerFilterFunc
    90  
    91  	rng *rand.Rand // seed for randomizing dial times and orders
    92  
    93  	metrics *Metrics
    94  }
    95  
    96  // NetAddress returns the address the switch is listening on.
    97  func (sw *Switch) NetAddress() *NetAddress {
    98  	addr := sw.transport.NetAddress()
    99  	return &addr
   100  }
   101  
   102  // SwitchOption sets an optional parameter on the Switch.
   103  type SwitchOption func(*Switch)
   104  
   105  // NewSwitch creates a new Switch with the given config.
   106  func NewSwitch(
   107  	cfg *config.P2PConfig,
   108  	transport Transport,
   109  	options ...SwitchOption,
   110  ) *Switch {
   111  	sw := &Switch{
   112  		config:               cfg,
   113  		reactors:             make(map[string]Reactor),
   114  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   115  		reactorsByCh:         make(map[byte]Reactor),
   116  		peers:                NewPeerSet(),
   117  		dialing:              cmap.NewCMap(),
   118  		reconnecting:         cmap.NewCMap(),
   119  		metrics:              NopMetrics(),
   120  		transport:            transport,
   121  		filterTimeout:        defaultFilterTimeout,
   122  		persistentPeersAddrs: make([]*NetAddress, 0),
   123  		unconditionalPeerIDs: make(map[ID]struct{}),
   124  	}
   125  
   126  	// Ensure we have a completely undeterministic PRNG.
   127  	sw.rng = rand.NewRand()
   128  
   129  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   130  
   131  	for _, option := range options {
   132  		option(sw)
   133  	}
   134  
   135  	return sw
   136  }
   137  
   138  // SwitchFilterTimeout sets the timeout used for peer filters.
   139  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   140  	return func(sw *Switch) { sw.filterTimeout = timeout }
   141  }
   142  
   143  // SwitchPeerFilters sets the filters for rejection of new peers.
   144  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   145  	return func(sw *Switch) { sw.peerFilters = filters }
   146  }
   147  
   148  // WithMetrics sets the metrics.
   149  func WithMetrics(metrics *Metrics) SwitchOption {
   150  	return func(sw *Switch) { sw.metrics = metrics }
   151  }
   152  
   153  //---------------------------------------------------------------------
   154  // Switch setup
   155  
   156  // AddReactor adds the given reactor to the switch.
   157  // NOTE: Not goroutine safe.
   158  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   159  	for _, chDesc := range reactor.GetChannels() {
   160  		chID := chDesc.ID
   161  		// No two reactors can share the same channel.
   162  		if sw.reactorsByCh[chID] != nil {
   163  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   164  		}
   165  		sw.chDescs = append(sw.chDescs, chDesc)
   166  		sw.reactorsByCh[chID] = reactor
   167  	}
   168  	sw.reactors[name] = reactor
   169  	reactor.SetSwitch(sw)
   170  	return reactor
   171  }
   172  
   173  // RemoveReactor removes the given Reactor from the Switch.
   174  // NOTE: Not goroutine safe.
   175  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   176  	for _, chDesc := range reactor.GetChannels() {
   177  		// remove channel description
   178  		for i := 0; i < len(sw.chDescs); i++ {
   179  			if chDesc.ID == sw.chDescs[i].ID {
   180  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   181  				break
   182  			}
   183  		}
   184  		delete(sw.reactorsByCh, chDesc.ID)
   185  	}
   186  	delete(sw.reactors, name)
   187  	reactor.SetSwitch(nil)
   188  }
   189  
   190  // Reactors returns a map of reactors registered on the switch.
   191  // NOTE: Not goroutine safe.
   192  func (sw *Switch) Reactors() map[string]Reactor {
   193  	return sw.reactors
   194  }
   195  
   196  // Reactor returns the reactor with the given name.
   197  // NOTE: Not goroutine safe.
   198  func (sw *Switch) Reactor(name string) Reactor {
   199  	return sw.reactors[name]
   200  }
   201  
   202  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   203  // NOTE: Not goroutine safe.
   204  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   205  	sw.nodeInfo = nodeInfo
   206  }
   207  
   208  // NodeInfo returns the switch's NodeInfo.
   209  // NOTE: Not goroutine safe.
   210  func (sw *Switch) NodeInfo() NodeInfo {
   211  	return sw.nodeInfo
   212  }
   213  
   214  // SetNodeKey sets the switch's private key for authenticated encryption.
   215  // NOTE: Not goroutine safe.
   216  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   217  	sw.nodeKey = nodeKey
   218  }
   219  
   220  //---------------------------------------------------------------------
   221  // Service start/stop
   222  
   223  // OnStart implements BaseService. It starts all the reactors and peers.
   224  func (sw *Switch) OnStart() error {
   225  	// Start reactors
   226  	for _, reactor := range sw.reactors {
   227  		err := reactor.Start()
   228  		if err != nil {
   229  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   230  		}
   231  	}
   232  
   233  	// Start accepting Peers.
   234  	go sw.acceptRoutine()
   235  
   236  	return nil
   237  }
   238  
   239  // OnStop implements BaseService. It stops all peers and reactors.
   240  func (sw *Switch) OnStop() {
   241  	// Stop peers
   242  	for _, p := range sw.peers.List() {
   243  		sw.stopAndRemovePeer(p, nil)
   244  	}
   245  
   246  	// Stop reactors
   247  	sw.Logger.Debug("Switch: Stopping reactors")
   248  	for _, reactor := range sw.reactors {
   249  		if err := reactor.Stop(); err != nil {
   250  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   251  		}
   252  	}
   253  }
   254  
   255  //---------------------------------------------------------------------
   256  // Peers
   257  
   258  // Broadcast runs a go routine for each attempted send, which will block trying
   259  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   260  // success values for each attempted send (false if times out). Channel will be
   261  // closed once msg bytes are sent to all peers (or time out).
   262  //
   263  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   264  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   265  	sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", log.NewLazySprintf("%X", msgBytes))
   266  
   267  	peers := sw.peers.List()
   268  	var wg sync.WaitGroup
   269  	wg.Add(len(peers))
   270  	successChan := make(chan bool, len(peers))
   271  
   272  	for _, peer := range peers {
   273  		go func(p Peer) {
   274  			defer wg.Done()
   275  			success := p.Send(chID, msgBytes)
   276  			successChan <- success
   277  		}(peer)
   278  	}
   279  
   280  	go func() {
   281  		wg.Wait()
   282  		close(successChan)
   283  	}()
   284  
   285  	return successChan
   286  }
   287  
   288  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   289  // unconditional peers are not counted here.
   290  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   291  	peers := sw.peers.List()
   292  	for _, peer := range peers {
   293  		if peer.IsOutbound() {
   294  			if !sw.IsPeerUnconditional(peer.ID()) {
   295  				outbound++
   296  			}
   297  		} else {
   298  			if !sw.IsPeerUnconditional(peer.ID()) {
   299  				inbound++
   300  			}
   301  		}
   302  	}
   303  	dialing = sw.dialing.Size()
   304  	return
   305  }
   306  
   307  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   308  	_, ok := sw.unconditionalPeerIDs[id]
   309  	return ok
   310  }
   311  
   312  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   313  func (sw *Switch) MaxNumOutboundPeers() int {
   314  	return sw.config.MaxNumOutboundPeers
   315  }
   316  
   317  // Peers returns the set of peers that are connected to the switch.
   318  func (sw *Switch) Peers() IPeerSet {
   319  	return sw.peers
   320  }
   321  
   322  // StopPeerForError disconnects from a peer due to external error.
   323  // If the peer is persistent, it will attempt to reconnect.
   324  // TODO: make record depending on reason.
   325  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   326  	if !peer.IsRunning() {
   327  		return
   328  	}
   329  
   330  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   331  	sw.stopAndRemovePeer(peer, reason)
   332  
   333  	if peer.IsPersistent() {
   334  		var addr *NetAddress
   335  		if peer.IsOutbound() { // socket address for outbound peers
   336  			addr = peer.SocketAddr()
   337  		} else { // self-reported address for inbound peers
   338  			var err error
   339  			addr, err = peer.NodeInfo().NetAddress()
   340  			if err != nil {
   341  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   342  					"peer", peer, "err", err)
   343  				return
   344  			}
   345  		}
   346  		go sw.reconnectToPeer(addr)
   347  	}
   348  }
   349  
   350  // StopPeerGracefully disconnects from a peer gracefully.
   351  // TODO: handle graceful disconnects.
   352  func (sw *Switch) StopPeerGracefully(peer Peer) {
   353  	sw.Logger.Info("Stopping peer gracefully")
   354  	sw.stopAndRemovePeer(peer, nil)
   355  }
   356  
   357  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   358  	sw.transport.Cleanup(peer)
   359  	if err := peer.Stop(); err != nil {
   360  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   361  	}
   362  
   363  	for _, reactor := range sw.reactors {
   364  		reactor.RemovePeer(peer, reason)
   365  	}
   366  
   367  	// Removing a peer should go last to avoid a situation where a peer
   368  	// reconnect to our node and the switch calls InitPeer before
   369  	// RemovePeer is finished.
   370  	// https://github.com/MagHErmit/tendermint/issues/3338
   371  	if sw.peers.Remove(peer) {
   372  		sw.metrics.Peers.Add(float64(-1))
   373  	} else {
   374  		// Removal of the peer has failed. The function above sets a flag within the peer to mark this.
   375  		// We keep this message here as information to the developer.
   376  		sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID())
   377  	}
   378  }
   379  
   380  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   381  // with a fixed interval, then with exponential backoff.
   382  // If no success after all that, it stops trying, and leaves it
   383  // to the PEX/Addrbook to find the peer with the addr again
   384  // NOTE: this will keep trying even if the handshake or auth fails.
   385  // TODO: be more explicit with error types so we only retry on certain failures
   386  //   - ie. if we're getting ErrDuplicatePeer we can stop
   387  //     because the addrbook got us the peer back already
   388  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   389  	if sw.reconnecting.Has(string(addr.ID)) {
   390  		return
   391  	}
   392  	sw.reconnecting.Set(string(addr.ID), addr)
   393  	defer sw.reconnecting.Delete(string(addr.ID))
   394  
   395  	start := time.Now()
   396  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   397  	for i := 0; i < reconnectAttempts; i++ {
   398  		if !sw.IsRunning() {
   399  			return
   400  		}
   401  
   402  		err := sw.DialPeerWithAddress(addr)
   403  		if err == nil {
   404  			return // success
   405  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   406  			return
   407  		}
   408  
   409  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   410  		// sleep a set amount
   411  		sw.randomSleep(reconnectInterval)
   412  		continue
   413  	}
   414  
   415  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   416  		"addr", addr, "elapsed", time.Since(start))
   417  	for i := 0; i < reconnectBackOffAttempts; i++ {
   418  		if !sw.IsRunning() {
   419  			return
   420  		}
   421  
   422  		// sleep an exponentially increasing amount
   423  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   424  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   425  
   426  		err := sw.DialPeerWithAddress(addr)
   427  		if err == nil {
   428  			return // success
   429  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   430  			return
   431  		}
   432  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   433  	}
   434  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   435  }
   436  
   437  // SetAddrBook allows to set address book on Switch.
   438  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   439  	sw.addrBook = addrBook
   440  }
   441  
   442  // MarkPeerAsGood marks the given peer as good when it did something useful
   443  // like contributed to consensus.
   444  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   445  	if sw.addrBook != nil {
   446  		sw.addrBook.MarkGood(peer.ID())
   447  	}
   448  }
   449  
   450  //---------------------------------------------------------------------
   451  // Dialing
   452  
   453  type privateAddr interface {
   454  	PrivateAddr() bool
   455  }
   456  
   457  func isPrivateAddr(err error) bool {
   458  	te, ok := err.(privateAddr)
   459  	return ok && te.PrivateAddr()
   460  }
   461  
   462  // DialPeersAsync dials a list of peers asynchronously in random order.
   463  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   464  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   465  // encounter is returned.
   466  // Nop if there are no peers.
   467  func (sw *Switch) DialPeersAsync(peers []string) error {
   468  	netAddrs, errs := NewNetAddressStrings(peers)
   469  	// report all the errors
   470  	for _, err := range errs {
   471  		sw.Logger.Error("Error in peer's address", "err", err)
   472  	}
   473  	// return first non-ErrNetAddressLookup error
   474  	for _, err := range errs {
   475  		if _, ok := err.(ErrNetAddressLookup); ok {
   476  			continue
   477  		}
   478  		return err
   479  	}
   480  	sw.dialPeersAsync(netAddrs)
   481  	return nil
   482  }
   483  
   484  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   485  	ourAddr := sw.NetAddress()
   486  
   487  	// TODO: this code feels like it's in the wrong place.
   488  	// The integration tests depend on the addrBook being saved
   489  	// right away but maybe we can change that. Recall that
   490  	// the addrBook is only written to disk every 2min
   491  	if sw.addrBook != nil {
   492  		// add peers to `addrBook`
   493  		for _, netAddr := range netAddrs {
   494  			// do not add our address or ID
   495  			if !netAddr.Same(ourAddr) {
   496  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   497  					if isPrivateAddr(err) {
   498  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   499  					} else {
   500  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   501  					}
   502  				}
   503  			}
   504  		}
   505  		// Persist some peers to disk right away.
   506  		// NOTE: integration tests depend on this
   507  		sw.addrBook.Save()
   508  	}
   509  
   510  	// permute the list, dial them in random order.
   511  	perm := sw.rng.Perm(len(netAddrs))
   512  	for i := 0; i < len(perm); i++ {
   513  		go func(i int) {
   514  			j := perm[i]
   515  			addr := netAddrs[j]
   516  
   517  			if addr.Same(ourAddr) {
   518  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   519  				return
   520  			}
   521  
   522  			sw.randomSleep(0)
   523  
   524  			err := sw.DialPeerWithAddress(addr)
   525  			if err != nil {
   526  				switch err.(type) {
   527  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   528  					sw.Logger.Debug("Error dialing peer", "err", err)
   529  				default:
   530  					sw.Logger.Error("Error dialing peer", "err", err)
   531  				}
   532  			}
   533  		}(i)
   534  	}
   535  }
   536  
   537  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   538  // and authenticates successfully.
   539  // If we're currently dialing this address or it belongs to an existing peer,
   540  // ErrCurrentlyDialingOrExistingAddress is returned.
   541  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   542  	if sw.IsDialingOrExistingAddress(addr) {
   543  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   544  	}
   545  
   546  	sw.dialing.Set(string(addr.ID), addr)
   547  	defer sw.dialing.Delete(string(addr.ID))
   548  
   549  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   550  }
   551  
   552  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   553  func (sw *Switch) randomSleep(interval time.Duration) {
   554  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   555  	time.Sleep(r + interval)
   556  }
   557  
   558  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   559  // address or dialing it at the moment.
   560  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   561  	return sw.dialing.Has(string(addr.ID)) ||
   562  		sw.peers.Has(addr.ID) ||
   563  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   564  }
   565  
   566  // AddPersistentPeers allows you to set persistent peers. It ignores
   567  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   568  // returned.
   569  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   570  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   571  	netAddrs, errs := NewNetAddressStrings(addrs)
   572  	// report all the errors
   573  	for _, err := range errs {
   574  		sw.Logger.Error("Error in peer's address", "err", err)
   575  	}
   576  	// return first non-ErrNetAddressLookup error
   577  	for _, err := range errs {
   578  		if _, ok := err.(ErrNetAddressLookup); ok {
   579  			continue
   580  		}
   581  		return err
   582  	}
   583  	sw.persistentPeersAddrs = netAddrs
   584  	return nil
   585  }
   586  
   587  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   588  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   589  	for i, id := range ids {
   590  		err := validateID(ID(id))
   591  		if err != nil {
   592  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   593  		}
   594  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   595  	}
   596  	return nil
   597  }
   598  
   599  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   600  	validIDs := make([]string, 0, len(ids))
   601  	for i, id := range ids {
   602  		err := validateID(ID(id))
   603  		if err != nil {
   604  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   605  		}
   606  		validIDs = append(validIDs, id)
   607  	}
   608  
   609  	sw.addrBook.AddPrivateIDs(validIDs)
   610  
   611  	return nil
   612  }
   613  
   614  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   615  	for _, pa := range sw.persistentPeersAddrs {
   616  		if pa.Equals(na) {
   617  			return true
   618  		}
   619  	}
   620  	return false
   621  }
   622  
   623  func (sw *Switch) acceptRoutine() {
   624  	for {
   625  		p, err := sw.transport.Accept(peerConfig{
   626  			chDescs:      sw.chDescs,
   627  			onPeerError:  sw.StopPeerForError,
   628  			reactorsByCh: sw.reactorsByCh,
   629  			metrics:      sw.metrics,
   630  			isPersistent: sw.IsPeerPersistent,
   631  		})
   632  		if err != nil {
   633  			switch err := err.(type) {
   634  			case ErrRejected:
   635  				if err.IsSelf() {
   636  					// Remove the given address from the address book and add to our addresses
   637  					// to avoid dialing in the future.
   638  					addr := err.Addr()
   639  					sw.addrBook.RemoveAddress(&addr)
   640  					sw.addrBook.AddOurAddress(&addr)
   641  				}
   642  
   643  				sw.Logger.Info(
   644  					"Inbound Peer rejected",
   645  					"err", err,
   646  					"numPeers", sw.peers.Size(),
   647  				)
   648  
   649  				continue
   650  			case ErrFilterTimeout:
   651  				sw.Logger.Error(
   652  					"Peer filter timed out",
   653  					"err", err,
   654  				)
   655  
   656  				continue
   657  			case ErrTransportClosed:
   658  				sw.Logger.Error(
   659  					"Stopped accept routine, as transport is closed",
   660  					"numPeers", sw.peers.Size(),
   661  				)
   662  			default:
   663  				sw.Logger.Error(
   664  					"Accept on transport errored",
   665  					"err", err,
   666  					"numPeers", sw.peers.Size(),
   667  				)
   668  				// We could instead have a retry loop around the acceptRoutine,
   669  				// but that would need to stop and let the node shutdown eventually.
   670  				// So might as well panic and let process managers restart the node.
   671  				// There's no point in letting the node run without the acceptRoutine,
   672  				// since it won't be able to accept new connections.
   673  				panic(fmt.Errorf("accept routine exited: %v", err))
   674  			}
   675  
   676  			break
   677  		}
   678  
   679  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   680  			// Ignore connection if we already have enough peers.
   681  			_, in, _ := sw.NumPeers()
   682  			if in >= sw.config.MaxNumInboundPeers {
   683  				sw.Logger.Info(
   684  					"Ignoring inbound connection: already have enough inbound peers",
   685  					"address", p.SocketAddr(),
   686  					"have", in,
   687  					"max", sw.config.MaxNumInboundPeers,
   688  				)
   689  
   690  				sw.transport.Cleanup(p)
   691  
   692  				continue
   693  			}
   694  
   695  		}
   696  
   697  		if err := sw.addPeer(p); err != nil {
   698  			sw.transport.Cleanup(p)
   699  			if p.IsRunning() {
   700  				_ = p.Stop()
   701  			}
   702  			sw.Logger.Info(
   703  				"Ignoring inbound connection: error while adding peer",
   704  				"err", err,
   705  				"id", p.ID(),
   706  			)
   707  		}
   708  	}
   709  }
   710  
   711  // dial the peer; make secret connection; authenticate against the dialed ID;
   712  // add the peer.
   713  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   714  // If peer is started successfully, reconnectLoop will start when
   715  // StopPeerForError is called.
   716  func (sw *Switch) addOutboundPeerWithConfig(
   717  	addr *NetAddress,
   718  	cfg *config.P2PConfig,
   719  ) error {
   720  	sw.Logger.Info("Dialing peer", "address", addr)
   721  
   722  	// XXX(xla): Remove the leakage of test concerns in implementation.
   723  	if cfg.TestDialFail {
   724  		go sw.reconnectToPeer(addr)
   725  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   726  	}
   727  
   728  	p, err := sw.transport.Dial(*addr, peerConfig{
   729  		chDescs:      sw.chDescs,
   730  		onPeerError:  sw.StopPeerForError,
   731  		isPersistent: sw.IsPeerPersistent,
   732  		reactorsByCh: sw.reactorsByCh,
   733  		metrics:      sw.metrics,
   734  	})
   735  	if err != nil {
   736  		if e, ok := err.(ErrRejected); ok {
   737  			if e.IsSelf() {
   738  				// Remove the given address from the address book and add to our addresses
   739  				// to avoid dialing in the future.
   740  				sw.addrBook.RemoveAddress(addr)
   741  				sw.addrBook.AddOurAddress(addr)
   742  
   743  				return err
   744  			}
   745  		}
   746  
   747  		// retry persistent peers after
   748  		// any dial error besides IsSelf()
   749  		if sw.IsPeerPersistent(addr) {
   750  			go sw.reconnectToPeer(addr)
   751  		}
   752  
   753  		return err
   754  	}
   755  
   756  	if err := sw.addPeer(p); err != nil {
   757  		sw.transport.Cleanup(p)
   758  		if p.IsRunning() {
   759  			_ = p.Stop()
   760  		}
   761  		return err
   762  	}
   763  
   764  	return nil
   765  }
   766  
   767  func (sw *Switch) filterPeer(p Peer) error {
   768  	// Avoid duplicate
   769  	if sw.peers.Has(p.ID()) {
   770  		return ErrRejected{id: p.ID(), isDuplicate: true}
   771  	}
   772  
   773  	errc := make(chan error, len(sw.peerFilters))
   774  
   775  	for _, f := range sw.peerFilters {
   776  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   777  			errc <- f(sw.peers, p)
   778  		}(f, p, errc)
   779  	}
   780  
   781  	for i := 0; i < cap(errc); i++ {
   782  		select {
   783  		case err := <-errc:
   784  			if err != nil {
   785  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   786  			}
   787  		case <-time.After(sw.filterTimeout):
   788  			return ErrFilterTimeout{}
   789  		}
   790  	}
   791  
   792  	return nil
   793  }
   794  
   795  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   796  // the peer is filtered out or failed to start or can't be added.
   797  func (sw *Switch) addPeer(p Peer) error {
   798  	if err := sw.filterPeer(p); err != nil {
   799  		return err
   800  	}
   801  
   802  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   803  
   804  	// Handle the shut down case where the switch has stopped but we're
   805  	// concurrently trying to add a peer.
   806  	if !sw.IsRunning() {
   807  		// XXX should this return an error or just log and terminate?
   808  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   809  		return nil
   810  	}
   811  
   812  	// Add some data to the peer, which is required by reactors.
   813  	for _, reactor := range sw.reactors {
   814  		p = reactor.InitPeer(p)
   815  	}
   816  
   817  	// Start the peer's send/recv routines.
   818  	// Must start it before adding it to the peer set
   819  	// to prevent Start and Stop from being called concurrently.
   820  	err := p.Start()
   821  	if err != nil {
   822  		// Should never happen
   823  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   824  		return err
   825  	}
   826  
   827  	// Add the peer to PeerSet. Do this before starting the reactors
   828  	// so that if Receive errors, we will find the peer and remove it.
   829  	// Add should not err since we already checked peers.Has().
   830  	if err := sw.peers.Add(p); err != nil {
   831  		switch err.(type) {
   832  		case ErrPeerRemoval:
   833  			sw.Logger.Error("Error starting peer ",
   834  				" err ", "Peer has already errored and removal was attempted.",
   835  				"peer", p.ID())
   836  		}
   837  		return err
   838  	}
   839  	sw.metrics.Peers.Add(float64(1))
   840  
   841  	// Start all the reactor protocols on the peer.
   842  	for _, reactor := range sw.reactors {
   843  		reactor.AddPeer(p)
   844  	}
   845  
   846  	sw.Logger.Info("Added peer", "peer", p)
   847  
   848  	return nil
   849  }