github.com/badrootd/nibiru-cometbft@v0.37.5-0.20240307173500-2a75559eee9b/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/badrootd/nibiru-cometbft/config"
    10  	"github.com/badrootd/nibiru-cometbft/libs/cmap"
    11  	"github.com/badrootd/nibiru-cometbft/libs/rand"
    12  	"github.com/badrootd/nibiru-cometbft/libs/service"
    13  	"github.com/badrootd/nibiru-cometbft/p2p/conn"
    14  	"github.com/cosmos/gogoproto/proto"
    15  )
    16  
const (
	// wait a random amount of time from this interval
	// before dialing peers or reconnecting to help prevent DoS
	// (upper bound, in milliseconds, of the jitter drawn by randomSleep)
	dialRandomizerIntervalMilliseconds = 3000

	// repeatedly try to reconnect for a few minutes
	// ie. 5 * 20 = 100s (plus the random jitter randomSleep adds per attempt)
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// then move into exponential backoff mode.
	// Attempt i (0-based) of reconnectToPeer first sleeps
	// reconnectBackOffBaseSeconds**i seconds, so the sleeps range from
	// 3**0 = 1s up to 3**9 = 19683s (~5.5h) and add up to
	// (3**10 - 1) / 2 = 29524s (~8.2h), plus jitter.
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)
    32  
    33  // MConnConfig returns an MConnConfig with fields updated
    34  // from the P2PConfig.
    35  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    36  	mConfig := conn.DefaultMConnConfig()
    37  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    38  	mConfig.SendRate = cfg.SendRate
    39  	mConfig.RecvRate = cfg.RecvRate
    40  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    41  	return mConfig
    42  }
    43  
    44  //-----------------------------------------------------------------------------
    45  
    46  // An AddrBook represents an address book from the pex package, which is used
    47  // to store peer addresses.
    48  type AddrBook interface {
    49  	AddAddress(addr *NetAddress, src *NetAddress) error
    50  	AddPrivateIDs([]string)
    51  	AddOurAddress(*NetAddress)
    52  	OurAddress(*NetAddress) bool
    53  	MarkGood(ID)
    54  	RemoveAddress(*NetAddress)
    55  	HasAddress(*NetAddress) bool
    56  	Save()
    57  }
    58  
// PeerFilterFunc to be implemented by filter hooks after a new Peer has been
// fully setup.
//
// Switch.filterPeer calls every registered filter with the switch's current
// peer set and the candidate peer; a non-nil error rejects the candidate.
type PeerFilterFunc func(IPeerSet, Peer) error
    62  
    63  //-----------------------------------------------------------------------------
    64  
// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
type Switch struct {
	service.BaseService

	// config is the P2P configuration handed to NewSwitch.
	config        *config.P2PConfig
	// reactors holds the registered reactors keyed by the name given to AddReactor.
	reactors      map[string]Reactor
	// chDescs collects the channel descriptors of all registered reactors.
	chDescs       []*conn.ChannelDescriptor
	// reactorsByCh maps a channel ID to the single reactor that owns it.
	reactorsByCh  map[byte]Reactor
	// msgTypeByChID maps a channel ID to the MessageType of its descriptor.
	msgTypeByChID map[byte]proto.Message
	// peers is the set of peers currently added to the switch.
	peers         *PeerSet
	// dialing tracks addresses being dialed, keyed by string(addr.ID).
	dialing       *cmap.CMap
	// reconnecting tracks addresses with a running reconnectToPeer loop,
	// keyed by string(addr.ID).
	reconnecting  *cmap.CMap
	nodeInfo      NodeInfo // our node info
	nodeKey       *NodeKey // our node privkey
	// addrBook is optional; it stays nil until SetAddrBook is called.
	addrBook      AddrBook
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress
	// IDs of peers that NumPeers does not count and that acceptRoutine admits
	// regardless of the inbound peer limit.
	unconditionalPeerIDs map[ID]struct{}

	// transport is used to accept, dial and clean up peer connections.
	transport Transport

	// filterTimeout bounds how long filterPeer waits for peerFilters.
	filterTimeout time.Duration
	peerFilters   []PeerFilterFunc

	rng *rand.Rand // seed for randomizing dial times and orders

	// metrics and mlc are handed to every peer through peerConfig.
	metrics *Metrics
	mlc     *metricsLabelCache
}
    97  
    98  // NetAddress returns the address the switch is listening on.
    99  func (sw *Switch) NetAddress() *NetAddress {
   100  	addr := sw.transport.NetAddress()
   101  	return &addr
   102  }
   103  
// SwitchOption sets an optional parameter on the Switch.
// NewSwitch applies the options, in the order given, to the freshly built
// Switch just before returning it.
type SwitchOption func(*Switch)
   106  
   107  // NewSwitch creates a new Switch with the given config.
   108  func NewSwitch(
   109  	cfg *config.P2PConfig,
   110  	transport Transport,
   111  	options ...SwitchOption,
   112  ) *Switch {
   113  
   114  	sw := &Switch{
   115  		config:               cfg,
   116  		reactors:             make(map[string]Reactor),
   117  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   118  		reactorsByCh:         make(map[byte]Reactor),
   119  		msgTypeByChID:        make(map[byte]proto.Message),
   120  		peers:                NewPeerSet(),
   121  		dialing:              cmap.NewCMap(),
   122  		reconnecting:         cmap.NewCMap(),
   123  		metrics:              NopMetrics(),
   124  		transport:            transport,
   125  		filterTimeout:        defaultFilterTimeout,
   126  		persistentPeersAddrs: make([]*NetAddress, 0),
   127  		unconditionalPeerIDs: make(map[ID]struct{}),
   128  		mlc:                  newMetricsLabelCache(),
   129  	}
   130  
   131  	// Ensure we have a completely undeterministic PRNG.
   132  	sw.rng = rand.NewRand()
   133  
   134  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   135  
   136  	for _, option := range options {
   137  		option(sw)
   138  	}
   139  
   140  	return sw
   141  }
   142  
   143  // SwitchFilterTimeout sets the timeout used for peer filters.
   144  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   145  	return func(sw *Switch) { sw.filterTimeout = timeout }
   146  }
   147  
   148  // SwitchPeerFilters sets the filters for rejection of new peers.
   149  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   150  	return func(sw *Switch) { sw.peerFilters = filters }
   151  }
   152  
   153  // WithMetrics sets the metrics.
   154  func WithMetrics(metrics *Metrics) SwitchOption {
   155  	return func(sw *Switch) { sw.metrics = metrics }
   156  }
   157  
   158  //---------------------------------------------------------------------
   159  // Switch setup
   160  
   161  // AddReactor adds the given reactor to the switch.
   162  // NOTE: Not goroutine safe.
   163  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   164  	for _, chDesc := range reactor.GetChannels() {
   165  		chID := chDesc.ID
   166  		// No two reactors can share the same channel.
   167  		if sw.reactorsByCh[chID] != nil {
   168  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   169  		}
   170  		sw.chDescs = append(sw.chDescs, chDesc)
   171  		sw.reactorsByCh[chID] = reactor
   172  		sw.msgTypeByChID[chID] = chDesc.MessageType
   173  	}
   174  	sw.reactors[name] = reactor
   175  	reactor.SetSwitch(sw)
   176  	return reactor
   177  }
   178  
   179  // RemoveReactor removes the given Reactor from the Switch.
   180  // NOTE: Not goroutine safe.
   181  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   182  	for _, chDesc := range reactor.GetChannels() {
   183  		// remove channel description
   184  		for i := 0; i < len(sw.chDescs); i++ {
   185  			if chDesc.ID == sw.chDescs[i].ID {
   186  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   187  				break
   188  			}
   189  		}
   190  		delete(sw.reactorsByCh, chDesc.ID)
   191  		delete(sw.msgTypeByChID, chDesc.ID)
   192  	}
   193  	delete(sw.reactors, name)
   194  	reactor.SetSwitch(nil)
   195  }
   196  
   197  // Reactors returns a map of reactors registered on the switch.
   198  // NOTE: Not goroutine safe.
   199  func (sw *Switch) Reactors() map[string]Reactor {
   200  	return sw.reactors
   201  }
   202  
   203  // Reactor returns the reactor with the given name.
   204  // NOTE: Not goroutine safe.
   205  func (sw *Switch) Reactor(name string) Reactor {
   206  	return sw.reactors[name]
   207  }
   208  
   209  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   210  // NOTE: Not goroutine safe.
   211  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   212  	sw.nodeInfo = nodeInfo
   213  }
   214  
   215  // NodeInfo returns the switch's NodeInfo.
   216  // NOTE: Not goroutine safe.
   217  func (sw *Switch) NodeInfo() NodeInfo {
   218  	return sw.nodeInfo
   219  }
   220  
   221  // SetNodeKey sets the switch's private key for authenticated encryption.
   222  // NOTE: Not goroutine safe.
   223  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   224  	sw.nodeKey = nodeKey
   225  }
   226  
   227  //---------------------------------------------------------------------
   228  // Service start/stop
   229  
   230  // OnStart implements BaseService. It starts all the reactors and peers.
   231  func (sw *Switch) OnStart() error {
   232  	// Start reactors
   233  	for _, reactor := range sw.reactors {
   234  		err := reactor.Start()
   235  		if err != nil {
   236  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   237  		}
   238  	}
   239  
   240  	// Start accepting Peers.
   241  	go sw.acceptRoutine()
   242  
   243  	return nil
   244  }
   245  
   246  // OnStop implements BaseService. It stops all peers and reactors.
   247  func (sw *Switch) OnStop() {
   248  	// Stop peers
   249  	for _, p := range sw.peers.List() {
   250  		sw.stopAndRemovePeer(p, nil)
   251  	}
   252  
   253  	// Stop reactors
   254  	sw.Logger.Debug("Switch: Stopping reactors")
   255  	for _, reactor := range sw.reactors {
   256  		if err := reactor.Stop(); err != nil {
   257  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   258  		}
   259  	}
   260  }
   261  
   262  //---------------------------------------------------------------------
   263  // Peers
   264  
   265  // BroadcastEnvelope runs a go routine for each attempted send, which will block trying
   266  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   267  // success values for each attempted send (false if times out). Channel will be
   268  // closed once msg bytes are sent to all peers (or time out).
   269  // BroadcastEnvelopes sends to the peers using the SendEnvelope method.
   270  //
   271  // NOTE: BroadcastEnvelope uses goroutines, so order of broadcast may not be preserved.
   272  func (sw *Switch) BroadcastEnvelope(e Envelope) chan bool {
   273  	sw.Logger.Debug("Broadcast", "channel", e.ChannelID)
   274  
   275  	peers := sw.peers.List()
   276  	var wg sync.WaitGroup
   277  	wg.Add(len(peers))
   278  	successChan := make(chan bool, len(peers))
   279  
   280  	for _, peer := range peers {
   281  		go func(p Peer) {
   282  			defer wg.Done()
   283  			success := p.SendEnvelope(e)
   284  			successChan <- success
   285  		}(peer)
   286  	}
   287  
   288  	go func() {
   289  		wg.Wait()
   290  		close(successChan)
   291  	}()
   292  
   293  	return successChan
   294  }
   295  
   296  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   297  // unconditional peers are not counted here.
   298  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   299  	peers := sw.peers.List()
   300  	for _, peer := range peers {
   301  		if peer.IsOutbound() {
   302  			if !sw.IsPeerUnconditional(peer.ID()) {
   303  				outbound++
   304  			}
   305  		} else {
   306  			if !sw.IsPeerUnconditional(peer.ID()) {
   307  				inbound++
   308  			}
   309  		}
   310  	}
   311  	dialing = sw.dialing.Size()
   312  	return
   313  }
   314  
   315  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   316  	_, ok := sw.unconditionalPeerIDs[id]
   317  	return ok
   318  }
   319  
   320  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   321  func (sw *Switch) MaxNumOutboundPeers() int {
   322  	return sw.config.MaxNumOutboundPeers
   323  }
   324  
   325  // Peers returns the set of peers that are connected to the switch.
   326  func (sw *Switch) Peers() IPeerSet {
   327  	return sw.peers
   328  }
   329  
   330  // StopPeerForError disconnects from a peer due to external error.
   331  // If the peer is persistent, it will attempt to reconnect.
   332  // TODO: make record depending on reason.
   333  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   334  	if !peer.IsRunning() {
   335  		return
   336  	}
   337  
   338  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   339  	sw.stopAndRemovePeer(peer, reason)
   340  
   341  	if peer.IsPersistent() {
   342  		var addr *NetAddress
   343  		if peer.IsOutbound() { // socket address for outbound peers
   344  			addr = peer.SocketAddr()
   345  		} else { // self-reported address for inbound peers
   346  			var err error
   347  			addr, err = peer.NodeInfo().NetAddress()
   348  			if err != nil {
   349  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   350  					"peer", peer, "err", err)
   351  				return
   352  			}
   353  		}
   354  		go sw.reconnectToPeer(addr)
   355  	}
   356  }
   357  
   358  // StopPeerGracefully disconnects from a peer gracefully.
   359  // TODO: handle graceful disconnects.
   360  func (sw *Switch) StopPeerGracefully(peer Peer) {
   361  	sw.Logger.Info("Stopping peer gracefully")
   362  	sw.stopAndRemovePeer(peer, nil)
   363  }
   364  
   365  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   366  	sw.transport.Cleanup(peer)
   367  	if err := peer.Stop(); err != nil {
   368  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   369  	}
   370  
   371  	for _, reactor := range sw.reactors {
   372  		reactor.RemovePeer(peer, reason)
   373  	}
   374  
   375  	// Removing a peer should go last to avoid a situation where a peer
   376  	// reconnect to our node and the switch calls InitPeer before
   377  	// RemovePeer is finished.
   378  	// https://github.com/tendermint/tendermint/issues/3338
   379  	if sw.peers.Remove(peer) {
   380  		sw.metrics.Peers.Add(float64(-1))
   381  	} else {
   382  		// Removal of the peer has failed. The function above sets a flag within the peer to mark this.
   383  		// We keep this message here as information to the developer.
   384  		sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID())
   385  	}
   386  }
   387  
   388  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   389  // with a fixed interval, then with exponential backoff.
   390  // If no success after all that, it stops trying, and leaves it
   391  // to the PEX/Addrbook to find the peer with the addr again
   392  // NOTE: this will keep trying even if the handshake or auth fails.
   393  // TODO: be more explicit with error types so we only retry on certain failures
   394  //   - ie. if we're getting ErrDuplicatePeer we can stop
   395  //     because the addrbook got us the peer back already
   396  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   397  	if sw.reconnecting.Has(string(addr.ID)) {
   398  		return
   399  	}
   400  	sw.reconnecting.Set(string(addr.ID), addr)
   401  	defer sw.reconnecting.Delete(string(addr.ID))
   402  
   403  	start := time.Now()
   404  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   405  	for i := 0; i < reconnectAttempts; i++ {
   406  		if !sw.IsRunning() {
   407  			return
   408  		}
   409  
   410  		err := sw.DialPeerWithAddress(addr)
   411  		if err == nil {
   412  			return // success
   413  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   414  			return
   415  		}
   416  
   417  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   418  		// sleep a set amount
   419  		sw.randomSleep(reconnectInterval)
   420  		continue
   421  	}
   422  
   423  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   424  		"addr", addr, "elapsed", time.Since(start))
   425  	for i := 0; i < reconnectBackOffAttempts; i++ {
   426  		if !sw.IsRunning() {
   427  			return
   428  		}
   429  
   430  		// sleep an exponentially increasing amount
   431  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   432  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   433  
   434  		err := sw.DialPeerWithAddress(addr)
   435  		if err == nil {
   436  			return // success
   437  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   438  			return
   439  		}
   440  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   441  	}
   442  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   443  }
   444  
   445  // SetAddrBook allows to set address book on Switch.
   446  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   447  	sw.addrBook = addrBook
   448  }
   449  
   450  // MarkPeerAsGood marks the given peer as good when it did something useful
   451  // like contributed to consensus.
   452  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   453  	if sw.addrBook != nil {
   454  		sw.addrBook.MarkGood(peer.ID())
   455  	}
   456  }
   457  
   458  //---------------------------------------------------------------------
   459  // Dialing
   460  
   461  type privateAddr interface {
   462  	PrivateAddr() bool
   463  }
   464  
   465  func isPrivateAddr(err error) bool {
   466  	te, ok := err.(privateAddr)
   467  	return ok && te.PrivateAddr()
   468  }
   469  
   470  // DialPeersAsync dials a list of peers asynchronously in random order.
   471  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   472  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   473  // encounter is returned.
   474  // Nop if there are no peers.
   475  func (sw *Switch) DialPeersAsync(peers []string) error {
   476  	netAddrs, errs := NewNetAddressStrings(peers)
   477  	// report all the errors
   478  	for _, err := range errs {
   479  		sw.Logger.Error("Error in peer's address", "err", err)
   480  	}
   481  	// return first non-ErrNetAddressLookup error
   482  	for _, err := range errs {
   483  		if _, ok := err.(ErrNetAddressLookup); ok {
   484  			continue
   485  		}
   486  		return err
   487  	}
   488  	sw.dialPeersAsync(netAddrs)
   489  	return nil
   490  }
   491  
   492  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   493  	ourAddr := sw.NetAddress()
   494  
   495  	// TODO: this code feels like it's in the wrong place.
   496  	// The integration tests depend on the addrBook being saved
   497  	// right away but maybe we can change that. Recall that
   498  	// the addrBook is only written to disk every 2min
   499  	if sw.addrBook != nil {
   500  		// add peers to `addrBook`
   501  		for _, netAddr := range netAddrs {
   502  			// do not add our address or ID
   503  			if !netAddr.Same(ourAddr) {
   504  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   505  					if isPrivateAddr(err) {
   506  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   507  					} else {
   508  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   509  					}
   510  				}
   511  			}
   512  		}
   513  		// Persist some peers to disk right away.
   514  		// NOTE: integration tests depend on this
   515  		sw.addrBook.Save()
   516  	}
   517  
   518  	// permute the list, dial them in random order.
   519  	perm := sw.rng.Perm(len(netAddrs))
   520  	for i := 0; i < len(perm); i++ {
   521  		go func(i int) {
   522  			j := perm[i]
   523  			addr := netAddrs[j]
   524  
   525  			if addr.Same(ourAddr) {
   526  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   527  				return
   528  			}
   529  
   530  			sw.randomSleep(0)
   531  
   532  			err := sw.DialPeerWithAddress(addr)
   533  			if err != nil {
   534  				switch err.(type) {
   535  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   536  					sw.Logger.Debug("Error dialing peer", "err", err)
   537  				default:
   538  					sw.Logger.Error("Error dialing peer", "err", err)
   539  				}
   540  			}
   541  		}(i)
   542  	}
   543  }
   544  
   545  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   546  // and authenticates successfully.
   547  // If we're currently dialing this address or it belongs to an existing peer,
   548  // ErrCurrentlyDialingOrExistingAddress is returned.
   549  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   550  	if sw.IsDialingOrExistingAddress(addr) {
   551  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   552  	}
   553  
   554  	sw.dialing.Set(string(addr.ID), addr)
   555  	defer sw.dialing.Delete(string(addr.ID))
   556  
   557  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   558  }
   559  
   560  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   561  func (sw *Switch) randomSleep(interval time.Duration) {
   562  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   563  	time.Sleep(r + interval)
   564  }
   565  
   566  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   567  // address or dialing it at the moment.
   568  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   569  	return sw.dialing.Has(string(addr.ID)) ||
   570  		sw.peers.Has(addr.ID) ||
   571  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   572  }
   573  
   574  // AddPersistentPeers allows you to set persistent peers. It ignores
   575  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   576  // returned.
   577  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   578  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   579  	netAddrs, errs := NewNetAddressStrings(addrs)
   580  	// report all the errors
   581  	for _, err := range errs {
   582  		sw.Logger.Error("Error in peer's address", "err", err)
   583  	}
   584  	// return first non-ErrNetAddressLookup error
   585  	for _, err := range errs {
   586  		if _, ok := err.(ErrNetAddressLookup); ok {
   587  			continue
   588  		}
   589  		return err
   590  	}
   591  	sw.persistentPeersAddrs = netAddrs
   592  	return nil
   593  }
   594  
   595  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   596  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   597  	for i, id := range ids {
   598  		err := validateID(ID(id))
   599  		if err != nil {
   600  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   601  		}
   602  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   603  	}
   604  	return nil
   605  }
   606  
   607  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   608  	validIDs := make([]string, 0, len(ids))
   609  	for i, id := range ids {
   610  		err := validateID(ID(id))
   611  		if err != nil {
   612  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   613  		}
   614  		validIDs = append(validIDs, id)
   615  	}
   616  
   617  	sw.addrBook.AddPrivateIDs(validIDs)
   618  
   619  	return nil
   620  }
   621  
   622  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   623  	for _, pa := range sw.persistentPeersAddrs {
   624  		if pa.Equals(na) {
   625  			return true
   626  		}
   627  	}
   628  	return false
   629  }
   630  
   631  func (sw *Switch) acceptRoutine() {
   632  	for {
   633  		p, err := sw.transport.Accept(peerConfig{
   634  			chDescs:       sw.chDescs,
   635  			onPeerError:   sw.StopPeerForError,
   636  			reactorsByCh:  sw.reactorsByCh,
   637  			msgTypeByChID: sw.msgTypeByChID,
   638  			metrics:       sw.metrics,
   639  			mlc:           sw.mlc,
   640  			isPersistent:  sw.IsPeerPersistent,
   641  		})
   642  		if err != nil {
   643  			switch err := err.(type) {
   644  			case ErrRejected:
   645  				if err.IsSelf() {
   646  					// Remove the given address from the address book and add to our addresses
   647  					// to avoid dialing in the future.
   648  					addr := err.Addr()
   649  					sw.addrBook.RemoveAddress(&addr)
   650  					sw.addrBook.AddOurAddress(&addr)
   651  				}
   652  
   653  				sw.Logger.Info(
   654  					"Inbound Peer rejected",
   655  					"err", err,
   656  					"numPeers", sw.peers.Size(),
   657  				)
   658  
   659  				continue
   660  			case ErrFilterTimeout:
   661  				sw.Logger.Error(
   662  					"Peer filter timed out",
   663  					"err", err,
   664  				)
   665  
   666  				continue
   667  			case ErrTransportClosed:
   668  				sw.Logger.Error(
   669  					"Stopped accept routine, as transport is closed",
   670  					"numPeers", sw.peers.Size(),
   671  				)
   672  			default:
   673  				sw.Logger.Error(
   674  					"Accept on transport errored",
   675  					"err", err,
   676  					"numPeers", sw.peers.Size(),
   677  				)
   678  				// We could instead have a retry loop around the acceptRoutine,
   679  				// but that would need to stop and let the node shutdown eventually.
   680  				// So might as well panic and let process managers restart the node.
   681  				// There's no point in letting the node run without the acceptRoutine,
   682  				// since it won't be able to accept new connections.
   683  				panic(fmt.Errorf("accept routine exited: %v", err))
   684  			}
   685  
   686  			break
   687  		}
   688  
   689  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   690  			// Ignore connection if we already have enough peers.
   691  			_, in, _ := sw.NumPeers()
   692  			if in >= sw.config.MaxNumInboundPeers {
   693  				sw.Logger.Info(
   694  					"Ignoring inbound connection: already have enough inbound peers",
   695  					"address", p.SocketAddr(),
   696  					"have", in,
   697  					"max", sw.config.MaxNumInboundPeers,
   698  				)
   699  
   700  				sw.transport.Cleanup(p)
   701  
   702  				continue
   703  			}
   704  
   705  		}
   706  
   707  		if err := sw.addPeer(p); err != nil {
   708  			sw.transport.Cleanup(p)
   709  			if p.IsRunning() {
   710  				_ = p.Stop()
   711  			}
   712  			sw.Logger.Info(
   713  				"Ignoring inbound connection: error while adding peer",
   714  				"err", err,
   715  				"id", p.ID(),
   716  			)
   717  		}
   718  	}
   719  }
   720  
   721  // dial the peer; make secret connection; authenticate against the dialed ID;
   722  // add the peer.
   723  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   724  // If peer is started successfully, reconnectLoop will start when
   725  // StopPeerForError is called.
   726  func (sw *Switch) addOutboundPeerWithConfig(
   727  	addr *NetAddress,
   728  	cfg *config.P2PConfig,
   729  ) error {
   730  	sw.Logger.Debug("Dialing peer", "address", addr)
   731  
   732  	// XXX(xla): Remove the leakage of test concerns in implementation.
   733  	if cfg.TestDialFail {
   734  		go sw.reconnectToPeer(addr)
   735  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   736  	}
   737  
   738  	p, err := sw.transport.Dial(*addr, peerConfig{
   739  		chDescs:       sw.chDescs,
   740  		onPeerError:   sw.StopPeerForError,
   741  		isPersistent:  sw.IsPeerPersistent,
   742  		reactorsByCh:  sw.reactorsByCh,
   743  		msgTypeByChID: sw.msgTypeByChID,
   744  		metrics:       sw.metrics,
   745  		mlc:           sw.mlc,
   746  	})
   747  	if err != nil {
   748  		if e, ok := err.(ErrRejected); ok {
   749  			if e.IsSelf() {
   750  				// Remove the given address from the address book and add to our addresses
   751  				// to avoid dialing in the future.
   752  				sw.addrBook.RemoveAddress(addr)
   753  				sw.addrBook.AddOurAddress(addr)
   754  
   755  				return err
   756  			}
   757  		}
   758  
   759  		// retry persistent peers after
   760  		// any dial error besides IsSelf()
   761  		if sw.IsPeerPersistent(addr) {
   762  			go sw.reconnectToPeer(addr)
   763  		}
   764  
   765  		return err
   766  	}
   767  
   768  	if err := sw.addPeer(p); err != nil {
   769  		sw.transport.Cleanup(p)
   770  		if p.IsRunning() {
   771  			_ = p.Stop()
   772  		}
   773  		return err
   774  	}
   775  
   776  	return nil
   777  }
   778  
   779  func (sw *Switch) filterPeer(p Peer) error {
   780  	// Avoid duplicate
   781  	if sw.peers.Has(p.ID()) {
   782  		return ErrRejected{id: p.ID(), isDuplicate: true}
   783  	}
   784  
   785  	errc := make(chan error, len(sw.peerFilters))
   786  
   787  	for _, f := range sw.peerFilters {
   788  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   789  			errc <- f(sw.peers, p)
   790  		}(f, p, errc)
   791  	}
   792  
   793  	for i := 0; i < cap(errc); i++ {
   794  		select {
   795  		case err := <-errc:
   796  			if err != nil {
   797  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   798  			}
   799  		case <-time.After(sw.filterTimeout):
   800  			return ErrFilterTimeout{}
   801  		}
   802  	}
   803  
   804  	return nil
   805  }
   806  
   807  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   808  // the peer is filtered out or failed to start or can't be added.
   809  func (sw *Switch) addPeer(p Peer) error {
   810  	if err := sw.filterPeer(p); err != nil {
   811  		return err
   812  	}
   813  
   814  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   815  
   816  	// Handle the shut down case where the switch has stopped but we're
   817  	// concurrently trying to add a peer.
   818  	if !sw.IsRunning() {
   819  		// XXX should this return an error or just log and terminate?
   820  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   821  		return nil
   822  	}
   823  
   824  	// Add some data to the peer, which is required by reactors.
   825  	for _, reactor := range sw.reactors {
   826  		p = reactor.InitPeer(p)
   827  	}
   828  
   829  	// Start the peer's send/recv routines.
   830  	// Must start it before adding it to the peer set
   831  	// to prevent Start and Stop from being called concurrently.
   832  	err := p.Start()
   833  	if err != nil {
   834  		// Should never happen
   835  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   836  		return err
   837  	}
   838  
   839  	// Add the peer to PeerSet. Do this before starting the reactors
   840  	// so that if Receive errors, we will find the peer and remove it.
   841  	// Add should not err since we already checked peers.Has().
   842  	if err := sw.peers.Add(p); err != nil {
   843  		switch err.(type) {
   844  		case ErrPeerRemoval:
   845  			sw.Logger.Error("Error starting peer ",
   846  				" err ", "Peer has already errored and removal was attempted.",
   847  				"peer", p.ID())
   848  		}
   849  		return err
   850  	}
   851  	sw.metrics.Peers.Add(float64(1))
   852  
   853  	// Start all the reactor protocols on the peer.
   854  	for _, reactor := range sw.reactors {
   855  		reactor.AddPeer(p)
   856  	}
   857  
   858  	sw.Logger.Debug("Added peer", "peer", p)
   859  
   860  	return nil
   861  }