github.com/KYVENetwork/cometbft/v38@v38.0.3/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/cosmos/gogoproto/proto"
    10  
    11  	"github.com/KYVENetwork/cometbft/v38/config"
    12  	"github.com/KYVENetwork/cometbft/v38/libs/cmap"
    13  	"github.com/KYVENetwork/cometbft/v38/libs/rand"
    14  	"github.com/KYVENetwork/cometbft/v38/libs/service"
    15  	"github.com/KYVENetwork/cometbft/v38/p2p/conn"
    16  )
    17  
    18  const (
    19  	// wait a random amount of time from this interval
    20  	// before dialing peers or reconnecting to help prevent DoS
    21  	dialRandomizerIntervalMilliseconds = 3000
    22  
    23  	// repeatedly try to reconnect for a few minutes
    24  	// ie. 5 * 20 = 100s
    25  	reconnectAttempts = 20
    26  	reconnectInterval = 5 * time.Second
    27  
    28  	// then move into exponential backoff mode for ~1day
    29  	// ie. 3**10 = 16hrs
    30  	reconnectBackOffAttempts    = 10
    31  	reconnectBackOffBaseSeconds = 3
    32  )
    33  
    34  // MConnConfig returns an MConnConfig with fields updated
    35  // from the P2PConfig.
    36  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    37  	mConfig := conn.DefaultMConnConfig()
    38  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    39  	mConfig.SendRate = cfg.SendRate
    40  	mConfig.RecvRate = cfg.RecvRate
    41  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    42  	mConfig.TestFuzz = cfg.TestFuzz
    43  	mConfig.TestFuzzConfig = cfg.TestFuzzConfig
    44  	return mConfig
    45  }
    46  
    47  //-----------------------------------------------------------------------------
    48  
    49  // An AddrBook represents an address book from the pex package, which is used
    50  // to store peer addresses.
    51  type AddrBook interface {
    52  	AddAddress(addr *NetAddress, src *NetAddress) error
    53  	AddPrivateIDs([]string)
    54  	AddOurAddress(*NetAddress)
    55  	OurAddress(*NetAddress) bool
    56  	MarkGood(ID)
    57  	RemoveAddress(*NetAddress)
    58  	HasAddress(*NetAddress) bool
    59  	Save()
    60  }
    61  
    62  // PeerFilterFunc to be implemented by filter hooks after a new Peer has been
    63  // fully setup.
    64  type PeerFilterFunc func(IPeerSet, Peer) error
    65  
    66  //-----------------------------------------------------------------------------
    67  
    68  // Switch handles peer connections and exposes an API to receive incoming messages
    69  // on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
    70  // or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
    71  // incoming messages are received on the reactor.
    72  type Switch struct {
    73  	service.BaseService
    74  
    75  	config        *config.P2PConfig
    76  	reactors      map[string]Reactor
    77  	chDescs       []*conn.ChannelDescriptor
    78  	reactorsByCh  map[byte]Reactor
    79  	msgTypeByChID map[byte]proto.Message
    80  	peers         *PeerSet
    81  	dialing       *cmap.CMap
    82  	reconnecting  *cmap.CMap
    83  	nodeInfo      NodeInfo // our node info
    84  	nodeKey       *NodeKey // our node privkey
    85  	addrBook      AddrBook
    86  	// peers addresses with whom we'll maintain constant connection
    87  	persistentPeersAddrs []*NetAddress
    88  	unconditionalPeerIDs map[ID]struct{}
    89  
    90  	transport Transport
    91  
    92  	filterTimeout time.Duration
    93  	peerFilters   []PeerFilterFunc
    94  
    95  	rng *rand.Rand // seed for randomizing dial times and orders
    96  
    97  	metrics *Metrics
    98  	mlc     *metricsLabelCache
    99  }
   100  
   101  // NetAddress returns the address the switch is listening on.
   102  func (sw *Switch) NetAddress() *NetAddress {
   103  	addr := sw.transport.NetAddress()
   104  	return &addr
   105  }
   106  
   107  // SwitchOption sets an optional parameter on the Switch.
   108  type SwitchOption func(*Switch)
   109  
   110  // NewSwitch creates a new Switch with the given config.
   111  func NewSwitch(
   112  	cfg *config.P2PConfig,
   113  	transport Transport,
   114  	options ...SwitchOption,
   115  ) *Switch {
   116  
   117  	sw := &Switch{
   118  		config:               cfg,
   119  		reactors:             make(map[string]Reactor),
   120  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   121  		reactorsByCh:         make(map[byte]Reactor),
   122  		msgTypeByChID:        make(map[byte]proto.Message),
   123  		peers:                NewPeerSet(),
   124  		dialing:              cmap.NewCMap(),
   125  		reconnecting:         cmap.NewCMap(),
   126  		metrics:              NopMetrics(),
   127  		transport:            transport,
   128  		filterTimeout:        defaultFilterTimeout,
   129  		persistentPeersAddrs: make([]*NetAddress, 0),
   130  		unconditionalPeerIDs: make(map[ID]struct{}),
   131  		mlc:                  newMetricsLabelCache(),
   132  	}
   133  
   134  	// Ensure we have a completely undeterministic PRNG.
   135  	sw.rng = rand.NewRand()
   136  
   137  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   138  
   139  	for _, option := range options {
   140  		option(sw)
   141  	}
   142  
   143  	return sw
   144  }
   145  
   146  // SwitchFilterTimeout sets the timeout used for peer filters.
   147  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   148  	return func(sw *Switch) { sw.filterTimeout = timeout }
   149  }
   150  
   151  // SwitchPeerFilters sets the filters for rejection of new peers.
   152  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   153  	return func(sw *Switch) { sw.peerFilters = filters }
   154  }
   155  
   156  // WithMetrics sets the metrics.
   157  func WithMetrics(metrics *Metrics) SwitchOption {
   158  	return func(sw *Switch) { sw.metrics = metrics }
   159  }
   160  
   161  //---------------------------------------------------------------------
   162  // Switch setup
   163  
   164  // AddReactor adds the given reactor to the switch.
   165  // NOTE: Not goroutine safe.
   166  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   167  	for _, chDesc := range reactor.GetChannels() {
   168  		chID := chDesc.ID
   169  		// No two reactors can share the same channel.
   170  		if sw.reactorsByCh[chID] != nil {
   171  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   172  		}
   173  		sw.chDescs = append(sw.chDescs, chDesc)
   174  		sw.reactorsByCh[chID] = reactor
   175  		sw.msgTypeByChID[chID] = chDesc.MessageType
   176  	}
   177  	sw.reactors[name] = reactor
   178  	reactor.SetSwitch(sw)
   179  	return reactor
   180  }
   181  
   182  // RemoveReactor removes the given Reactor from the Switch.
   183  // NOTE: Not goroutine safe.
   184  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   185  	for _, chDesc := range reactor.GetChannels() {
   186  		// remove channel description
   187  		for i := 0; i < len(sw.chDescs); i++ {
   188  			if chDesc.ID == sw.chDescs[i].ID {
   189  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   190  				break
   191  			}
   192  		}
   193  		delete(sw.reactorsByCh, chDesc.ID)
   194  		delete(sw.msgTypeByChID, chDesc.ID)
   195  	}
   196  	delete(sw.reactors, name)
   197  	reactor.SetSwitch(nil)
   198  }
   199  
   200  // Reactors returns a map of reactors registered on the switch.
   201  // NOTE: Not goroutine safe.
   202  func (sw *Switch) Reactors() map[string]Reactor {
   203  	return sw.reactors
   204  }
   205  
   206  // Reactor returns the reactor with the given name.
   207  // NOTE: Not goroutine safe.
   208  func (sw *Switch) Reactor(name string) Reactor {
   209  	return sw.reactors[name]
   210  }
   211  
   212  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   213  // NOTE: Not goroutine safe.
   214  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   215  	sw.nodeInfo = nodeInfo
   216  }
   217  
   218  // NodeInfo returns the switch's NodeInfo.
   219  // NOTE: Not goroutine safe.
   220  func (sw *Switch) NodeInfo() NodeInfo {
   221  	return sw.nodeInfo
   222  }
   223  
   224  // SetNodeKey sets the switch's private key for authenticated encryption.
   225  // NOTE: Not goroutine safe.
   226  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   227  	sw.nodeKey = nodeKey
   228  }
   229  
   230  //---------------------------------------------------------------------
   231  // Service start/stop
   232  
   233  // OnStart implements BaseService. It starts all the reactors and peers.
   234  func (sw *Switch) OnStart() error {
   235  	// Start reactors
   236  	for _, reactor := range sw.reactors {
   237  		err := reactor.Start()
   238  		if err != nil {
   239  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   240  		}
   241  	}
   242  
   243  	// Start accepting Peers.
   244  	go sw.acceptRoutine()
   245  
   246  	return nil
   247  }
   248  
   249  // OnStop implements BaseService. It stops all peers and reactors.
   250  func (sw *Switch) OnStop() {
   251  	// Stop peers
   252  	for _, p := range sw.peers.List() {
   253  		sw.stopAndRemovePeer(p, nil)
   254  	}
   255  
   256  	// Stop reactors
   257  	sw.Logger.Debug("Switch: Stopping reactors")
   258  	for _, reactor := range sw.reactors {
   259  		if err := reactor.Stop(); err != nil {
   260  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   261  		}
   262  	}
   263  }
   264  
   265  //---------------------------------------------------------------------
   266  // Peers
   267  
   268  // Broadcast runs a go routine for each attempted send, which will block trying
   269  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   270  // success values for each attempted send (false if times out). Channel will be
   271  // closed once msg bytes are sent to all peers (or time out).
   272  //
   273  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   274  func (sw *Switch) Broadcast(e Envelope) chan bool {
   275  	sw.Logger.Debug("Broadcast", "channel", e.ChannelID)
   276  
   277  	peers := sw.peers.List()
   278  	var wg sync.WaitGroup
   279  	wg.Add(len(peers))
   280  	successChan := make(chan bool, len(peers))
   281  
   282  	for _, peer := range peers {
   283  		go func(p Peer) {
   284  			defer wg.Done()
   285  			success := p.Send(e)
   286  			successChan <- success
   287  		}(peer)
   288  	}
   289  
   290  	go func() {
   291  		wg.Wait()
   292  		close(successChan)
   293  	}()
   294  
   295  	return successChan
   296  }
   297  
   298  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   299  // unconditional peers are not counted here.
   300  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   301  	peers := sw.peers.List()
   302  	for _, peer := range peers {
   303  		if peer.IsOutbound() {
   304  			if !sw.IsPeerUnconditional(peer.ID()) {
   305  				outbound++
   306  			}
   307  		} else {
   308  			if !sw.IsPeerUnconditional(peer.ID()) {
   309  				inbound++
   310  			}
   311  		}
   312  	}
   313  	dialing = sw.dialing.Size()
   314  	return
   315  }
   316  
   317  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   318  	_, ok := sw.unconditionalPeerIDs[id]
   319  	return ok
   320  }
   321  
   322  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   323  func (sw *Switch) MaxNumOutboundPeers() int {
   324  	return sw.config.MaxNumOutboundPeers
   325  }
   326  
   327  // Peers returns the set of peers that are connected to the switch.
   328  func (sw *Switch) Peers() IPeerSet {
   329  	return sw.peers
   330  }
   331  
   332  // StopPeerForError disconnects from a peer due to external error.
   333  // If the peer is persistent, it will attempt to reconnect.
   334  // TODO: make record depending on reason.
   335  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   336  	if !peer.IsRunning() {
   337  		return
   338  	}
   339  
   340  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   341  	sw.stopAndRemovePeer(peer, reason)
   342  
   343  	if peer.IsPersistent() {
   344  		var addr *NetAddress
   345  		if peer.IsOutbound() { // socket address for outbound peers
   346  			addr = peer.SocketAddr()
   347  		} else { // self-reported address for inbound peers
   348  			var err error
   349  			addr, err = peer.NodeInfo().NetAddress()
   350  			if err != nil {
   351  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   352  					"peer", peer, "err", err)
   353  				return
   354  			}
   355  		}
   356  		go sw.reconnectToPeer(addr)
   357  	}
   358  }
   359  
   360  // StopPeerGracefully disconnects from a peer gracefully.
   361  // TODO: handle graceful disconnects.
   362  func (sw *Switch) StopPeerGracefully(peer Peer) {
   363  	sw.Logger.Info("Stopping peer gracefully")
   364  	sw.stopAndRemovePeer(peer, nil)
   365  }
   366  
   367  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   368  	sw.transport.Cleanup(peer)
   369  	if err := peer.Stop(); err != nil {
   370  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   371  	}
   372  
   373  	for _, reactor := range sw.reactors {
   374  		reactor.RemovePeer(peer, reason)
   375  	}
   376  
   377  	// Removing a peer should go last to avoid a situation where a peer
   378  	// reconnect to our node and the switch calls InitPeer before
   379  	// RemovePeer is finished.
   380  	// https://github.com/tendermint/tendermint/issues/3338
   381  	if sw.peers.Remove(peer) {
   382  		sw.metrics.Peers.Add(float64(-1))
   383  	} else {
   384  		// Removal of the peer has failed. The function above sets a flag within the peer to mark this.
   385  		// We keep this message here as information to the developer.
   386  		sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID())
   387  	}
   388  }
   389  
   390  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   391  // with a fixed interval, then with exponential backoff.
   392  // If no success after all that, it stops trying, and leaves it
   393  // to the PEX/Addrbook to find the peer with the addr again
   394  // NOTE: this will keep trying even if the handshake or auth fails.
   395  // TODO: be more explicit with error types so we only retry on certain failures
   396  //   - ie. if we're getting ErrDuplicatePeer we can stop
   397  //     because the addrbook got us the peer back already
   398  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   399  	if sw.reconnecting.Has(string(addr.ID)) {
   400  		return
   401  	}
   402  	sw.reconnecting.Set(string(addr.ID), addr)
   403  	defer sw.reconnecting.Delete(string(addr.ID))
   404  
   405  	start := time.Now()
   406  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   407  	for i := 0; i < reconnectAttempts; i++ {
   408  		if !sw.IsRunning() {
   409  			return
   410  		}
   411  
   412  		err := sw.DialPeerWithAddress(addr)
   413  		if err == nil {
   414  			return // success
   415  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   416  			return
   417  		}
   418  
   419  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   420  		// sleep a set amount
   421  		sw.randomSleep(reconnectInterval)
   422  		continue
   423  	}
   424  
   425  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   426  		"addr", addr, "elapsed", time.Since(start))
   427  	for i := 0; i < reconnectBackOffAttempts; i++ {
   428  		if !sw.IsRunning() {
   429  			return
   430  		}
   431  
   432  		// sleep an exponentially increasing amount
   433  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   434  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   435  
   436  		err := sw.DialPeerWithAddress(addr)
   437  		if err == nil {
   438  			return // success
   439  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   440  			return
   441  		}
   442  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   443  	}
   444  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   445  }
   446  
   447  // SetAddrBook allows to set address book on Switch.
   448  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   449  	sw.addrBook = addrBook
   450  }
   451  
   452  // MarkPeerAsGood marks the given peer as good when it did something useful
   453  // like contributed to consensus.
   454  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   455  	if sw.addrBook != nil {
   456  		sw.addrBook.MarkGood(peer.ID())
   457  	}
   458  }
   459  
   460  //---------------------------------------------------------------------
   461  // Dialing
   462  
   463  type privateAddr interface {
   464  	PrivateAddr() bool
   465  }
   466  
   467  func isPrivateAddr(err error) bool {
   468  	te, ok := err.(privateAddr)
   469  	return ok && te.PrivateAddr()
   470  }
   471  
   472  // DialPeersAsync dials a list of peers asynchronously in random order.
   473  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   474  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   475  // encounter is returned.
   476  // Nop if there are no peers.
   477  func (sw *Switch) DialPeersAsync(peers []string) error {
   478  	netAddrs, errs := NewNetAddressStrings(peers)
   479  	// report all the errors
   480  	for _, err := range errs {
   481  		sw.Logger.Error("Error in peer's address", "err", err)
   482  	}
   483  	// return first non-ErrNetAddressLookup error
   484  	for _, err := range errs {
   485  		if _, ok := err.(ErrNetAddressLookup); ok {
   486  			continue
   487  		}
   488  		return err
   489  	}
   490  	sw.dialPeersAsync(netAddrs)
   491  	return nil
   492  }
   493  
   494  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   495  	ourAddr := sw.NetAddress()
   496  
   497  	// TODO: this code feels like it's in the wrong place.
   498  	// The integration tests depend on the addrBook being saved
   499  	// right away but maybe we can change that. Recall that
   500  	// the addrBook is only written to disk every 2min
   501  	if sw.addrBook != nil {
   502  		// add peers to `addrBook`
   503  		for _, netAddr := range netAddrs {
   504  			// do not add our address or ID
   505  			if !netAddr.Same(ourAddr) {
   506  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   507  					if isPrivateAddr(err) {
   508  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   509  					} else {
   510  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   511  					}
   512  				}
   513  			}
   514  		}
   515  		// Persist some peers to disk right away.
   516  		// NOTE: integration tests depend on this
   517  		sw.addrBook.Save()
   518  	}
   519  
   520  	// permute the list, dial them in random order.
   521  	perm := sw.rng.Perm(len(netAddrs))
   522  	for i := 0; i < len(perm); i++ {
   523  		go func(i int) {
   524  			j := perm[i]
   525  			addr := netAddrs[j]
   526  
   527  			if addr.Same(ourAddr) {
   528  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   529  				return
   530  			}
   531  
   532  			sw.randomSleep(0)
   533  
   534  			err := sw.DialPeerWithAddress(addr)
   535  			if err != nil {
   536  				switch err.(type) {
   537  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   538  					sw.Logger.Debug("Error dialing peer", "err", err)
   539  				default:
   540  					sw.Logger.Error("Error dialing peer", "err", err)
   541  				}
   542  			}
   543  		}(i)
   544  	}
   545  }
   546  
   547  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   548  // and authenticates successfully.
   549  // If we're currently dialing this address or it belongs to an existing peer,
   550  // ErrCurrentlyDialingOrExistingAddress is returned.
   551  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   552  	if sw.IsDialingOrExistingAddress(addr) {
   553  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   554  	}
   555  
   556  	sw.dialing.Set(string(addr.ID), addr)
   557  	defer sw.dialing.Delete(string(addr.ID))
   558  
   559  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   560  }
   561  
   562  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   563  func (sw *Switch) randomSleep(interval time.Duration) {
   564  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   565  	time.Sleep(r + interval)
   566  }
   567  
   568  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   569  // address or dialing it at the moment.
   570  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   571  	return sw.dialing.Has(string(addr.ID)) ||
   572  		sw.peers.Has(addr.ID) ||
   573  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   574  }
   575  
   576  // AddPersistentPeers allows you to set persistent peers. It ignores
   577  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   578  // returned.
   579  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   580  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   581  	netAddrs, errs := NewNetAddressStrings(addrs)
   582  	// report all the errors
   583  	for _, err := range errs {
   584  		sw.Logger.Error("Error in peer's address", "err", err)
   585  	}
   586  	// return first non-ErrNetAddressLookup error
   587  	for _, err := range errs {
   588  		if _, ok := err.(ErrNetAddressLookup); ok {
   589  			continue
   590  		}
   591  		return err
   592  	}
   593  	sw.persistentPeersAddrs = netAddrs
   594  	return nil
   595  }
   596  
   597  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   598  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   599  	for i, id := range ids {
   600  		err := validateID(ID(id))
   601  		if err != nil {
   602  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   603  		}
   604  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   605  	}
   606  	return nil
   607  }
   608  
   609  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   610  	validIDs := make([]string, 0, len(ids))
   611  	for i, id := range ids {
   612  		err := validateID(ID(id))
   613  		if err != nil {
   614  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   615  		}
   616  		validIDs = append(validIDs, id)
   617  	}
   618  
   619  	sw.addrBook.AddPrivateIDs(validIDs)
   620  
   621  	return nil
   622  }
   623  
   624  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   625  	for _, pa := range sw.persistentPeersAddrs {
   626  		if pa.Equals(na) {
   627  			return true
   628  		}
   629  	}
   630  	return false
   631  }
   632  
   633  func (sw *Switch) acceptRoutine() {
   634  	for {
   635  		p, err := sw.transport.Accept(peerConfig{
   636  			chDescs:       sw.chDescs,
   637  			onPeerError:   sw.StopPeerForError,
   638  			reactorsByCh:  sw.reactorsByCh,
   639  			msgTypeByChID: sw.msgTypeByChID,
   640  			metrics:       sw.metrics,
   641  			mlc:           sw.mlc,
   642  			isPersistent:  sw.IsPeerPersistent,
   643  		})
   644  		if err != nil {
   645  			switch err := err.(type) {
   646  			case ErrRejected:
   647  				if err.IsSelf() {
   648  					// Remove the given address from the address book and add to our addresses
   649  					// to avoid dialing in the future.
   650  					addr := err.Addr()
   651  					sw.addrBook.RemoveAddress(&addr)
   652  					sw.addrBook.AddOurAddress(&addr)
   653  				}
   654  
   655  				sw.Logger.Info(
   656  					"Inbound Peer rejected",
   657  					"err", err,
   658  					"numPeers", sw.peers.Size(),
   659  				)
   660  
   661  				continue
   662  			case ErrFilterTimeout:
   663  				sw.Logger.Error(
   664  					"Peer filter timed out",
   665  					"err", err,
   666  				)
   667  
   668  				continue
   669  			case ErrTransportClosed:
   670  				sw.Logger.Error(
   671  					"Stopped accept routine, as transport is closed",
   672  					"numPeers", sw.peers.Size(),
   673  				)
   674  			default:
   675  				sw.Logger.Error(
   676  					"Accept on transport errored",
   677  					"err", err,
   678  					"numPeers", sw.peers.Size(),
   679  				)
   680  				// We could instead have a retry loop around the acceptRoutine,
   681  				// but that would need to stop and let the node shutdown eventually.
   682  				// So might as well panic and let process managers restart the node.
   683  				// There's no point in letting the node run without the acceptRoutine,
   684  				// since it won't be able to accept new connections.
   685  				panic(fmt.Errorf("accept routine exited: %v", err))
   686  			}
   687  
   688  			break
   689  		}
   690  
   691  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   692  			// Ignore connection if we already have enough peers.
   693  			_, in, _ := sw.NumPeers()
   694  			if in >= sw.config.MaxNumInboundPeers {
   695  				sw.Logger.Info(
   696  					"Ignoring inbound connection: already have enough inbound peers",
   697  					"address", p.SocketAddr(),
   698  					"have", in,
   699  					"max", sw.config.MaxNumInboundPeers,
   700  				)
   701  
   702  				sw.transport.Cleanup(p)
   703  
   704  				continue
   705  			}
   706  
   707  		}
   708  
   709  		if err := sw.addPeer(p); err != nil {
   710  			sw.transport.Cleanup(p)
   711  			if p.IsRunning() {
   712  				_ = p.Stop()
   713  			}
   714  			sw.Logger.Info(
   715  				"Ignoring inbound connection: error while adding peer",
   716  				"err", err,
   717  				"id", p.ID(),
   718  			)
   719  		}
   720  	}
   721  }
   722  
   723  // dial the peer; make secret connection; authenticate against the dialed ID;
   724  // add the peer.
   725  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   726  // If peer is started successfully, reconnectLoop will start when
   727  // StopPeerForError is called.
   728  func (sw *Switch) addOutboundPeerWithConfig(
   729  	addr *NetAddress,
   730  	cfg *config.P2PConfig,
   731  ) error {
   732  	sw.Logger.Debug("Dialing peer", "address", addr)
   733  
   734  	// XXX(xla): Remove the leakage of test concerns in implementation.
   735  	if cfg.TestDialFail {
   736  		go sw.reconnectToPeer(addr)
   737  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   738  	}
   739  
   740  	p, err := sw.transport.Dial(*addr, peerConfig{
   741  		chDescs:       sw.chDescs,
   742  		onPeerError:   sw.StopPeerForError,
   743  		isPersistent:  sw.IsPeerPersistent,
   744  		reactorsByCh:  sw.reactorsByCh,
   745  		msgTypeByChID: sw.msgTypeByChID,
   746  		metrics:       sw.metrics,
   747  		mlc:           sw.mlc,
   748  	})
   749  	if err != nil {
   750  		if e, ok := err.(ErrRejected); ok {
   751  			if e.IsSelf() {
   752  				// Remove the given address from the address book and add to our addresses
   753  				// to avoid dialing in the future.
   754  				sw.addrBook.RemoveAddress(addr)
   755  				sw.addrBook.AddOurAddress(addr)
   756  
   757  				return err
   758  			}
   759  		}
   760  
   761  		// retry persistent peers after
   762  		// any dial error besides IsSelf()
   763  		if sw.IsPeerPersistent(addr) {
   764  			go sw.reconnectToPeer(addr)
   765  		}
   766  
   767  		return err
   768  	}
   769  
   770  	if err := sw.addPeer(p); err != nil {
   771  		sw.transport.Cleanup(p)
   772  		if p.IsRunning() {
   773  			_ = p.Stop()
   774  		}
   775  		return err
   776  	}
   777  
   778  	return nil
   779  }
   780  
   781  func (sw *Switch) filterPeer(p Peer) error {
   782  	// Avoid duplicate
   783  	if sw.peers.Has(p.ID()) {
   784  		return ErrRejected{id: p.ID(), isDuplicate: true}
   785  	}
   786  
   787  	errc := make(chan error, len(sw.peerFilters))
   788  
   789  	for _, f := range sw.peerFilters {
   790  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   791  			errc <- f(sw.peers, p)
   792  		}(f, p, errc)
   793  	}
   794  
   795  	for i := 0; i < cap(errc); i++ {
   796  		select {
   797  		case err := <-errc:
   798  			if err != nil {
   799  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   800  			}
   801  		case <-time.After(sw.filterTimeout):
   802  			return ErrFilterTimeout{}
   803  		}
   804  	}
   805  
   806  	return nil
   807  }
   808  
   809  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   810  // the peer is filtered out or failed to start or can't be added.
   811  func (sw *Switch) addPeer(p Peer) error {
   812  	if err := sw.filterPeer(p); err != nil {
   813  		return err
   814  	}
   815  
   816  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   817  
   818  	// Handle the shut down case where the switch has stopped but we're
   819  	// concurrently trying to add a peer.
   820  	if !sw.IsRunning() {
   821  		// XXX should this return an error or just log and terminate?
   822  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   823  		return nil
   824  	}
   825  
   826  	// Add some data to the peer, which is required by reactors.
   827  	for _, reactor := range sw.reactors {
   828  		p = reactor.InitPeer(p)
   829  	}
   830  
   831  	// Start the peer's send/recv routines.
   832  	// Must start it before adding it to the peer set
   833  	// to prevent Start and Stop from being called concurrently.
   834  	err := p.Start()
   835  	if err != nil {
   836  		// Should never happen
   837  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   838  		return err
   839  	}
   840  
   841  	// Add the peer to PeerSet. Do this before starting the reactors
   842  	// so that if Receive errors, we will find the peer and remove it.
   843  	// Add should not err since we already checked peers.Has().
   844  	if err := sw.peers.Add(p); err != nil {
   845  		switch err.(type) {
   846  		case ErrPeerRemoval:
   847  			sw.Logger.Error("Error starting peer ",
   848  				" err ", "Peer has already errored and removal was attempted.",
   849  				"peer", p.ID())
   850  		}
   851  		return err
   852  	}
   853  	sw.metrics.Peers.Add(float64(1))
   854  
   855  	// Start all the reactor protocols on the peer.
   856  	for _, reactor := range sw.reactors {
   857  		reactor.AddPeer(p)
   858  	}
   859  
   860  	sw.Logger.Debug("Added peer", "peer", p)
   861  
   862  	return nil
   863  }