github.com/vipernet-xyz/tm@v0.34.24/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/gogo/protobuf/proto"
    10  	"github.com/vipernet-xyz/tm/config"
    11  	"github.com/vipernet-xyz/tm/libs/cmap"
    12  	"github.com/vipernet-xyz/tm/libs/rand"
    13  	"github.com/vipernet-xyz/tm/libs/service"
    14  	"github.com/vipernet-xyz/tm/p2p/conn"
    15  )
    16  
const (
	// dialRandomizerIntervalMilliseconds bounds the random delay (in
	// milliseconds) that randomSleep adds before dialing peers or
	// reconnecting, to help prevent DoS.
	dialRandomizerIntervalMilliseconds = 3000

	// First reconnect phase: repeatedly try to reconnect at a fixed interval,
	// ie. 5 * 20 = 100s, not counting the random delay added to every sleep.
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// Second reconnect phase: exponential backoff. Attempt i (0-based) sleeps
	// reconnectBackOffBaseSeconds**i seconds before dialing, so the longest
	// single wait is 3**9 s (about 5.5 hours) and all ten waits add up to
	// roughly 8 hours.
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)
    32  
    33  // MConnConfig returns an MConnConfig with fields updated
    34  // from the P2PConfig.
    35  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    36  	mConfig := conn.DefaultMConnConfig()
    37  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    38  	mConfig.SendRate = cfg.SendRate
    39  	mConfig.RecvRate = cfg.RecvRate
    40  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    41  	return mConfig
    42  }
    43  
    44  //-----------------------------------------------------------------------------
    45  
// An AddrBook represents an address book from the pex package, which is used
// to store peer addresses.
//
// The Switch treats the address book as optional in some paths (see
// MarkPeerAsGood and dialPeersAsync, which skip it when it is nil).
type AddrBook interface {
	// AddAddress records addr; the Switch passes its own address as src when
	// it adds peers it was asked to dial.
	AddAddress(addr *NetAddress, src *NetAddress) error
	// AddPrivateIDs receives the validated IDs given to AddPrivatePeerIDs.
	AddPrivateIDs([]string)
	// AddOurAddress is called by the Switch with an address that turned out
	// to be our own, to avoid dialing it in the future.
	AddOurAddress(*NetAddress)
	// OurAddress is not called in this file; presumably it reports whether
	// the address was registered through AddOurAddress (confirm in pex).
	OurAddress(*NetAddress) bool
	// MarkGood is called by MarkPeerAsGood with the peer's ID.
	MarkGood(ID)
	// RemoveAddress is called by the Switch for an address that turned out to
	// be our own.
	RemoveAddress(*NetAddress)
	// HasAddress is not called in this file; presumably it reports whether
	// the address is known to the book (confirm in pex).
	HasAddress(*NetAddress) bool
	// Save is called by dialPeersAsync right after adding addresses so that
	// they are persisted immediately.
	Save()
}
    58  
// PeerFilterFunc to be implemented by filter hooks after a new Peer has been
// fully setup. The Switch calls every registered filter with its current peer
// set and the candidate peer; a non-nil error rejects the candidate.
type PeerFilterFunc func(IPeerSet, Peer) error
    62  
    63  //-----------------------------------------------------------------------------
    64  
// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
type Switch struct {
	service.BaseService

	config        *config.P2PConfig         // p2p settings given to NewSwitch
	reactors      map[string]Reactor        // registered reactors, keyed by the name passed to AddReactor
	chDescs       []*conn.ChannelDescriptor // channel descriptors collected from all registered reactors
	reactorsByCh  map[byte]Reactor          // owning reactor of each channel ID
	msgTypeByChID map[byte]proto.Message    // message type of each channel ID, taken from its descriptor
	peers         *PeerSet                  // currently connected peers
	dialing       *cmap.CMap                // addresses being dialed right now, keyed by string(addr.ID)
	reconnecting  *cmap.CMap                // addresses with a running reconnect loop, keyed by string(addr.ID)
	nodeInfo      NodeInfo                  // our node info
	nodeKey       *NodeKey                  // our node privkey
	addrBook      AddrBook                  // set via SetAddrBook; nil until then
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress
	// IDs registered through AddUnconditionalPeerIDs; such peers are not
	// counted by NumPeers and bypass the inbound peer limit in acceptRoutine.
	unconditionalPeerIDs map[ID]struct{}

	transport Transport // used to accept, dial and clean up peer connections

	filterTimeout time.Duration    // how long filterPeer waits for the peer filters
	peerFilters   []PeerFilterFunc // filters a new peer must pass before being added

	rng *rand.Rand // seed for randomizing dial times and orders

	metrics *Metrics           // NopMetrics unless replaced with WithMetrics
	mlc     *metricsLabelCache // handed to every peer through peerConfig
}
    97  
    98  // NetAddress returns the address the switch is listening on.
    99  func (sw *Switch) NetAddress() *NetAddress {
   100  	addr := sw.transport.NetAddress()
   101  	return &addr
   102  }
   103  
// SwitchOption sets an optional parameter on the Switch. Options are applied
// by NewSwitch, in the order given, after the Switch has been initialised
// with its defaults.
type SwitchOption func(*Switch)
   106  
   107  // NewSwitch creates a new Switch with the given config.
   108  func NewSwitch(
   109  	cfg *config.P2PConfig,
   110  	transport Transport,
   111  	options ...SwitchOption,
   112  ) *Switch {
   113  
   114  	sw := &Switch{
   115  		config:               cfg,
   116  		reactors:             make(map[string]Reactor),
   117  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   118  		reactorsByCh:         make(map[byte]Reactor),
   119  		msgTypeByChID:        make(map[byte]proto.Message),
   120  		peers:                NewPeerSet(),
   121  		dialing:              cmap.NewCMap(),
   122  		reconnecting:         cmap.NewCMap(),
   123  		metrics:              NopMetrics(),
   124  		transport:            transport,
   125  		filterTimeout:        defaultFilterTimeout,
   126  		persistentPeersAddrs: make([]*NetAddress, 0),
   127  		unconditionalPeerIDs: make(map[ID]struct{}),
   128  		mlc:                  newMetricsLabelCache(),
   129  	}
   130  
   131  	// Ensure we have a completely undeterministic PRNG.
   132  	sw.rng = rand.NewRand()
   133  
   134  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   135  
   136  	for _, option := range options {
   137  		option(sw)
   138  	}
   139  
   140  	return sw
   141  }
   142  
   143  // SwitchFilterTimeout sets the timeout used for peer filters.
   144  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   145  	return func(sw *Switch) { sw.filterTimeout = timeout }
   146  }
   147  
   148  // SwitchPeerFilters sets the filters for rejection of new peers.
   149  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   150  	return func(sw *Switch) { sw.peerFilters = filters }
   151  }
   152  
   153  // WithMetrics sets the metrics.
   154  func WithMetrics(metrics *Metrics) SwitchOption {
   155  	return func(sw *Switch) { sw.metrics = metrics }
   156  }
   157  
   158  //---------------------------------------------------------------------
   159  // Switch setup
   160  
   161  // AddReactor adds the given reactor to the switch.
   162  // NOTE: Not goroutine safe.
   163  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   164  	for _, chDesc := range reactor.GetChannels() {
   165  		chID := chDesc.ID
   166  		// No two reactors can share the same channel.
   167  		if sw.reactorsByCh[chID] != nil {
   168  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   169  		}
   170  		sw.chDescs = append(sw.chDescs, chDesc)
   171  		sw.reactorsByCh[chID] = reactor
   172  		sw.msgTypeByChID[chID] = chDesc.MessageType
   173  	}
   174  	sw.reactors[name] = reactor
   175  	reactor.SetSwitch(sw)
   176  	return reactor
   177  }
   178  
   179  // RemoveReactor removes the given Reactor from the Switch.
   180  // NOTE: Not goroutine safe.
   181  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   182  	for _, chDesc := range reactor.GetChannels() {
   183  		// remove channel description
   184  		for i := 0; i < len(sw.chDescs); i++ {
   185  			if chDesc.ID == sw.chDescs[i].ID {
   186  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   187  				break
   188  			}
   189  		}
   190  		delete(sw.reactorsByCh, chDesc.ID)
   191  		delete(sw.msgTypeByChID, chDesc.ID)
   192  	}
   193  	delete(sw.reactors, name)
   194  	reactor.SetSwitch(nil)
   195  }
   196  
   197  // Reactors returns a map of reactors registered on the switch.
   198  // NOTE: Not goroutine safe.
   199  func (sw *Switch) Reactors() map[string]Reactor {
   200  	return sw.reactors
   201  }
   202  
   203  // Reactor returns the reactor with the given name.
   204  // NOTE: Not goroutine safe.
   205  func (sw *Switch) Reactor(name string) Reactor {
   206  	return sw.reactors[name]
   207  }
   208  
   209  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   210  // NOTE: Not goroutine safe.
   211  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   212  	sw.nodeInfo = nodeInfo
   213  }
   214  
   215  // NodeInfo returns the switch's NodeInfo.
   216  // NOTE: Not goroutine safe.
   217  func (sw *Switch) NodeInfo() NodeInfo {
   218  	return sw.nodeInfo
   219  }
   220  
   221  // SetNodeKey sets the switch's private key for authenticated encryption.
   222  // NOTE: Not goroutine safe.
   223  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   224  	sw.nodeKey = nodeKey
   225  }
   226  
   227  //---------------------------------------------------------------------
   228  // Service start/stop
   229  
   230  // OnStart implements BaseService. It starts all the reactors and peers.
   231  func (sw *Switch) OnStart() error {
   232  	// Start reactors
   233  	for _, reactor := range sw.reactors {
   234  		err := reactor.Start()
   235  		if err != nil {
   236  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   237  		}
   238  	}
   239  
   240  	// Start accepting Peers.
   241  	go sw.acceptRoutine()
   242  
   243  	return nil
   244  }
   245  
   246  // OnStop implements BaseService. It stops all peers and reactors.
   247  func (sw *Switch) OnStop() {
   248  	// Stop peers
   249  	for _, p := range sw.peers.List() {
   250  		sw.stopAndRemovePeer(p, nil)
   251  	}
   252  
   253  	// Stop reactors
   254  	sw.Logger.Debug("Switch: Stopping reactors")
   255  	for _, reactor := range sw.reactors {
   256  		if err := reactor.Stop(); err != nil {
   257  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   258  		}
   259  	}
   260  }
   261  
   262  //---------------------------------------------------------------------
   263  // Peers
   264  
   265  // BroadcastEnvelope runs a go routine for each attempted send, which will block trying
   266  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   267  // success values for each attempted send (false if times out). Channel will be
   268  // closed once msg bytes are sent to all peers (or time out).
   269  // BroadcastEnvelope sends to the peers using the SendEnvelope method.
   270  //
   271  // NOTE: BroadcastEnvelope uses goroutines, so order of broadcast may not be preserved.
   272  func (sw *Switch) BroadcastEnvelope(e Envelope) chan bool {
   273  	sw.Logger.Debug("Broadcast", "channel", e.ChannelID)
   274  
   275  	peers := sw.peers.List()
   276  	var wg sync.WaitGroup
   277  	wg.Add(len(peers))
   278  	successChan := make(chan bool, len(peers))
   279  
   280  	for _, peer := range peers {
   281  		go func(p Peer) {
   282  			defer wg.Done()
   283  			success := SendEnvelopeShim(p, e, sw.Logger)
   284  			successChan <- success
   285  		}(peer)
   286  	}
   287  
   288  	go func() {
   289  		wg.Wait()
   290  		close(successChan)
   291  	}()
   292  
   293  	return successChan
   294  }
   295  
   296  // Broadcast runs a go routine for each attempted send, which will block trying
   297  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   298  // success values for each attempted send (false if times out). Channel will be
   299  // closed once msg bytes are sent to all peers (or time out).
   300  // Broadcast sends to the peers using the Send method.
   301  //
   302  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   303  //
   304  // Deprecated: code looking to broadcast data to all peers should use BroadcastEnvelope.
   305  // Broadcast will be removed in 0.37.
   306  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   307  	sw.Logger.Debug("Broadcast", "channel", chID)
   308  
   309  	peers := sw.peers.List()
   310  	var wg sync.WaitGroup
   311  	wg.Add(len(peers))
   312  	successChan := make(chan bool, len(peers))
   313  
   314  	for _, peer := range peers {
   315  		go func(p Peer) {
   316  			defer wg.Done()
   317  			success := p.Send(chID, msgBytes)
   318  			successChan <- success
   319  		}(peer)
   320  	}
   321  
   322  	go func() {
   323  		wg.Wait()
   324  		close(successChan)
   325  	}()
   326  
   327  	return successChan
   328  }
   329  
   330  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   331  // unconditional peers are not counted here.
   332  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   333  	peers := sw.peers.List()
   334  	for _, peer := range peers {
   335  		if peer.IsOutbound() {
   336  			if !sw.IsPeerUnconditional(peer.ID()) {
   337  				outbound++
   338  			}
   339  		} else {
   340  			if !sw.IsPeerUnconditional(peer.ID()) {
   341  				inbound++
   342  			}
   343  		}
   344  	}
   345  	dialing = sw.dialing.Size()
   346  	return
   347  }
   348  
   349  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   350  	_, ok := sw.unconditionalPeerIDs[id]
   351  	return ok
   352  }
   353  
   354  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   355  func (sw *Switch) MaxNumOutboundPeers() int {
   356  	return sw.config.MaxNumOutboundPeers
   357  }
   358  
   359  // Peers returns the set of peers that are connected to the switch.
   360  func (sw *Switch) Peers() IPeerSet {
   361  	return sw.peers
   362  }
   363  
   364  // StopPeerForError disconnects from a peer due to external error.
   365  // If the peer is persistent, it will attempt to reconnect.
   366  // TODO: make record depending on reason.
   367  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   368  	if !peer.IsRunning() {
   369  		return
   370  	}
   371  
   372  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   373  	sw.stopAndRemovePeer(peer, reason)
   374  
   375  	if peer.IsPersistent() {
   376  		var addr *NetAddress
   377  		if peer.IsOutbound() { // socket address for outbound peers
   378  			addr = peer.SocketAddr()
   379  		} else { // self-reported address for inbound peers
   380  			var err error
   381  			addr, err = peer.NodeInfo().NetAddress()
   382  			if err != nil {
   383  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   384  					"peer", peer, "err", err)
   385  				return
   386  			}
   387  		}
   388  		go sw.reconnectToPeer(addr)
   389  	}
   390  }
   391  
   392  // StopPeerGracefully disconnects from a peer gracefully.
   393  // TODO: handle graceful disconnects.
   394  func (sw *Switch) StopPeerGracefully(peer Peer) {
   395  	sw.Logger.Info("Stopping peer gracefully")
   396  	sw.stopAndRemovePeer(peer, nil)
   397  }
   398  
   399  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   400  	sw.transport.Cleanup(peer)
   401  	if err := peer.Stop(); err != nil {
   402  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   403  	}
   404  
   405  	for _, reactor := range sw.reactors {
   406  		reactor.RemovePeer(peer, reason)
   407  	}
   408  
   409  	// Removing a peer should go last to avoid a situation where a peer
   410  	// reconnect to our node and the switch calls InitPeer before
   411  	// RemovePeer is finished.
   412  	// https://github.com/vipernet-xyz/tm/issues/3338
   413  	if sw.peers.Remove(peer) {
   414  		sw.metrics.Peers.Add(float64(-1))
   415  	} else {
   416  		// Removal of the peer has failed. The function above sets a flag within the peer to mark this.
   417  		// We keep this message here as information to the developer.
   418  		sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID())
   419  	}
   420  }
   421  
   422  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   423  // with a fixed interval, then with exponential backoff.
   424  // If no success after all that, it stops trying, and leaves it
   425  // to the PEX/Addrbook to find the peer with the addr again
   426  // NOTE: this will keep trying even if the handshake or auth fails.
   427  // TODO: be more explicit with error types so we only retry on certain failures
   428  //   - ie. if we're getting ErrDuplicatePeer we can stop
   429  //     because the addrbook got us the peer back already
   430  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   431  	if sw.reconnecting.Has(string(addr.ID)) {
   432  		return
   433  	}
   434  	sw.reconnecting.Set(string(addr.ID), addr)
   435  	defer sw.reconnecting.Delete(string(addr.ID))
   436  
   437  	start := time.Now()
   438  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   439  	for i := 0; i < reconnectAttempts; i++ {
   440  		if !sw.IsRunning() {
   441  			return
   442  		}
   443  
   444  		err := sw.DialPeerWithAddress(addr)
   445  		if err == nil {
   446  			return // success
   447  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   448  			return
   449  		}
   450  
   451  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   452  		// sleep a set amount
   453  		sw.randomSleep(reconnectInterval)
   454  		continue
   455  	}
   456  
   457  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   458  		"addr", addr, "elapsed", time.Since(start))
   459  	for i := 0; i < reconnectBackOffAttempts; i++ {
   460  		if !sw.IsRunning() {
   461  			return
   462  		}
   463  
   464  		// sleep an exponentially increasing amount
   465  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   466  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   467  
   468  		err := sw.DialPeerWithAddress(addr)
   469  		if err == nil {
   470  			return // success
   471  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   472  			return
   473  		}
   474  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   475  	}
   476  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   477  }
   478  
   479  // SetAddrBook allows to set address book on Switch.
   480  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   481  	sw.addrBook = addrBook
   482  }
   483  
   484  // MarkPeerAsGood marks the given peer as good when it did something useful
   485  // like contributed to consensus.
   486  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   487  	if sw.addrBook != nil {
   488  		sw.addrBook.MarkGood(peer.ID())
   489  	}
   490  }
   491  
   492  //---------------------------------------------------------------------
   493  // Dialing
   494  
   495  type privateAddr interface {
   496  	PrivateAddr() bool
   497  }
   498  
   499  func isPrivateAddr(err error) bool {
   500  	te, ok := err.(privateAddr)
   501  	return ok && te.PrivateAddr()
   502  }
   503  
   504  // DialPeersAsync dials a list of peers asynchronously in random order.
   505  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   506  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   507  // encounter is returned.
   508  // Nop if there are no peers.
   509  func (sw *Switch) DialPeersAsync(peers []string) error {
   510  	netAddrs, errs := NewNetAddressStrings(peers)
   511  	// report all the errors
   512  	for _, err := range errs {
   513  		sw.Logger.Error("Error in peer's address", "err", err)
   514  	}
   515  	// return first non-ErrNetAddressLookup error
   516  	for _, err := range errs {
   517  		if _, ok := err.(ErrNetAddressLookup); ok {
   518  			continue
   519  		}
   520  		return err
   521  	}
   522  	sw.dialPeersAsync(netAddrs)
   523  	return nil
   524  }
   525  
   526  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   527  	ourAddr := sw.NetAddress()
   528  
   529  	// TODO: this code feels like it's in the wrong place.
   530  	// The integration tests depend on the addrBook being saved
   531  	// right away but maybe we can change that. Recall that
   532  	// the addrBook is only written to disk every 2min
   533  	if sw.addrBook != nil {
   534  		// add peers to `addrBook`
   535  		for _, netAddr := range netAddrs {
   536  			// do not add our address or ID
   537  			if !netAddr.Same(ourAddr) {
   538  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   539  					if isPrivateAddr(err) {
   540  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   541  					} else {
   542  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   543  					}
   544  				}
   545  			}
   546  		}
   547  		// Persist some peers to disk right away.
   548  		// NOTE: integration tests depend on this
   549  		sw.addrBook.Save()
   550  	}
   551  
   552  	// permute the list, dial them in random order.
   553  	perm := sw.rng.Perm(len(netAddrs))
   554  	for i := 0; i < len(perm); i++ {
   555  		go func(i int) {
   556  			j := perm[i]
   557  			addr := netAddrs[j]
   558  
   559  			if addr.Same(ourAddr) {
   560  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   561  				return
   562  			}
   563  
   564  			sw.randomSleep(0)
   565  
   566  			err := sw.DialPeerWithAddress(addr)
   567  			if err != nil {
   568  				switch err.(type) {
   569  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   570  					sw.Logger.Debug("Error dialing peer", "err", err)
   571  				default:
   572  					sw.Logger.Error("Error dialing peer", "err", err)
   573  				}
   574  			}
   575  		}(i)
   576  	}
   577  }
   578  
   579  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   580  // and authenticates successfully.
   581  // If we're currently dialing this address or it belongs to an existing peer,
   582  // ErrCurrentlyDialingOrExistingAddress is returned.
   583  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   584  	if sw.IsDialingOrExistingAddress(addr) {
   585  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   586  	}
   587  
   588  	sw.dialing.Set(string(addr.ID), addr)
   589  	defer sw.dialing.Delete(string(addr.ID))
   590  
   591  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   592  }
   593  
   594  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   595  func (sw *Switch) randomSleep(interval time.Duration) {
   596  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   597  	time.Sleep(r + interval)
   598  }
   599  
   600  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   601  // address or dialing it at the moment.
   602  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   603  	return sw.dialing.Has(string(addr.ID)) ||
   604  		sw.peers.Has(addr.ID) ||
   605  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   606  }
   607  
   608  // AddPersistentPeers allows you to set persistent peers. It ignores
   609  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   610  // returned.
   611  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   612  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   613  	netAddrs, errs := NewNetAddressStrings(addrs)
   614  	// report all the errors
   615  	for _, err := range errs {
   616  		sw.Logger.Error("Error in peer's address", "err", err)
   617  	}
   618  	// return first non-ErrNetAddressLookup error
   619  	for _, err := range errs {
   620  		if _, ok := err.(ErrNetAddressLookup); ok {
   621  			continue
   622  		}
   623  		return err
   624  	}
   625  	sw.persistentPeersAddrs = netAddrs
   626  	return nil
   627  }
   628  
   629  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   630  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   631  	for i, id := range ids {
   632  		err := validateID(ID(id))
   633  		if err != nil {
   634  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   635  		}
   636  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   637  	}
   638  	return nil
   639  }
   640  
   641  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   642  	validIDs := make([]string, 0, len(ids))
   643  	for i, id := range ids {
   644  		err := validateID(ID(id))
   645  		if err != nil {
   646  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   647  		}
   648  		validIDs = append(validIDs, id)
   649  	}
   650  
   651  	sw.addrBook.AddPrivateIDs(validIDs)
   652  
   653  	return nil
   654  }
   655  
   656  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   657  	for _, pa := range sw.persistentPeersAddrs {
   658  		if pa.Equals(na) {
   659  			return true
   660  		}
   661  	}
   662  	return false
   663  }
   664  
   665  func (sw *Switch) acceptRoutine() {
   666  	for {
   667  		p, err := sw.transport.Accept(peerConfig{
   668  			chDescs:       sw.chDescs,
   669  			onPeerError:   sw.StopPeerForError,
   670  			reactorsByCh:  sw.reactorsByCh,
   671  			msgTypeByChID: sw.msgTypeByChID,
   672  			metrics:       sw.metrics,
   673  			mlc:           sw.mlc,
   674  			isPersistent:  sw.IsPeerPersistent,
   675  		})
   676  		if err != nil {
   677  			switch err := err.(type) {
   678  			case ErrRejected:
   679  				if err.IsSelf() {
   680  					// Remove the given address from the address book and add to our addresses
   681  					// to avoid dialing in the future.
   682  					addr := err.Addr()
   683  					sw.addrBook.RemoveAddress(&addr)
   684  					sw.addrBook.AddOurAddress(&addr)
   685  				}
   686  
   687  				sw.Logger.Info(
   688  					"Inbound Peer rejected",
   689  					"err", err,
   690  					"numPeers", sw.peers.Size(),
   691  				)
   692  
   693  				continue
   694  			case ErrFilterTimeout:
   695  				sw.Logger.Error(
   696  					"Peer filter timed out",
   697  					"err", err,
   698  				)
   699  
   700  				continue
   701  			case ErrTransportClosed:
   702  				sw.Logger.Error(
   703  					"Stopped accept routine, as transport is closed",
   704  					"numPeers", sw.peers.Size(),
   705  				)
   706  			default:
   707  				sw.Logger.Error(
   708  					"Accept on transport errored",
   709  					"err", err,
   710  					"numPeers", sw.peers.Size(),
   711  				)
   712  				// We could instead have a retry loop around the acceptRoutine,
   713  				// but that would need to stop and let the node shutdown eventually.
   714  				// So might as well panic and let process managers restart the node.
   715  				// There's no point in letting the node run without the acceptRoutine,
   716  				// since it won't be able to accept new connections.
   717  				panic(fmt.Errorf("accept routine exited: %v", err))
   718  			}
   719  
   720  			break
   721  		}
   722  
   723  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   724  			// Ignore connection if we already have enough peers.
   725  			_, in, _ := sw.NumPeers()
   726  			if in >= sw.config.MaxNumInboundPeers {
   727  				sw.Logger.Info(
   728  					"Ignoring inbound connection: already have enough inbound peers",
   729  					"address", p.SocketAddr(),
   730  					"have", in,
   731  					"max", sw.config.MaxNumInboundPeers,
   732  				)
   733  
   734  				sw.transport.Cleanup(p)
   735  
   736  				continue
   737  			}
   738  
   739  		}
   740  
   741  		if err := sw.addPeer(p); err != nil {
   742  			sw.transport.Cleanup(p)
   743  			if p.IsRunning() {
   744  				_ = p.Stop()
   745  			}
   746  			sw.Logger.Info(
   747  				"Ignoring inbound connection: error while adding peer",
   748  				"err", err,
   749  				"id", p.ID(),
   750  			)
   751  		}
   752  	}
   753  }
   754  
   755  // dial the peer; make secret connection; authenticate against the dialed ID;
   756  // add the peer.
   757  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   758  // If peer is started successfully, reconnectLoop will start when
   759  // StopPeerForError is called.
   760  func (sw *Switch) addOutboundPeerWithConfig(
   761  	addr *NetAddress,
   762  	cfg *config.P2PConfig,
   763  ) error {
   764  	sw.Logger.Info("Dialing peer", "address", addr)
   765  
   766  	// XXX(xla): Remove the leakage of test concerns in implementation.
   767  	if cfg.TestDialFail {
   768  		go sw.reconnectToPeer(addr)
   769  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   770  	}
   771  
   772  	p, err := sw.transport.Dial(*addr, peerConfig{
   773  		chDescs:       sw.chDescs,
   774  		onPeerError:   sw.StopPeerForError,
   775  		isPersistent:  sw.IsPeerPersistent,
   776  		reactorsByCh:  sw.reactorsByCh,
   777  		msgTypeByChID: sw.msgTypeByChID,
   778  		metrics:       sw.metrics,
   779  		mlc:           sw.mlc,
   780  	})
   781  	if err != nil {
   782  		if e, ok := err.(ErrRejected); ok {
   783  			if e.IsSelf() {
   784  				// Remove the given address from the address book and add to our addresses
   785  				// to avoid dialing in the future.
   786  				sw.addrBook.RemoveAddress(addr)
   787  				sw.addrBook.AddOurAddress(addr)
   788  
   789  				return err
   790  			}
   791  		}
   792  
   793  		// retry persistent peers after
   794  		// any dial error besides IsSelf()
   795  		if sw.IsPeerPersistent(addr) {
   796  			go sw.reconnectToPeer(addr)
   797  		}
   798  
   799  		return err
   800  	}
   801  
   802  	if err := sw.addPeer(p); err != nil {
   803  		sw.transport.Cleanup(p)
   804  		if p.IsRunning() {
   805  			_ = p.Stop()
   806  		}
   807  		return err
   808  	}
   809  
   810  	return nil
   811  }
   812  
   813  func (sw *Switch) filterPeer(p Peer) error {
   814  	// Avoid duplicate
   815  	if sw.peers.Has(p.ID()) {
   816  		return ErrRejected{id: p.ID(), isDuplicate: true}
   817  	}
   818  
   819  	errc := make(chan error, len(sw.peerFilters))
   820  
   821  	for _, f := range sw.peerFilters {
   822  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   823  			errc <- f(sw.peers, p)
   824  		}(f, p, errc)
   825  	}
   826  
   827  	for i := 0; i < cap(errc); i++ {
   828  		select {
   829  		case err := <-errc:
   830  			if err != nil {
   831  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   832  			}
   833  		case <-time.After(sw.filterTimeout):
   834  			return ErrFilterTimeout{}
   835  		}
   836  	}
   837  
   838  	return nil
   839  }
   840  
   841  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   842  // the peer is filtered out or failed to start or can't be added.
   843  func (sw *Switch) addPeer(p Peer) error {
   844  	if err := sw.filterPeer(p); err != nil {
   845  		return err
   846  	}
   847  
   848  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   849  
   850  	// Handle the shut down case where the switch has stopped but we're
   851  	// concurrently trying to add a peer.
   852  	if !sw.IsRunning() {
   853  		// XXX should this return an error or just log and terminate?
   854  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   855  		return nil
   856  	}
   857  
   858  	// Add some data to the peer, which is required by reactors.
   859  	for _, reactor := range sw.reactors {
   860  		p = reactor.InitPeer(p)
   861  	}
   862  
   863  	// Start the peer's send/recv routines.
   864  	// Must start it before adding it to the peer set
   865  	// to prevent Start and Stop from being called concurrently.
   866  	err := p.Start()
   867  	if err != nil {
   868  		// Should never happen
   869  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   870  		return err
   871  	}
   872  
   873  	// Add the peer to PeerSet. Do this before starting the reactors
   874  	// so that if Receive errors, we will find the peer and remove it.
   875  	// Add should not err since we already checked peers.Has().
   876  	if err := sw.peers.Add(p); err != nil {
   877  		switch err.(type) {
   878  		case ErrPeerRemoval:
   879  			sw.Logger.Error("Error starting peer ",
   880  				" err ", "Peer has already errored and removal was attempted.",
   881  				"peer", p.ID())
   882  		}
   883  		return err
   884  	}
   885  	sw.metrics.Peers.Add(float64(1))
   886  
   887  	// Start all the reactor protocols on the peer.
   888  	for _, reactor := range sw.reactors {
   889  		reactor.AddPeer(p)
   890  	}
   891  
   892  	sw.Logger.Info("Added peer", "peer", p)
   893  
   894  	return nil
   895  }