github.com/Finschia/ostracon@v1.1.5/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/gogo/protobuf/proto"
    10  
    11  	"github.com/Finschia/ostracon/config"
    12  	"github.com/Finschia/ostracon/libs/cmap"
    13  	"github.com/Finschia/ostracon/libs/rand"
    14  	"github.com/Finschia/ostracon/libs/service"
    15  	"github.com/Finschia/ostracon/p2p/conn"
    16  )
    17  
const (
	// wait a random amount of time from this interval
	// before dialing peers or reconnecting to help prevent DoS
	dialRandomizerIntervalMilliseconds = 3000

	// repeatedly try to reconnect for a few minutes
	// ie. 5 * 20 = 100s (plus the random jitter added on every attempt)
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// then move into exponential backoff mode: backoff attempt i (0..9) sleeps
	// reconnectBackOffBaseSeconds**i seconds plus jitter before dialing,
	// ie. the longest single sleep is 3**9s (~5.5hrs) and all sleeps sum to ~8hrs
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)
    33  
    34  // MConnConfig returns an MConnConfig with fields updated
    35  // from the P2PConfig.
    36  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    37  	mConfig := conn.DefaultMConnConfig()
    38  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    39  	mConfig.SendRate = cfg.SendRate
    40  	mConfig.RecvRate = cfg.RecvRate
    41  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    42  	mConfig.RecvAsync = cfg.RecvAsync
    43  	return mConfig
    44  }
    45  
    46  //-----------------------------------------------------------------------------
    47  
// An AddrBook represents an address book from the pex package, which is used
// to store peer addresses.
//
// The Switch treats the address book as optional in some code paths (it is
// nil until SetAddrBook is called).
type AddrBook interface {
	// AddAddress records addr as learned from src. The switch calls it for
	// every address passed to dialPeersAsync, using its own address as src.
	AddAddress(addr *NetAddress, src *NetAddress) error
	// AddPrivateIDs registers peer IDs as private. The switch forwards the
	// validated IDs from AddPrivatePeerIDs.
	AddPrivateIDs([]string)
	// AddOurAddress marks an address as one of our own. The switch calls it
	// after a connection was rejected because it pointed back to this node.
	AddOurAddress(*NetAddress)
	// OurAddress presumably reports whether the address is one of our own;
	// not called in this file - confirm against the implementation.
	OurAddress(*NetAddress) bool
	// MarkGood flags the peer with the given ID as good (see MarkPeerAsGood).
	MarkGood(ID)
	// RemoveAddress drops an address from the book. The switch calls it for
	// addresses that turned out to be our own.
	RemoveAddress(*NetAddress)
	// HasAddress presumably reports whether the address is in the book; not
	// called in this file - confirm against the implementation.
	HasAddress(*NetAddress) bool
	// Save persists the book. dialPeersAsync calls it right after adding the
	// configured peers.
	Save()
}
    60  
// PeerFilterFunc to be implemented by filter hooks after a new Peer has been
// fully setup. The switch invokes every registered filter with its current
// peer set and the candidate peer; a non-nil error rejects the candidate.
type PeerFilterFunc func(IPeerSet, Peer) error
    64  
    65  //-----------------------------------------------------------------------------
    66  
// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
//
// Field overview:
//   - config: the P2P configuration (peer limits, duplicate-IP policy, ...).
//   - reactors: registered reactors keyed by the name given to AddReactor.
//   - chDescs: channel descriptors collected from all registered reactors.
//   - reactorsByCh: the reactor that owns each channel ID.
//   - msgTypeByChID: the message type declared for each channel ID.
//   - peers: the set of currently connected peers.
//   - dialing: addresses being dialed right now, keyed by peer ID.
//   - reconnecting: addresses with an active reconnect loop, keyed by peer ID.
//   - addrBook: optional address book; nil until SetAddrBook is called.
//   - transport: accepts inbound and dials outbound connections.
//   - filterTimeout / peerFilters: filters run on every new peer and the
//     time allowed to wait for each filter result.
//   - metrics / mlc: metrics sink and label cache handed to every peer.
type Switch struct {
	service.BaseService

	config        *config.P2PConfig
	reactors      map[string]Reactor
	chDescs       []*conn.ChannelDescriptor
	reactorsByCh  map[byte]Reactor
	msgTypeByChID map[byte]proto.Message
	peers         *PeerSet
	dialing       *cmap.CMap
	reconnecting  *cmap.CMap
	nodeInfo      NodeInfo // our node info
	nodeKey       *NodeKey // our node privkey
	addrBook      AddrBook
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress
	unconditionalPeerIDs map[ID]struct{}

	transport Transport

	filterTimeout time.Duration
	peerFilters   []PeerFilterFunc

	rng *rand.Rand // seed for randomizing dial times and orders

	metrics *Metrics
	mlc     *metricsLabelCache
}
    99  
   100  // NetAddress returns the address the switch is listening on.
   101  func (sw *Switch) NetAddress() *NetAddress {
   102  	addr := sw.transport.NetAddress()
   103  	return &addr
   104  }
   105  
// SwitchOption sets an optional parameter on the Switch. Options are applied
// by NewSwitch, in the order given, after the Switch has been populated with
// its defaults.
type SwitchOption func(*Switch)
   108  
   109  // NewSwitch creates a new Switch with the given config.
   110  func NewSwitch(
   111  	cfg *config.P2PConfig,
   112  	transport Transport,
   113  	options ...SwitchOption,
   114  ) *Switch {
   115  
   116  	sw := &Switch{
   117  		config:               cfg,
   118  		reactors:             make(map[string]Reactor),
   119  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   120  		reactorsByCh:         make(map[byte]Reactor),
   121  		msgTypeByChID:        make(map[byte]proto.Message),
   122  		peers:                NewPeerSet(),
   123  		dialing:              cmap.NewCMap(),
   124  		reconnecting:         cmap.NewCMap(),
   125  		metrics:              NopMetrics(),
   126  		transport:            transport,
   127  		filterTimeout:        defaultFilterTimeout,
   128  		persistentPeersAddrs: make([]*NetAddress, 0),
   129  		unconditionalPeerIDs: make(map[ID]struct{}),
   130  		mlc:                  newMetricsLabelCache(),
   131  	}
   132  
   133  	// Ensure we have a completely undeterministic PRNG.
   134  	sw.rng = rand.NewRand()
   135  
   136  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   137  
   138  	for _, option := range options {
   139  		option(sw)
   140  	}
   141  
   142  	return sw
   143  }
   144  
   145  // SwitchFilterTimeout sets the timeout used for peer filters.
   146  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   147  	return func(sw *Switch) { sw.filterTimeout = timeout }
   148  }
   149  
   150  // SwitchPeerFilters sets the filters for rejection of new peers.
   151  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   152  	return func(sw *Switch) { sw.peerFilters = filters }
   153  }
   154  
   155  // WithMetrics sets the metrics.
   156  func WithMetrics(metrics *Metrics) SwitchOption {
   157  	return func(sw *Switch) { sw.metrics = metrics }
   158  }
   159  
   160  //---------------------------------------------------------------------
   161  // Switch setup
   162  
   163  // AddReactor adds the given reactor to the switch.
   164  // NOTE: Not goroutine safe.
   165  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   166  	for _, chDesc := range reactor.GetChannels() {
   167  		chID := chDesc.ID
   168  		// No two reactors can share the same channel.
   169  		if sw.reactorsByCh[chID] != nil {
   170  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   171  		}
   172  		sw.chDescs = append(sw.chDescs, chDesc)
   173  		sw.reactorsByCh[chID] = reactor
   174  		sw.msgTypeByChID[chID] = chDesc.MessageType
   175  	}
   176  	sw.reactors[name] = reactor
   177  	reactor.SetSwitch(sw)
   178  	return reactor
   179  }
   180  
   181  // RemoveReactor removes the given Reactor from the Switch.
   182  // NOTE: Not goroutine safe.
   183  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   184  	for _, chDesc := range reactor.GetChannels() {
   185  		// remove channel description
   186  		for i := 0; i < len(sw.chDescs); i++ {
   187  			if chDesc.ID == sw.chDescs[i].ID {
   188  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   189  				break
   190  			}
   191  		}
   192  		delete(sw.reactorsByCh, chDesc.ID)
   193  		delete(sw.msgTypeByChID, chDesc.ID)
   194  	}
   195  	delete(sw.reactors, name)
   196  	reactor.SetSwitch(nil)
   197  }
   198  
   199  // Reactors returns a map of reactors registered on the switch.
   200  // NOTE: Not goroutine safe.
   201  func (sw *Switch) Reactors() map[string]Reactor {
   202  	return sw.reactors
   203  }
   204  
   205  // Reactor returns the reactor with the given name.
   206  // NOTE: Not goroutine safe.
   207  func (sw *Switch) Reactor(name string) Reactor {
   208  	return sw.reactors[name]
   209  }
   210  
   211  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   212  // NOTE: Not goroutine safe.
   213  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   214  	sw.nodeInfo = nodeInfo
   215  }
   216  
   217  // NodeInfo returns the switch's NodeInfo.
   218  // NOTE: Not goroutine safe.
   219  func (sw *Switch) NodeInfo() NodeInfo {
   220  	return sw.nodeInfo
   221  }
   222  
   223  // SetNodeKey sets the switch's private key for authenticated encryption.
   224  // NOTE: Not goroutine safe.
   225  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   226  	sw.nodeKey = nodeKey
   227  }
   228  
   229  //---------------------------------------------------------------------
   230  // Service start/stop
   231  
   232  // OnStart implements BaseService. It starts all the reactors and peers.
   233  func (sw *Switch) OnStart() error {
   234  	// Start reactors
   235  	for _, reactor := range sw.reactors {
   236  		err := reactor.Start()
   237  		if err != nil {
   238  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   239  		}
   240  	}
   241  
   242  	// Start accepting Peers.
   243  	go sw.acceptRoutine()
   244  
   245  	return nil
   246  }
   247  
   248  // OnStop implements BaseService. It stops all peers and reactors.
   249  func (sw *Switch) OnStop() {
   250  	// Stop peers
   251  	for _, p := range sw.peers.List() {
   252  		sw.stopAndRemovePeer(p, nil)
   253  	}
   254  
   255  	// Stop reactors
   256  	sw.Logger.Debug("Switch: Stopping reactors")
   257  	for _, reactor := range sw.reactors {
   258  		if err := reactor.Stop(); err != nil {
   259  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   260  		}
   261  	}
   262  }
   263  
   264  //---------------------------------------------------------------------
   265  // Peers
   266  
   267  // BroadcastEnvelope runs a go routine for each attempted send, which will block trying
   268  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   269  // success values for each attempted send (false if times out). Channel will be
   270  // closed once msg bytes are sent to all peers (or time out).
   271  // BroadcastEnvelope sends to the peers using the SendEnvelope method.
   272  //
   273  // NOTE: BroadcastEnvelope uses goroutines, so order of broadcast may not be preserved.
   274  func (sw *Switch) BroadcastEnvelope(e Envelope) chan bool {
   275  	sw.Logger.Debug("Broadcast", "channel", e.ChannelID)
   276  
   277  	peers := sw.peers.List()
   278  	var wg sync.WaitGroup
   279  	wg.Add(len(peers))
   280  	successChan := make(chan bool, len(peers))
   281  
   282  	for _, peer := range peers {
   283  		go func(p Peer) {
   284  			defer wg.Done()
   285  			success := SendEnvelopeShim(p, e, sw.Logger)
   286  			successChan <- success
   287  		}(peer)
   288  	}
   289  
   290  	go func() {
   291  		wg.Wait()
   292  		close(successChan)
   293  	}()
   294  
   295  	return successChan
   296  }
   297  
   298  // Broadcast runs a go routine for each attempted send, which will block trying
   299  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   300  // success values for each attempted send (false if times out). Channel will be
   301  // closed once msg bytes are sent to all peers (or time out).
   302  // Broadcast sends to the peers using the Send method.
   303  //
   304  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   305  //
   306  // Deprecated: code looking to broadcast data to all peers should use BroadcastEnvelope.
   307  // Broadcast will be removed in 0.37.
   308  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   309  	sw.Logger.Debug("Broadcast", "channel", chID)
   310  
   311  	peers := sw.peers.List()
   312  	var wg sync.WaitGroup
   313  	wg.Add(len(peers))
   314  	successChan := make(chan bool, len(peers))
   315  
   316  	for _, peer := range peers {
   317  		go func(p Peer) {
   318  			defer wg.Done()
   319  			success := p.Send(chID, msgBytes)
   320  			successChan <- success
   321  		}(peer)
   322  	}
   323  
   324  	go func() {
   325  		wg.Wait()
   326  		close(successChan)
   327  	}()
   328  
   329  	return successChan
   330  }
   331  
   332  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   333  // unconditional peers are not counted here.
   334  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   335  	peers := sw.peers.List()
   336  	for _, peer := range peers {
   337  		if peer.IsOutbound() {
   338  			if !sw.IsPeerUnconditional(peer.ID()) {
   339  				outbound++
   340  			}
   341  		} else {
   342  			if !sw.IsPeerUnconditional(peer.ID()) {
   343  				inbound++
   344  			}
   345  		}
   346  	}
   347  	dialing = sw.dialing.Size()
   348  	return
   349  }
   350  
   351  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   352  	_, ok := sw.unconditionalPeerIDs[id]
   353  	return ok
   354  }
   355  
   356  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   357  func (sw *Switch) MaxNumOutboundPeers() int {
   358  	return sw.config.MaxNumOutboundPeers
   359  }
   360  
   361  // Peers returns the set of peers that are connected to the switch.
   362  func (sw *Switch) Peers() IPeerSet {
   363  	return sw.peers
   364  }
   365  
   366  // StopPeerForError disconnects from a peer due to external error.
   367  // If the peer is persistent, it will attempt to reconnect.
   368  // TODO: make record depending on reason.
   369  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   370  	if !peer.IsRunning() {
   371  		return
   372  	}
   373  
   374  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   375  	sw.stopAndRemovePeer(peer, reason)
   376  
   377  	if peer.IsPersistent() {
   378  		var addr *NetAddress
   379  		if peer.IsOutbound() { // socket address for outbound peers
   380  			addr = peer.SocketAddr()
   381  		} else { // self-reported address for inbound peers
   382  			var err error
   383  			addr, err = peer.NodeInfo().NetAddress()
   384  			if err != nil {
   385  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   386  					"peer", peer, "err", err)
   387  				return
   388  			}
   389  		}
   390  		go sw.reconnectToPeer(addr)
   391  	}
   392  }
   393  
   394  // StopPeerGracefully disconnects from a peer gracefully.
   395  // TODO: handle graceful disconnects.
   396  func (sw *Switch) StopPeerGracefully(peer Peer) {
   397  	sw.Logger.Info("Stopping peer gracefully")
   398  	sw.stopAndRemovePeer(peer, nil)
   399  }
   400  
   401  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   402  	sw.transport.Cleanup(peer)
   403  	if err := peer.Stop(); err != nil {
   404  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   405  	}
   406  
   407  	for _, reactor := range sw.reactors {
   408  		reactor.RemovePeer(peer, reason)
   409  	}
   410  
   411  	// Removing a peer should go last to avoid a situation where a peer
   412  	// reconnect to our node and the switch calls InitPeer before
   413  	// RemovePeer is finished.
   414  	// https://github.com/tendermint/tendermint/issues/3338
   415  	if sw.peers.Remove(peer) {
   416  		sw.metrics.Peers.Add(float64(-1))
   417  	} else {
   418  		// Removal of the peer has failed. The function above sets a flag within the peer to mark this.
   419  		// We keep this message here as information to the developer.
   420  		sw.Logger.Debug("error on peer removal", ",", "peer", peer.ID())
   421  	}
   422  }
   423  
   424  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   425  // with a fixed interval, then with exponential backoff.
   426  // If no success after all that, it stops trying, and leaves it
   427  // to the PEX/Addrbook to find the peer with the addr again
   428  // NOTE: this will keep trying even if the handshake or auth fails.
   429  // TODO: be more explicit with error types so we only retry on certain failures
   430  //   - ie. if we're getting ErrDuplicatePeer we can stop
   431  //     because the addrbook got us the peer back already
   432  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   433  	if sw.reconnecting.Has(string(addr.ID)) {
   434  		return
   435  	}
   436  	sw.reconnecting.Set(string(addr.ID), addr)
   437  	defer sw.reconnecting.Delete(string(addr.ID))
   438  
   439  	start := time.Now()
   440  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   441  	for i := 0; i < reconnectAttempts; i++ {
   442  		if !sw.IsRunning() {
   443  			return
   444  		}
   445  
   446  		err := sw.DialPeerWithAddress(addr)
   447  		if err == nil {
   448  			return // success
   449  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   450  			return
   451  		}
   452  
   453  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   454  		// sleep a set amount
   455  		sw.randomSleep(reconnectInterval)
   456  		continue
   457  	}
   458  
   459  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   460  		"addr", addr, "elapsed", time.Since(start))
   461  	for i := 0; i < reconnectBackOffAttempts; i++ {
   462  		if !sw.IsRunning() {
   463  			return
   464  		}
   465  
   466  		// sleep an exponentially increasing amount
   467  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   468  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   469  
   470  		err := sw.DialPeerWithAddress(addr)
   471  		if err == nil {
   472  			return // success
   473  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   474  			return
   475  		}
   476  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   477  	}
   478  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   479  }
   480  
   481  // SetAddrBook allows to set address book on Switch.
   482  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   483  	sw.addrBook = addrBook
   484  }
   485  
   486  // MarkPeerAsGood marks the given peer as good when it did something useful
   487  // like contributed to consensus.
   488  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   489  	if sw.addrBook != nil {
   490  		sw.addrBook.MarkGood(peer.ID())
   491  	}
   492  }
   493  
   494  //---------------------------------------------------------------------
   495  // Dialing
   496  
   497  type privateAddr interface {
   498  	PrivateAddr() bool
   499  }
   500  
   501  func isPrivateAddr(err error) bool {
   502  	te, ok := err.(privateAddr)
   503  	return ok && te.PrivateAddr()
   504  }
   505  
   506  // DialPeersAsync dials a list of peers asynchronously in random order.
   507  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   508  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   509  // encounter is returned.
   510  // Nop if there are no peers.
   511  func (sw *Switch) DialPeersAsync(peers []string) error {
   512  	netAddrs, errs := NewNetAddressStrings(peers)
   513  	// report all the errors
   514  	for _, err := range errs {
   515  		sw.Logger.Error("Error in peer's address", "err", err)
   516  	}
   517  	// return first non-ErrNetAddressLookup error
   518  	for _, err := range errs {
   519  		if _, ok := err.(ErrNetAddressLookup); ok {
   520  			continue
   521  		}
   522  		return err
   523  	}
   524  	sw.dialPeersAsync(netAddrs)
   525  	return nil
   526  }
   527  
   528  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   529  	ourAddr := sw.NetAddress()
   530  
   531  	// TODO: this code feels like it's in the wrong place.
   532  	// The integration tests depend on the addrBook being saved
   533  	// right away but maybe we can change that. Recall that
   534  	// the addrBook is only written to disk every 2min
   535  	if sw.addrBook != nil {
   536  		// add peers to `addrBook`
   537  		for _, netAddr := range netAddrs {
   538  			// do not add our address or ID
   539  			if !netAddr.Same(ourAddr) {
   540  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   541  					if isPrivateAddr(err) {
   542  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   543  					} else {
   544  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   545  					}
   546  				}
   547  			}
   548  		}
   549  		// Persist some peers to disk right away.
   550  		// NOTE: integration tests depend on this
   551  		sw.addrBook.Save()
   552  	}
   553  
   554  	// permute the list, dial them in random order.
   555  	perm := sw.rng.Perm(len(netAddrs))
   556  	for i := 0; i < len(perm); i++ {
   557  		go func(i int) {
   558  			j := perm[i]
   559  			addr := netAddrs[j]
   560  
   561  			if addr.Same(ourAddr) {
   562  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   563  				return
   564  			}
   565  
   566  			sw.randomSleep(0)
   567  
   568  			err := sw.DialPeerWithAddress(addr)
   569  			if err != nil {
   570  				switch err.(type) {
   571  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   572  					sw.Logger.Debug("Error dialing peer", "err", err)
   573  				default:
   574  					sw.Logger.Error("Error dialing peer", "err", err)
   575  				}
   576  			}
   577  		}(i)
   578  	}
   579  }
   580  
   581  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   582  // and authenticates successfully.
   583  // If we're currently dialing this address or it belongs to an existing peer,
   584  // ErrCurrentlyDialingOrExistingAddress is returned.
   585  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   586  	if sw.IsDialingOrExistingAddress(addr) {
   587  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   588  	}
   589  
   590  	sw.dialing.Set(string(addr.ID), addr)
   591  	defer sw.dialing.Delete(string(addr.ID))
   592  
   593  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   594  }
   595  
   596  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   597  func (sw *Switch) randomSleep(interval time.Duration) {
   598  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   599  	time.Sleep(r + interval)
   600  }
   601  
   602  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   603  // address or dialing it at the moment.
   604  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   605  	return sw.dialing.Has(string(addr.ID)) ||
   606  		sw.peers.Has(addr.ID) ||
   607  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   608  }
   609  
   610  // AddPersistentPeers allows you to set persistent peers. It ignores
   611  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   612  // returned.
   613  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   614  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   615  	netAddrs, errs := NewNetAddressStrings(addrs)
   616  	// report all the errors
   617  	for _, err := range errs {
   618  		sw.Logger.Error("Error in peer's address", "err", err)
   619  	}
   620  	// return first non-ErrNetAddressLookup error
   621  	for _, err := range errs {
   622  		if _, ok := err.(ErrNetAddressLookup); ok {
   623  			continue
   624  		}
   625  		return err
   626  	}
   627  	sw.persistentPeersAddrs = netAddrs
   628  	return nil
   629  }
   630  
   631  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   632  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   633  	for i, id := range ids {
   634  		err := validateID(ID(id))
   635  		if err != nil {
   636  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   637  		}
   638  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   639  	}
   640  	return nil
   641  }
   642  
   643  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   644  	validIDs := make([]string, 0, len(ids))
   645  	for i, id := range ids {
   646  		err := validateID(ID(id))
   647  		if err != nil {
   648  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   649  		}
   650  		validIDs = append(validIDs, id)
   651  	}
   652  
   653  	sw.addrBook.AddPrivateIDs(validIDs)
   654  
   655  	return nil
   656  }
   657  
   658  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   659  	for _, pa := range sw.persistentPeersAddrs {
   660  		if pa.Equals(na) {
   661  			return true
   662  		}
   663  	}
   664  	return false
   665  }
   666  
   667  func (sw *Switch) acceptRoutine() {
   668  	for {
   669  		p, err := sw.transport.Accept(peerConfig{
   670  			chDescs:       sw.chDescs,
   671  			onPeerError:   sw.StopPeerForError,
   672  			reactorsByCh:  sw.reactorsByCh,
   673  			msgTypeByChID: sw.msgTypeByChID,
   674  			metrics:       sw.metrics,
   675  			mlc:           sw.mlc,
   676  			isPersistent:  sw.IsPeerPersistent,
   677  		})
   678  		if err != nil {
   679  			switch err := err.(type) {
   680  			case ErrRejected:
   681  				if err.IsSelf() {
   682  					// Remove the given address from the address book and add to our addresses
   683  					// to avoid dialing in the future.
   684  					addr := err.Addr()
   685  					sw.addrBook.RemoveAddress(&addr)
   686  					sw.addrBook.AddOurAddress(&addr)
   687  				}
   688  
   689  				sw.Logger.Info(
   690  					"Inbound Peer rejected",
   691  					"err", err,
   692  					"numPeers", sw.peers.Size(),
   693  				)
   694  
   695  				continue
   696  			case ErrFilterTimeout:
   697  				sw.Logger.Error(
   698  					"Peer filter timed out",
   699  					"err", err,
   700  				)
   701  
   702  				continue
   703  			case ErrTransportClosed:
   704  				sw.Logger.Error(
   705  					"Stopped accept routine, as transport is closed",
   706  					"numPeers", sw.peers.Size(),
   707  				)
   708  			default:
   709  				sw.Logger.Error(
   710  					"Accept on transport errored",
   711  					"err", err,
   712  					"numPeers", sw.peers.Size(),
   713  				)
   714  				// We could instead have a retry loop around the acceptRoutine,
   715  				// but that would need to stop and let the node shutdown eventually.
   716  				// So might as well panic and let process managers restart the node.
   717  				// There's no point in letting the node run without the acceptRoutine,
   718  				// since it won't be able to accept new connections.
   719  				panic(fmt.Errorf("accept routine exited: %v", err))
   720  			}
   721  
   722  			break
   723  		}
   724  
   725  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   726  			// Ignore connection if we already have enough peers.
   727  			_, in, _ := sw.NumPeers()
   728  			if in >= sw.config.MaxNumInboundPeers {
   729  				sw.Logger.Info(
   730  					"Ignoring inbound connection: already have enough inbound peers",
   731  					"address", p.SocketAddr(),
   732  					"have", in,
   733  					"max", sw.config.MaxNumInboundPeers,
   734  				)
   735  
   736  				sw.transport.Cleanup(p)
   737  
   738  				continue
   739  			}
   740  
   741  		}
   742  
   743  		if err := sw.addPeer(p); err != nil {
   744  			sw.transport.Cleanup(p)
   745  			if p.IsRunning() {
   746  				_ = p.Stop()
   747  			}
   748  			sw.Logger.Info(
   749  				"Ignoring inbound connection: error while adding peer",
   750  				"err", err,
   751  				"id", p.ID(),
   752  			)
   753  		}
   754  	}
   755  }
   756  
   757  // dial the peer; make secret connection; authenticate against the dialed ID;
   758  // add the peer.
   759  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   760  // If peer is started successfully, reconnectLoop will start when
   761  // StopPeerForError is called.
   762  func (sw *Switch) addOutboundPeerWithConfig(
   763  	addr *NetAddress,
   764  	cfg *config.P2PConfig,
   765  ) error {
   766  	sw.Logger.Info("Dialing peer", "address", addr)
   767  
   768  	// XXX(xla): Remove the leakage of test concerns in implementation.
   769  	if cfg.TestDialFail {
   770  		go sw.reconnectToPeer(addr)
   771  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   772  	}
   773  
   774  	p, err := sw.transport.Dial(*addr, peerConfig{
   775  		chDescs:       sw.chDescs,
   776  		onPeerError:   sw.StopPeerForError,
   777  		isPersistent:  sw.IsPeerPersistent,
   778  		reactorsByCh:  sw.reactorsByCh,
   779  		msgTypeByChID: sw.msgTypeByChID,
   780  		metrics:       sw.metrics,
   781  		mlc:           sw.mlc,
   782  	})
   783  	if err != nil {
   784  		if e, ok := err.(ErrRejected); ok {
   785  			if e.IsSelf() {
   786  				// Remove the given address from the address book and add to our addresses
   787  				// to avoid dialing in the future.
   788  				sw.addrBook.RemoveAddress(addr)
   789  				sw.addrBook.AddOurAddress(addr)
   790  
   791  				return err
   792  			}
   793  		}
   794  
   795  		// retry persistent peers after
   796  		// any dial error besides IsSelf()
   797  		if sw.IsPeerPersistent(addr) {
   798  			go sw.reconnectToPeer(addr)
   799  		}
   800  
   801  		return err
   802  	}
   803  
   804  	if err := sw.addPeer(p); err != nil {
   805  		sw.transport.Cleanup(p)
   806  		if p.IsRunning() {
   807  			_ = p.Stop()
   808  		}
   809  		return err
   810  	}
   811  
   812  	return nil
   813  }
   814  
   815  func (sw *Switch) filterPeer(p Peer) error {
   816  	// Avoid duplicate
   817  	if sw.peers.Has(p.ID()) {
   818  		return ErrRejected{id: p.ID(), isDuplicate: true}
   819  	}
   820  
   821  	errc := make(chan error, len(sw.peerFilters))
   822  
   823  	for _, f := range sw.peerFilters {
   824  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   825  			errc <- f(sw.peers, p)
   826  		}(f, p, errc)
   827  	}
   828  
   829  	for i := 0; i < cap(errc); i++ {
   830  		select {
   831  		case err := <-errc:
   832  			if err != nil {
   833  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   834  			}
   835  		case <-time.After(sw.filterTimeout):
   836  			return ErrFilterTimeout{}
   837  		}
   838  	}
   839  
   840  	return nil
   841  }
   842  
   843  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   844  // the peer is filtered out or failed to start or can't be added.
   845  func (sw *Switch) addPeer(p Peer) error {
   846  	if err := sw.filterPeer(p); err != nil {
   847  		return err
   848  	}
   849  
   850  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   851  
   852  	// Handle the shut down case where the switch has stopped but we're
   853  	// concurrently trying to add a peer.
   854  	if !sw.IsRunning() {
   855  		// XXX should this return an error or just log and terminate?
   856  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   857  		return nil
   858  	}
   859  
   860  	// Add some data to the peer, which is required by reactors.
   861  	for _, reactor := range sw.reactors {
   862  		p = reactor.InitPeer(p)
   863  	}
   864  
   865  	// Start the peer's send/recv routines.
   866  	// Must start it before adding it to the peer set
   867  	// to prevent Start and Stop from being called concurrently.
   868  	err := p.Start()
   869  	if err != nil {
   870  		// Should never happen
   871  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   872  		return err
   873  	}
   874  
   875  	// Add the peer to PeerSet. Do this before starting the reactors
   876  	// so that if Receive errors, we will find the peer and remove it.
   877  	// Add should not err since we already checked peers.Has().
   878  	if err := sw.peers.Add(p); err != nil {
   879  		switch err.(type) {
   880  		case ErrPeerRemoval:
   881  			sw.Logger.Error("Error starting peer ",
   882  				" err ", "Peer has already errored and removal was attempted.",
   883  				"peer", p.ID())
   884  		}
   885  		return err
   886  	}
   887  	sw.metrics.Peers.Add(float64(1))
   888  
   889  	// Start all the reactor protocols on the peer.
   890  	for _, reactor := range sw.reactors {
   891  		reactor.AddPeer(p)
   892  	}
   893  
   894  	sw.Logger.Info("Added peer", "peer", p)
   895  
   896  	return nil
   897  }