github.com/line/ostracon@v1.0.10-0.20230328032236-7f20145f065d/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/line/ostracon/config"
    10  	"github.com/line/ostracon/libs/cmap"
    11  	"github.com/line/ostracon/libs/rand"
    12  	"github.com/line/ostracon/libs/service"
    13  	"github.com/line/ostracon/p2p/conn"
    14  )
    15  
const (
	// Upper bound, in milliseconds, handed to the random number generator by
	// randomSleep. The resulting random delay is added before dialing peers or
	// reconnecting to help prevent DoS.
	dialRandomizerIntervalMilliseconds = 3000

	// First reconnect phase: retry at a fixed interval,
	// ie. 20 * 5s = 100s (plus the random delay randomSleep adds per attempt).
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// Second reconnect phase: exponential backoff. reconnectToPeer sleeps
	// reconnectBackOffBaseSeconds**i seconds before attempt i (i = 0..9), so
	// the longest single sleep is 3**9s (~5.5hrs) and all ten sleeps add up
	// to (3**10-1)/2 seconds (~8.2hrs), plus the random delay per attempt.
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)
    31  
    32  // MConnConfig returns an MConnConfig with fields updated
    33  // from the P2PConfig.
    34  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    35  	mConfig := conn.DefaultMConnConfig()
    36  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    37  	mConfig.SendRate = cfg.SendRate
    38  	mConfig.RecvRate = cfg.RecvRate
    39  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    40  	mConfig.RecvAsync = cfg.RecvAsync
    41  	return mConfig
    42  }
    43  
    44  //-----------------------------------------------------------------------------
    45  
// An AddrBook represents an address book from the pex package, which is used
// to store peer addresses.
type AddrBook interface {
	// AddAddress records addr together with the source it was learned from.
	// The switch passes its own address as src when adding configured peers.
	AddAddress(addr *NetAddress, src *NetAddress) error
	// AddPrivateIDs registers the given peer IDs as private
	// (see Switch.AddPrivatePeerIDs).
	AddPrivateIDs([]string)
	// AddOurAddress records an address as one of our own. The switch calls it
	// after a connection was rejected for being a connection to ourselves.
	AddOurAddress(*NetAddress)
	// OurAddress presumably reports whether the address was registered through
	// AddOurAddress; it is not called in this file, so confirm against the
	// implementation.
	OurAddress(*NetAddress) bool
	// MarkGood marks the peer with the given ID as good
	// (see Switch.MarkPeerAsGood).
	MarkGood(ID)
	// RemoveAddress drops the address from the book.
	RemoveAddress(*NetAddress)
	// HasAddress presumably reports whether the address is stored in the book;
	// it is not called in this file, so confirm against the implementation.
	HasAddress(*NetAddress) bool
	// Save persists the book. The switch calls it right after adding the
	// configured peers because integration tests depend on that.
	Save()
}
    58  
// PeerFilterFunc to be implemented by filter hooks after a new Peer has been
// fully setup. The switch calls each filter with its current peer set and the
// candidate peer; a non-nil error rejects the candidate (see filterPeer).
type PeerFilterFunc func(IPeerSet, Peer) error
    62  
    63  //-----------------------------------------------------------------------------
    64  
// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
type Switch struct {
	service.BaseService

	// config is the P2P configuration handed to NewSwitch.
	// reactors maps a name to the reactor registered under it (AddReactor).
	// chDescs collects the channel descriptors of all registered reactors.
	// reactorsByCh maps a channel ID to the single reactor that owns it.
	// peers is the set of peers currently added to the switch.
	// dialing holds the addresses being dialed right now, keyed by peer ID.
	// reconnecting holds the addresses with a reconnect loop in progress,
	// keyed by peer ID.
	config       *config.P2PConfig
	reactors     map[string]Reactor
	chDescs      []*conn.ChannelDescriptor
	reactorsByCh map[byte]Reactor
	peers        *PeerSet
	dialing      *cmap.CMap
	reconnecting *cmap.CMap
	nodeInfo     NodeInfo // our node info
	nodeKey      *NodeKey // our node privkey
	// addrBook is optional; it stays nil until SetAddrBook is called.
	addrBook     AddrBook
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress
	// IDs of peers that are exempt from the peer counts returned by NumPeers
	// and from the inbound peer limit enforced in acceptRoutine.
	unconditionalPeerIDs map[ID]struct{}

	// transport accepts inbound and dials outbound connections.
	transport Transport

	// filterTimeout bounds each wait for a peer filter result in filterPeer.
	filterTimeout time.Duration
	peerFilters   []PeerFilterFunc

	rng *rand.Rand // seed for randomizing dial times and orders

	// metrics defaults to NopMetrics() and can be replaced via WithMetrics.
	metrics *Metrics
}
    95  
    96  // NetAddress returns the address the switch is listening on.
    97  func (sw *Switch) NetAddress() *NetAddress {
    98  	addr := sw.transport.NetAddress()
    99  	return &addr
   100  }
   101  
// SwitchOption sets an optional parameter on the Switch. NewSwitch applies the
// options in the order given, after all defaults have been set.
type SwitchOption func(*Switch)
   104  
   105  // NewSwitch creates a new Switch with the given config.
   106  func NewSwitch(
   107  	cfg *config.P2PConfig,
   108  	transport Transport,
   109  	options ...SwitchOption,
   110  ) *Switch {
   111  	sw := &Switch{
   112  		config:               cfg,
   113  		reactors:             make(map[string]Reactor),
   114  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   115  		reactorsByCh:         make(map[byte]Reactor),
   116  		peers:                NewPeerSet(),
   117  		dialing:              cmap.NewCMap(),
   118  		reconnecting:         cmap.NewCMap(),
   119  		metrics:              NopMetrics(),
   120  		transport:            transport,
   121  		filterTimeout:        defaultFilterTimeout,
   122  		persistentPeersAddrs: make([]*NetAddress, 0),
   123  		unconditionalPeerIDs: make(map[ID]struct{}),
   124  	}
   125  
   126  	// Ensure we have a completely undeterministic PRNG.
   127  	sw.rng = rand.NewRand()
   128  
   129  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   130  
   131  	for _, option := range options {
   132  		option(sw)
   133  	}
   134  
   135  	return sw
   136  }
   137  
   138  // SwitchFilterTimeout sets the timeout used for peer filters.
   139  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   140  	return func(sw *Switch) { sw.filterTimeout = timeout }
   141  }
   142  
   143  // SwitchPeerFilters sets the filters for rejection of new peers.
   144  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   145  	return func(sw *Switch) { sw.peerFilters = filters }
   146  }
   147  
   148  // WithMetrics sets the metrics.
   149  func WithMetrics(metrics *Metrics) SwitchOption {
   150  	return func(sw *Switch) { sw.metrics = metrics }
   151  }
   152  
   153  //---------------------------------------------------------------------
   154  // Switch setup
   155  
   156  // AddReactor adds the given reactor to the switch.
   157  // NOTE: Not goroutine safe.
   158  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   159  	for _, chDesc := range reactor.GetChannels() {
   160  		chID := chDesc.ID
   161  		// No two reactors can share the same channel.
   162  		if sw.reactorsByCh[chID] != nil {
   163  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   164  		}
   165  		sw.chDescs = append(sw.chDescs, chDesc)
   166  		sw.reactorsByCh[chID] = reactor
   167  	}
   168  	sw.reactors[name] = reactor
   169  	reactor.SetSwitch(sw)
   170  	return reactor
   171  }
   172  
   173  // RemoveReactor removes the given Reactor from the Switch.
   174  // NOTE: Not goroutine safe.
   175  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   176  	for _, chDesc := range reactor.GetChannels() {
   177  		// remove channel description
   178  		for i := 0; i < len(sw.chDescs); i++ {
   179  			if chDesc.ID == sw.chDescs[i].ID {
   180  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   181  				break
   182  			}
   183  		}
   184  		delete(sw.reactorsByCh, chDesc.ID)
   185  	}
   186  	delete(sw.reactors, name)
   187  	reactor.SetSwitch(nil)
   188  }
   189  
// Reactors returns a map of reactors registered on the switch.
// The returned map is the switch's internal registry, not a copy, so changes
// made through it are visible to the switch.
// NOTE: Not goroutine safe.
func (sw *Switch) Reactors() map[string]Reactor {
	return sw.reactors
}
   195  
// Reactor returns the reactor with the given name, or nil if no reactor is
// registered under that name.
// NOTE: Not goroutine safe.
func (sw *Switch) Reactor(name string) Reactor {
	return sw.reactors[name]
}
   201  
// SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
// The value is only stored here; it can be read back with NodeInfo.
// NOTE: Not goroutine safe.
func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
	sw.nodeInfo = nodeInfo
}
   207  
// NodeInfo returns the switch's NodeInfo as last stored by SetNodeInfo.
// NOTE: Not goroutine safe.
func (sw *Switch) NodeInfo() NodeInfo {
	return sw.nodeInfo
}
   213  
// SetNodeKey sets the switch's private key for authenticated encryption.
// The key is only stored here; nothing else in this file reads it.
// NOTE: Not goroutine safe.
func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
	sw.nodeKey = nodeKey
}
   219  
   220  //---------------------------------------------------------------------
   221  // Service start/stop
   222  
   223  // OnStart implements BaseService. It starts all the reactors and peers.
   224  func (sw *Switch) OnStart() error {
   225  	// Start reactors
   226  	for _, reactor := range sw.reactors {
   227  		err := reactor.Start()
   228  		if err != nil {
   229  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   230  		}
   231  	}
   232  
   233  	// Start accepting Peers.
   234  	go sw.acceptRoutine()
   235  
   236  	return nil
   237  }
   238  
   239  // OnStop implements BaseService. It stops all peers and reactors.
   240  func (sw *Switch) OnStop() {
   241  	// Stop peers
   242  	for _, p := range sw.peers.List() {
   243  		sw.stopAndRemovePeer(p, nil)
   244  	}
   245  
   246  	// Stop reactors
   247  	sw.Logger.Debug("Switch: Stopping reactors")
   248  	for _, reactor := range sw.reactors {
   249  		if err := reactor.Stop(); err != nil {
   250  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   251  		}
   252  	}
   253  }
   254  
   255  //---------------------------------------------------------------------
   256  // Peers
   257  
   258  // Broadcast runs a go routine for each attempted send, which will block trying
   259  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   260  // success values for each attempted send (false if times out). Channel will be
   261  // closed once msg bytes are sent to all peers (or time out).
   262  //
   263  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   264  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   265  	sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", fmt.Sprintf("%X", msgBytes))
   266  
   267  	peers := sw.peers.List()
   268  	var wg sync.WaitGroup
   269  	wg.Add(len(peers))
   270  	successChan := make(chan bool, len(peers))
   271  
   272  	for _, peer := range peers {
   273  		go func(p Peer) {
   274  			defer wg.Done()
   275  			success := p.Send(chID, msgBytes)
   276  			successChan <- success
   277  		}(peer)
   278  	}
   279  
   280  	go func() {
   281  		wg.Wait()
   282  		close(successChan)
   283  	}()
   284  
   285  	return successChan
   286  }
   287  
   288  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   289  // unconditional peers are not counted here.
   290  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   291  	peers := sw.peers.List()
   292  	for _, peer := range peers {
   293  		if peer.IsOutbound() {
   294  			if !sw.IsPeerUnconditional(peer.ID()) {
   295  				outbound++
   296  			}
   297  		} else {
   298  			if !sw.IsPeerUnconditional(peer.ID()) {
   299  				inbound++
   300  			}
   301  		}
   302  	}
   303  	dialing = sw.dialing.Size()
   304  	return
   305  }
   306  
   307  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   308  	_, ok := sw.unconditionalPeerIDs[id]
   309  	return ok
   310  }
   311  
// MaxNumOutboundPeers returns a maximum number of outbound peers, as set in
// the switch's P2P configuration.
func (sw *Switch) MaxNumOutboundPeers() int {
	return sw.config.MaxNumOutboundPeers
}
   316  
// Peers returns the set of peers that are connected to the switch.
// The switch's own peer set is returned behind the IPeerSet interface; no
// copy is made.
func (sw *Switch) Peers() IPeerSet {
	return sw.peers
}
   321  
   322  // StopPeerForError disconnects from a peer due to external error.
   323  // If the peer is persistent, it will attempt to reconnect.
   324  // TODO: make record depending on reason.
   325  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   326  	if !peer.IsRunning() {
   327  		return
   328  	}
   329  
   330  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   331  	sw.stopAndRemovePeer(peer, reason)
   332  
   333  	if peer.IsPersistent() {
   334  		var addr *NetAddress
   335  		if peer.IsOutbound() { // socket address for outbound peers
   336  			addr = peer.SocketAddr()
   337  		} else { // self-reported address for inbound peers
   338  			var err error
   339  			addr, err = peer.NodeInfo().NetAddress()
   340  			if err != nil {
   341  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   342  					"peer", peer, "err", err)
   343  				return
   344  			}
   345  		}
   346  		go sw.reconnectToPeer(addr)
   347  	}
   348  }
   349  
// StopPeerGracefully disconnects from a peer gracefully.
// It stops and removes the peer without a reason and, unlike
// StopPeerForError, never starts a reconnect.
// TODO: handle graceful disconnects.
func (sw *Switch) StopPeerGracefully(peer Peer) {
	sw.Logger.Info("Stopping peer gracefully")
	sw.stopAndRemovePeer(peer, nil)
}
   356  
   357  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   358  	sw.transport.Cleanup(peer)
   359  	if err := peer.Stop(); err != nil {
   360  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   361  	}
   362  
   363  	for _, reactor := range sw.reactors {
   364  		reactor.RemovePeer(peer, reason)
   365  	}
   366  
   367  	// Removing a peer should go last to avoid a situation where a peer
   368  	// reconnect to our node and the switch calls InitPeer before
   369  	// RemovePeer is finished.
   370  	// https://github.com/tendermint/tendermint/issues/3338
   371  	if sw.peers.Remove(peer) {
   372  		sw.metrics.Peers.Add(float64(-1))
   373  	}
   374  }
   375  
   376  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   377  // with a fixed interval, then with exponential backoff.
   378  // If no success after all that, it stops trying, and leaves it
   379  // to the PEX/Addrbook to find the peer with the addr again
   380  // NOTE: this will keep trying even if the handshake or auth fails.
   381  // TODO: be more explicit with error types so we only retry on certain failures
   382  //  - ie. if we're getting ErrDuplicatePeer we can stop
   383  //  	because the addrbook got us the peer back already
   384  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   385  	if sw.reconnecting.Has(string(addr.ID)) {
   386  		return
   387  	}
   388  	sw.reconnecting.Set(string(addr.ID), addr)
   389  	defer sw.reconnecting.Delete(string(addr.ID))
   390  
   391  	start := time.Now()
   392  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   393  	for i := 0; i < reconnectAttempts; i++ {
   394  		if !sw.IsRunning() {
   395  			return
   396  		}
   397  
   398  		err := sw.DialPeerWithAddress(addr)
   399  		if err == nil {
   400  			return // success
   401  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   402  			return
   403  		}
   404  
   405  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   406  		// sleep a set amount
   407  		sw.randomSleep(reconnectInterval)
   408  		continue
   409  	}
   410  
   411  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   412  		"addr", addr, "elapsed", time.Since(start))
   413  	for i := 0; i < reconnectBackOffAttempts; i++ {
   414  		if !sw.IsRunning() {
   415  			return
   416  		}
   417  
   418  		// sleep an exponentially increasing amount
   419  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   420  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   421  
   422  		err := sw.DialPeerWithAddress(addr)
   423  		if err == nil {
   424  			return // success
   425  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   426  			return
   427  		}
   428  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   429  	}
   430  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   431  }
   432  
// SetAddrBook allows to set address book on Switch.
// Until it is called the switch has no address book (the field is nil).
func (sw *Switch) SetAddrBook(addrBook AddrBook) {
	sw.addrBook = addrBook
}
   437  
   438  // MarkPeerAsGood marks the given peer as good when it did something useful
   439  // like contributed to consensus.
   440  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   441  	if sw.addrBook != nil {
   442  		sw.addrBook.MarkGood(peer.ID())
   443  	}
   444  }
   445  
   446  //---------------------------------------------------------------------
   447  // Dialing
   448  
   449  type privateAddr interface {
   450  	PrivateAddr() bool
   451  }
   452  
   453  func isPrivateAddr(err error) bool {
   454  	te, ok := err.(privateAddr)
   455  	return ok && te.PrivateAddr()
   456  }
   457  
   458  // DialPeersAsync dials a list of peers asynchronously in random order.
   459  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   460  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   461  // encounter is returned.
   462  // Nop if there are no peers.
   463  func (sw *Switch) DialPeersAsync(peers []string) error {
   464  	netAddrs, errs := NewNetAddressStrings(peers)
   465  	// report all the errors
   466  	for _, err := range errs {
   467  		sw.Logger.Error("Error in peer's address", "err", err)
   468  	}
   469  	// return first non-ErrNetAddressLookup error
   470  	for _, err := range errs {
   471  		if _, ok := err.(ErrNetAddressLookup); ok {
   472  			continue
   473  		}
   474  		return err
   475  	}
   476  	sw.dialPeersAsync(netAddrs)
   477  	return nil
   478  }
   479  
   480  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   481  	ourAddr := sw.NetAddress()
   482  
   483  	// TODO: this code feels like it's in the wrong place.
   484  	// The integration tests depend on the addrBook being saved
   485  	// right away but maybe we can change that. Recall that
   486  	// the addrBook is only written to disk every 2min
   487  	if sw.addrBook != nil {
   488  		// add peers to `addrBook`
   489  		for _, netAddr := range netAddrs {
   490  			// do not add our address or ID
   491  			if !netAddr.Same(ourAddr) {
   492  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   493  					if isPrivateAddr(err) {
   494  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   495  					} else {
   496  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   497  					}
   498  				}
   499  			}
   500  		}
   501  		// Persist some peers to disk right away.
   502  		// NOTE: integration tests depend on this
   503  		sw.addrBook.Save()
   504  	}
   505  
   506  	// permute the list, dial them in random order.
   507  	perm := sw.rng.Perm(len(netAddrs))
   508  	for i := 0; i < len(perm); i++ {
   509  		go func(i int) {
   510  			j := perm[i]
   511  			addr := netAddrs[j]
   512  
   513  			if addr.Same(ourAddr) {
   514  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   515  				return
   516  			}
   517  
   518  			sw.randomSleep(0)
   519  
   520  			err := sw.DialPeerWithAddress(addr)
   521  			if err != nil {
   522  				switch err.(type) {
   523  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   524  					sw.Logger.Debug("Error dialing peer", "err", err)
   525  				default:
   526  					sw.Logger.Error("Error dialing peer", "err", err)
   527  				}
   528  			}
   529  		}(i)
   530  	}
   531  }
   532  
   533  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   534  // and authenticates successfully.
   535  // If we're currently dialing this address or it belongs to an existing peer,
   536  // ErrCurrentlyDialingOrExistingAddress is returned.
   537  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   538  	if sw.IsDialingOrExistingAddress(addr) {
   539  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   540  	}
   541  
   542  	sw.dialing.Set(string(addr.ID), addr)
   543  	defer sw.dialing.Delete(string(addr.ID))
   544  
   545  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   546  }
   547  
   548  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   549  func (sw *Switch) randomSleep(interval time.Duration) {
   550  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   551  	time.Sleep(r + interval)
   552  }
   553  
   554  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   555  // address or dialing it at the moment.
   556  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   557  	return sw.dialing.Has(string(addr.ID)) ||
   558  		sw.peers.Has(addr.ID) ||
   559  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   560  }
   561  
   562  // AddPersistentPeers allows you to set persistent peers. It ignores
   563  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   564  // returned.
   565  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   566  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   567  	netAddrs, errs := NewNetAddressStrings(addrs)
   568  	// report all the errors
   569  	for _, err := range errs {
   570  		sw.Logger.Error("Error in peer's address", "err", err)
   571  	}
   572  	// return first non-ErrNetAddressLookup error
   573  	for _, err := range errs {
   574  		if _, ok := err.(ErrNetAddressLookup); ok {
   575  			continue
   576  		}
   577  		return err
   578  	}
   579  	sw.persistentPeersAddrs = netAddrs
   580  	return nil
   581  }
   582  
   583  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   584  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   585  	for i, id := range ids {
   586  		err := validateID(ID(id))
   587  		if err != nil {
   588  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   589  		}
   590  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   591  	}
   592  	return nil
   593  }
   594  
   595  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   596  	validIDs := make([]string, 0, len(ids))
   597  	for i, id := range ids {
   598  		err := validateID(ID(id))
   599  		if err != nil {
   600  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   601  		}
   602  		validIDs = append(validIDs, id)
   603  	}
   604  
   605  	sw.addrBook.AddPrivateIDs(validIDs)
   606  
   607  	return nil
   608  }
   609  
   610  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   611  	for _, pa := range sw.persistentPeersAddrs {
   612  		if pa.Equals(na) {
   613  			return true
   614  		}
   615  	}
   616  	return false
   617  }
   618  
   619  func (sw *Switch) acceptRoutine() {
   620  	for {
   621  		p, err := sw.transport.Accept(peerConfig{
   622  			chDescs:      sw.chDescs,
   623  			onPeerError:  sw.StopPeerForError,
   624  			reactorsByCh: sw.reactorsByCh,
   625  			metrics:      sw.metrics,
   626  			isPersistent: sw.IsPeerPersistent,
   627  		})
   628  		if err != nil {
   629  			switch err := err.(type) {
   630  			case ErrRejected:
   631  				if err.IsSelf() {
   632  					// Remove the given address from the address book and add to our addresses
   633  					// to avoid dialing in the future.
   634  					addr := err.Addr()
   635  					sw.addrBook.RemoveAddress(&addr)
   636  					sw.addrBook.AddOurAddress(&addr)
   637  				}
   638  
   639  				sw.Logger.Info(
   640  					"Inbound Peer rejected",
   641  					"err", err,
   642  					"numPeers", sw.peers.Size(),
   643  				)
   644  
   645  				continue
   646  			case ErrFilterTimeout:
   647  				sw.Logger.Error(
   648  					"Peer filter timed out",
   649  					"err", err,
   650  				)
   651  
   652  				continue
   653  			case ErrTransportClosed:
   654  				sw.Logger.Error(
   655  					"Stopped accept routine, as transport is closed",
   656  					"numPeers", sw.peers.Size(),
   657  				)
   658  			default:
   659  				sw.Logger.Error(
   660  					"Accept on transport errored",
   661  					"err", err,
   662  					"numPeers", sw.peers.Size(),
   663  				)
   664  				// We could instead have a retry loop around the acceptRoutine,
   665  				// but that would need to stop and let the node shutdown eventually.
   666  				// So might as well panic and let process managers restart the node.
   667  				// There's no point in letting the node run without the acceptRoutine,
   668  				// since it won't be able to accept new connections.
   669  				panic(fmt.Errorf("accept routine exited: %v", err))
   670  			}
   671  
   672  			break
   673  		}
   674  
   675  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   676  			// Ignore connection if we already have enough peers.
   677  			_, in, _ := sw.NumPeers()
   678  			if in >= sw.config.MaxNumInboundPeers {
   679  				sw.Logger.Info(
   680  					"Ignoring inbound connection: already have enough inbound peers",
   681  					"address", p.SocketAddr(),
   682  					"have", in,
   683  					"max", sw.config.MaxNumInboundPeers,
   684  				)
   685  
   686  				sw.transport.Cleanup(p)
   687  
   688  				continue
   689  			}
   690  
   691  		}
   692  
   693  		if err := sw.addPeer(p); err != nil {
   694  			sw.transport.Cleanup(p)
   695  			if p.IsRunning() {
   696  				_ = p.Stop()
   697  			}
   698  			sw.Logger.Info(
   699  				"Ignoring inbound connection: error while adding peer",
   700  				"err", err,
   701  				"id", p.ID(),
   702  			)
   703  		}
   704  	}
   705  }
   706  
   707  // dial the peer; make secret connection; authenticate against the dialed ID;
   708  // add the peer.
   709  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   710  // If peer is started successfully, reconnectLoop will start when
   711  // StopPeerForError is called.
   712  func (sw *Switch) addOutboundPeerWithConfig(
   713  	addr *NetAddress,
   714  	cfg *config.P2PConfig,
   715  ) error {
   716  	sw.Logger.Info("Dialing peer", "address", addr)
   717  
   718  	// XXX(xla): Remove the leakage of test concerns in implementation.
   719  	if cfg.TestDialFail {
   720  		go sw.reconnectToPeer(addr)
   721  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   722  	}
   723  
   724  	p, err := sw.transport.Dial(*addr, peerConfig{
   725  		chDescs:      sw.chDescs,
   726  		onPeerError:  sw.StopPeerForError,
   727  		isPersistent: sw.IsPeerPersistent,
   728  		reactorsByCh: sw.reactorsByCh,
   729  		metrics:      sw.metrics,
   730  	})
   731  	if err != nil {
   732  		if e, ok := err.(ErrRejected); ok {
   733  			if e.IsSelf() {
   734  				// Remove the given address from the address book and add to our addresses
   735  				// to avoid dialing in the future.
   736  				sw.addrBook.RemoveAddress(addr)
   737  				sw.addrBook.AddOurAddress(addr)
   738  
   739  				return err
   740  			}
   741  		}
   742  
   743  		// retry persistent peers after
   744  		// any dial error besides IsSelf()
   745  		if sw.IsPeerPersistent(addr) {
   746  			go sw.reconnectToPeer(addr)
   747  		}
   748  
   749  		return err
   750  	}
   751  
   752  	if err := sw.addPeer(p); err != nil {
   753  		sw.transport.Cleanup(p)
   754  		if p.IsRunning() {
   755  			_ = p.Stop()
   756  		}
   757  		return err
   758  	}
   759  
   760  	return nil
   761  }
   762  
   763  func (sw *Switch) filterPeer(p Peer) error {
   764  	// Avoid duplicate
   765  	if sw.peers.Has(p.ID()) {
   766  		return ErrRejected{id: p.ID(), isDuplicate: true}
   767  	}
   768  
   769  	errc := make(chan error, len(sw.peerFilters))
   770  
   771  	for _, f := range sw.peerFilters {
   772  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   773  			errc <- f(sw.peers, p)
   774  		}(f, p, errc)
   775  	}
   776  
   777  	for i := 0; i < cap(errc); i++ {
   778  		select {
   779  		case err := <-errc:
   780  			if err != nil {
   781  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   782  			}
   783  		case <-time.After(sw.filterTimeout):
   784  			return ErrFilterTimeout{}
   785  		}
   786  	}
   787  
   788  	return nil
   789  }
   790  
   791  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   792  // the peer is filtered out or failed to start or can't be added.
   793  func (sw *Switch) addPeer(p Peer) error {
   794  	if err := sw.filterPeer(p); err != nil {
   795  		return err
   796  	}
   797  
   798  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   799  
   800  	// Handle the shut down case where the switch has stopped but we're
   801  	// concurrently trying to add a peer.
   802  	if !sw.IsRunning() {
   803  		// XXX should this return an error or just log and terminate?
   804  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   805  		return nil
   806  	}
   807  
   808  	// Add some data to the peer, which is required by reactors.
   809  	for _, reactor := range sw.reactors {
   810  		p = reactor.InitPeer(p)
   811  	}
   812  
   813  	// Start the peer's send/recv routines.
   814  	// Must start it before adding it to the peer set
   815  	// to prevent Start and Stop from being called concurrently.
   816  	err := p.Start()
   817  	if err != nil {
   818  		// Should never happen
   819  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   820  		return err
   821  	}
   822  
   823  	// Add the peer to PeerSet. Do this before starting the reactors
   824  	// so that if Receive errors, we will find the peer and remove it.
   825  	// Add should not err since we already checked peers.Has().
   826  	if err := sw.peers.Add(p); err != nil {
   827  		return err
   828  	}
   829  	sw.metrics.Peers.Add(float64(1))
   830  
   831  	// Start all the reactor protocols on the peer.
   832  	for _, reactor := range sw.reactors {
   833  		reactor.AddPeer(p)
   834  	}
   835  
   836  	sw.Logger.Info("Added peer", "peer", p)
   837  
   838  	return nil
   839  }