bitbucket.org/number571/tendermint@v0.8.14/internal/p2p/switch.go

     1  package p2p
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"math"
     8  	mrand "math/rand"
     9  	"net"
    10  	"sync"
    11  	"time"
    12  
    13  	"bitbucket.org/number571/tendermint/config"
    14  	"bitbucket.org/number571/tendermint/crypto"
    15  	"bitbucket.org/number571/tendermint/internal/p2p/conn"
    16  	"bitbucket.org/number571/tendermint/libs/cmap"
    17  	tmrand "bitbucket.org/number571/tendermint/libs/rand"
    18  	"bitbucket.org/number571/tendermint/libs/service"
    19  	"bitbucket.org/number571/tendermint/types"
    20  )
    21  
    22  const (
    23  	// wait a random amount of time from this interval
    24  	// before dialing peers or reconnecting to help prevent DoS
    25  	dialRandomizerIntervalMilliseconds = 3000
    26  
    27  	// repeatedly try to reconnect with a fixed interval,
    28  	// i.e. 20 attempts * 5s = 100s
    29  	reconnectAttempts = 20
    30  	reconnectInterval = 5 * time.Second
    31  
    32  	// then move into exponential backoff mode,
    33  	// i.e. sleep 3**i seconds for i in [0, 10): ~8hrs in total
    34  	reconnectBackOffAttempts    = 10
    35  	reconnectBackOffBaseSeconds = 3
    36  
    37  	defaultFilterTimeout = 5 * time.Second
    38  )
    39  
    40  // MConnConfig returns an MConnConfig with fields updated
    41  // from the P2PConfig.
    42  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    43  	mConfig := conn.DefaultMConnConfig()
    44  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    45  	mConfig.SendRate = cfg.SendRate
    46  	mConfig.RecvRate = cfg.RecvRate
    47  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    48  	return mConfig
    49  }
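
// Usage sketch (added for illustration, not part of the original source):
// deriving connection settings from a P2PConfig before building a transport.
// config.DefaultP2PConfig is assumed to exist in the config package.
//
//	p2pCfg := config.DefaultP2PConfig()
//	mCfg := MConnConfig(p2pCfg) // carries over flush, rate-limit and payload-size settings
//	_ = mCfg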
    50  
    51  //-----------------------------------------------------------------------------
    52  
    53  // An AddrBook represents an address book from the pex package, which is used
    54  // to store peer addresses.
    55  type AddrBook interface {
    56  	AddAddress(addr *NetAddress, src *NetAddress) error
    57  	AddPrivateIDs([]string)
    58  	AddOurAddress(*NetAddress)
    59  	OurAddress(*NetAddress) bool
    60  	MarkGood(types.NodeID)
    61  	RemoveAddress(*NetAddress)
    62  	HasAddress(*NetAddress) bool
    63  	Save()
    64  }
    65  
    66  // ConnFilterFunc is a callback for connection filtering. If it returns an
    67  // error, the connection is rejected. The set of existing connections is passed
    68  // along with the new connection and all resolved IPs.
    69  type ConnFilterFunc func(ConnSet, net.Conn, []net.IP) error
    70  
    71  // PeerFilterFunc to be implemented by filter hooks after a new Peer has been
    72  // fully set up.
    73  type PeerFilterFunc func(IPeerSet, Peer) error
    74  
    75  // ConnDuplicateIPFilter resolves and keeps all IPs for an incoming connection
    76  // and refuses new ones if they come from a known IP.
    77  var ConnDuplicateIPFilter ConnFilterFunc = func(cs ConnSet, c net.Conn, ips []net.IP) error {
    78  	for _, ip := range ips {
    79  		if cs.HasIP(ip) {
    80  			return ErrRejected{
    81  				conn:        c,
    82  				err:         fmt.Errorf("ip<%v> already connected", ip),
    83  				isDuplicate: true,
    84  			}
    85  		}
    86  	}
    87  	return nil
    88  }
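
// Sketch of a custom ConnFilterFunc (illustrative only): reject connections
// whose resolved IPs are outside a caller-supplied allowlist. The allowedIPs
// map is a hypothetical value, not something defined in this package.
//
//	func allowlistConnFilter(allowedIPs map[string]bool) ConnFilterFunc {
//		return func(_ ConnSet, c net.Conn, ips []net.IP) error {
//			for _, ip := range ips {
//				if !allowedIPs[ip.String()] {
//					return fmt.Errorf("ip<%v> is not in the allowlist", ip)
//				}
//			}
//			return nil
//		}
//	}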
    89  
    90  //-----------------------------------------------------------------------------
    91  
    92  // Switch handles peer connections and exposes an API to receive incoming messages
    93  // on `Reactors`.  Each `Reactor` is responsible for handling incoming messages on one
    94  // or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
    95  // incoming messages are received on the reactor.
    96  type Switch struct {
    97  	service.BaseService
    98  
    99  	config       *config.P2PConfig
   100  	reactors     map[string]Reactor
   101  	chDescs      []*conn.ChannelDescriptor
   102  	reactorsByCh map[byte]Reactor
   103  	peers        *PeerSet
   104  	dialing      *cmap.CMap
   105  	reconnecting *cmap.CMap
   106  	nodeInfo     types.NodeInfo // our node info
   107  	nodeKey      types.NodeKey  // our node privkey
   108  	addrBook     AddrBook
   109  	// peer addresses with which we'll maintain persistent connections
   110  	persistentPeersAddrs []*NetAddress
   111  	unconditionalPeerIDs map[types.NodeID]struct{}
   112  
   113  	transport Transport
   114  
   115  	filterTimeout time.Duration
   116  	peerFilters   []PeerFilterFunc
   117  	connFilters   []ConnFilterFunc
   118  	conns         ConnSet
   119  
   120  	metrics *Metrics
   121  }
   122  
   123  // NetAddress returns the first address the switch is listening on,
   124  // or nil if no addresses are found.
   125  func (sw *Switch) NetAddress() *NetAddress {
   126  	endpoints := sw.transport.Endpoints()
   127  	if len(endpoints) == 0 {
   128  		return nil
   129  	}
   130  	return &NetAddress{
   131  		ID:   sw.nodeInfo.NodeID,
   132  		IP:   endpoints[0].IP,
   133  		Port: endpoints[0].Port,
   134  	}
   135  }
   136  
   137  // SwitchOption sets an optional parameter on the Switch.
   138  type SwitchOption func(*Switch)
   139  
   140  // NewSwitch creates a new Switch with the given config.
   141  func NewSwitch(
   142  	cfg *config.P2PConfig,
   143  	transport Transport,
   144  	options ...SwitchOption,
   145  ) *Switch {
   146  	sw := &Switch{
   147  		config:               cfg,
   148  		reactors:             make(map[string]Reactor),
   149  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   150  		reactorsByCh:         make(map[byte]Reactor),
   151  		peers:                NewPeerSet(),
   152  		dialing:              cmap.NewCMap(),
   153  		reconnecting:         cmap.NewCMap(),
   154  		metrics:              NopMetrics(),
   155  		transport:            transport,
   156  		persistentPeersAddrs: make([]*NetAddress, 0),
   157  		unconditionalPeerIDs: make(map[types.NodeID]struct{}),
   158  		filterTimeout:        defaultFilterTimeout,
   159  		conns:                NewConnSet(),
   160  	}
   161  
   162  	// Ensure PRNG is reseeded.
   163  	tmrand.Reseed()
   164  
   165  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   166  
   167  	for _, option := range options {
   168  		option(sw)
   169  	}
   170  
   171  	return sw
   172  }
   173  
   174  // SwitchFilterTimeout sets the timeout used for peer filters.
   175  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   176  	return func(sw *Switch) { sw.filterTimeout = timeout }
   177  }
   178  
   179  // SwitchPeerFilters sets the filters for rejection of new peers.
   180  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   181  	return func(sw *Switch) { sw.peerFilters = filters }
   182  }
   183  
   184  // SwitchConnFilters sets the filters for rejection of connections.
   185  func SwitchConnFilters(filters ...ConnFilterFunc) SwitchOption {
   186  	return func(sw *Switch) { sw.connFilters = filters }
   187  }
   188  
   189  // WithMetrics sets the metrics.
   190  func WithMetrics(metrics *Metrics) SwitchOption {
   191  	return func(sw *Switch) { sw.metrics = metrics }
   192  }
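
// Construction sketch (illustrative, not part of the original source): wiring
// a Switch with options. The cfg and transport values are assumed to be a
// *config.P2PConfig and an existing Transport (e.g. an MConnTransport) built
// elsewhere.
//
//	sw := NewSwitch(
//		cfg,
//		transport,
//		WithMetrics(NopMetrics()),
//		SwitchFilterTimeout(10*time.Second),
//		SwitchConnFilters(ConnDuplicateIPFilter),
//	)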
   193  
   194  //---------------------------------------------------------------------
   195  // Switch setup
   196  
   197  // AddReactor adds the given reactor to the switch.
   198  // NOTE: Not goroutine safe.
   199  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   200  	for _, chDesc := range reactor.GetChannels() {
   201  		chID := chDesc.ID
   202  		// No two reactors can share the same channel.
   203  		if sw.reactorsByCh[chID] != nil {
   204  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   205  		}
   206  		sw.chDescs = append(sw.chDescs, chDesc)
   207  		sw.reactorsByCh[chID] = reactor
   208  	}
   209  	sw.reactors[name] = reactor
   210  	reactor.SetSwitch(sw)
   211  	return reactor
   212  }
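
// Usage sketch (illustrative): reactors are registered before the switch is
// started. The pexReactor and mempoolReactor values are hypothetical Reactor
// implementations used only for this example.
//
//	sw.AddReactor("PEX", pexReactor)
//	sw.AddReactor("MEMPOOL", mempoolReactor)
//	if err := sw.Start(); err != nil {
//		// handle startup failure
//	}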
   213  
   214  // RemoveReactor removes the given Reactor from the Switch.
   215  // NOTE: Not goroutine safe.
   216  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   217  	for _, chDesc := range reactor.GetChannels() {
   218  		// remove channel description
   219  		for i := 0; i < len(sw.chDescs); i++ {
   220  			if chDesc.ID == sw.chDescs[i].ID {
   221  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   222  				break
   223  			}
   224  		}
   225  		delete(sw.reactorsByCh, chDesc.ID)
   226  	}
   227  	delete(sw.reactors, name)
   228  	reactor.SetSwitch(nil)
   229  }
   230  
   231  // Reactors returns a map of reactors registered on the switch.
   232  // NOTE: Not goroutine safe.
   233  func (sw *Switch) Reactors() map[string]Reactor {
   234  	return sw.reactors
   235  }
   236  
   237  // Reactor returns the reactor with the given name.
   238  // NOTE: Not goroutine safe.
   239  func (sw *Switch) Reactor(name string) Reactor {
   240  	return sw.reactors[name]
   241  }
   242  
   243  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   244  // NOTE: Not goroutine safe.
   245  func (sw *Switch) SetNodeInfo(nodeInfo types.NodeInfo) {
   246  	sw.nodeInfo = nodeInfo
   247  }
   248  
   249  // NodeInfo returns the switch's NodeInfo.
   250  // NOTE: Not goroutine safe.
   251  func (sw *Switch) NodeInfo() types.NodeInfo {
   252  	return sw.nodeInfo
   253  }
   254  
   255  // SetNodeKey sets the switch's private key for authenticated encryption.
   256  // NOTE: Not goroutine safe.
   257  func (sw *Switch) SetNodeKey(nodeKey types.NodeKey) {
   258  	sw.nodeKey = nodeKey
   259  }
   260  
   261  //---------------------------------------------------------------------
   262  // Service start/stop
   263  
   264  // OnStart implements BaseService. It starts all the reactors and peers.
   265  func (sw *Switch) OnStart() error {
   266  
   267  	// FIXME: Temporary hack to pass channel descriptors to MConn transport,
   268  	// since they are not available when it is constructed. This will be
   269  	// fixed when we implement the new router abstraction.
   270  	if t, ok := sw.transport.(*MConnTransport); ok {
   271  		t.channelDescs = sw.chDescs
   272  	}
   273  
   274  	// Start reactors
   275  	for _, reactor := range sw.reactors {
   276  		err := reactor.Start()
   277  		if err != nil {
   278  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   279  		}
   280  	}
   281  
   282  	// Start accepting Peers.
   283  	go sw.acceptRoutine()
   284  
   285  	return nil
   286  }
   287  
   288  // OnStop implements BaseService. It stops all peers and reactors.
   289  func (sw *Switch) OnStop() {
   290  	// Stop peers
   291  	for _, p := range sw.peers.List() {
   292  		sw.stopAndRemovePeer(p, nil)
   293  	}
   294  
   295  	// Stop reactors
   296  	sw.Logger.Debug("Switch: Stopping reactors")
   297  	for _, reactor := range sw.reactors {
   298  		if err := reactor.Stop(); err != nil {
   299  			sw.Logger.Error("error while stopping reactor", "reactor", reactor, "error", err)
   300  		}
   301  	}
   302  }
   303  
   304  //---------------------------------------------------------------------
   305  // Peers
   306  
   307  // Broadcast runs a goroutine for each attempted send, which will block trying
   308  // to send for defaultSendTimeoutSeconds. It returns a channel which receives
   309  // success values for each attempted send (false if it times out). The channel
   310  // is closed once msgBytes has been sent to all peers (or has timed out).
   311  //
   312  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   313  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   314  	sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", msgBytes)
   315  
   316  	peers := sw.peers.List()
   317  	var wg sync.WaitGroup
   318  	wg.Add(len(peers))
   319  	successChan := make(chan bool, len(peers))
   320  
   321  	for _, peer := range peers {
   322  		go func(p Peer) {
   323  			defer wg.Done()
   324  			success := p.Send(chID, msgBytes)
   325  			successChan <- success
   326  		}(peer)
   327  	}
   328  
   329  	go func() {
   330  		wg.Wait()
   331  		close(successChan)
   332  	}()
   333  
   334  	return successChan
   335  }
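
// Consumption sketch (illustrative): counting how many peers accepted a
// broadcast. chID and msgBytes are placeholders for a real channel ID and an
// encoded message.
//
//	successes := 0
//	for ok := range sw.Broadcast(chID, msgBytes) {
//		if ok {
//			successes++
//		}
//	}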
   336  
   337  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   338  // Unconditional peers are not counted here.
   339  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   340  	peers := sw.peers.List()
   341  	for _, peer := range peers {
   342  		if peer.IsOutbound() {
   343  			if !sw.IsPeerUnconditional(peer.ID()) {
   344  				outbound++
   345  			}
   346  		} else {
   347  			if !sw.IsPeerUnconditional(peer.ID()) {
   348  				inbound++
   349  			}
   350  		}
   351  	}
   352  	dialing = sw.dialing.Size()
   353  	return
   354  }
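
// Usage sketch (illustrative): checking capacity before dialing more peers.
//
//	out, in, dialing := sw.NumPeers()
//	if out+dialing < sw.MaxNumOutboundPeers() {
//		// there is room for more outbound connections
//	}
//	_ = in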
   355  
   356  func (sw *Switch) IsPeerUnconditional(id types.NodeID) bool {
   357  	_, ok := sw.unconditionalPeerIDs[id]
   358  	return ok
   359  }
   360  
   361  // MaxNumOutboundPeers returns the maximum number of outbound peers.
   362  func (sw *Switch) MaxNumOutboundPeers() int {
   363  	return sw.config.MaxNumOutboundPeers
   364  }
   365  
   366  // Peers returns the set of peers that are connected to the switch.
   367  func (sw *Switch) Peers() IPeerSet {
   368  	return sw.peers
   369  }
   370  
   371  // StopPeerForError disconnects from a peer due to an external error.
   372  // If the peer is persistent, it will attempt to reconnect.
   373  // TODO: make record depending on reason.
   374  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   375  	if !peer.IsRunning() {
   376  		return
   377  	}
   378  
   379  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   380  	sw.stopAndRemovePeer(peer, reason)
   381  
   382  	if peer.IsPersistent() {
   383  		var addr *NetAddress
   384  		if peer.IsOutbound() { // socket address for outbound peers
   385  			addr = peer.SocketAddr()
   386  		} else { // self-reported address for inbound peers
   387  			var err error
   388  			addr, err = peer.NodeInfo().NetAddress()
   389  			if err != nil {
   390  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   391  					"peer", peer, "err", err)
   392  				return
   393  			}
   394  		}
   395  		go sw.reconnectToPeer(addr)
   396  	}
   397  }
   398  
   399  // StopPeerGracefully disconnects from a peer gracefully.
   400  // TODO: handle graceful disconnects.
   401  func (sw *Switch) StopPeerGracefully(peer Peer) {
   402  	sw.Logger.Info("Stopping peer gracefully")
   403  	sw.stopAndRemovePeer(peer, nil)
   404  }
   405  
   406  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   407  	if err := peer.Stop(); err != nil {
   408  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   409  	}
   410  
   411  	for _, reactor := range sw.reactors {
   412  		reactor.RemovePeer(peer, reason)
   413  	}
   414  
   415  	// Removing a peer should go last to avoid a situation where a peer
   416  	// reconnects to our node and the switch calls InitPeer before
   417  	// RemovePeer is finished.
   418  	// https://bitbucket.org/number571/tendermint/issues/3338
   419  	if sw.peers.Remove(peer) {
   420  		sw.metrics.Peers.Add(float64(-1))
   421  	}
   422  
   423  	sw.conns.RemoveAddr(peer.RemoteAddr())
   424  }
   425  
   426  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   427  // with a fixed interval, then with exponential backoff.
   428  // If no success after all that, it stops trying, and leaves it
   429  // to the PEX/Addrbook to find the peer with the addr again.
   430  // NOTE: this will keep trying even if the handshake or auth fails.
   431  // TODO: be more explicit with error types so we only retry on certain failures
   432  //  - i.e. if we're getting ErrDuplicatePeer we can stop
   433  //  	because the addrbook got us the peer back already
   434  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   435  	if sw.reconnecting.Has(string(addr.ID)) {
   436  		return
   437  	}
   438  	sw.reconnecting.Set(string(addr.ID), addr)
   439  	defer sw.reconnecting.Delete(string(addr.ID))
   440  
   441  	start := time.Now()
   442  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   443  	for i := 0; i < reconnectAttempts; i++ {
   444  		if !sw.IsRunning() {
   445  			return
   446  		}
   447  
   448  		err := sw.DialPeerWithAddress(addr)
   449  		if err == nil {
   450  			return // success
   451  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   452  			return
   453  		}
   454  
   455  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   456  		// sleep a set amount
   457  		sw.randomSleep(reconnectInterval)
   458  		continue
   459  	}
   460  
   461  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   462  		"addr", addr, "elapsed", time.Since(start))
   463  	for i := 0; i < reconnectBackOffAttempts; i++ {
   464  		if !sw.IsRunning() {
   465  			return
   466  		}
   467  
   468  		// sleep an exponentially increasing amount
   469  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   470  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   471  
   472  		err := sw.DialPeerWithAddress(addr)
   473  		if err == nil {
   474  			return // success
   475  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   476  			return
   477  		}
   478  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   479  	}
   480  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   481  }
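
// Backoff sketch (illustrative): the exponential phase above sleeps roughly
// 3**i seconds plus up to 3s of jitter before attempt i, i.e. 1s, 3s, 9s, ...,
// 19683s, for a worst-case total of (3**10-1)/2 = 29524s, about 8.2 hours.
//
//	for i := 0; i < reconnectBackOffAttempts; i++ {
//		fmt.Printf("attempt %d: ~%.0fs + jitter\n", i, math.Pow(reconnectBackOffBaseSeconds, float64(i)))
//	}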
   482  
   483  // SetAddrBook sets the address book on the Switch.
   484  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   485  	sw.addrBook = addrBook
   486  }
   487  
   488  // MarkPeerAsGood marks the given peer as good when it did something useful
   489  // like contributing to consensus.
   490  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   491  	if sw.addrBook != nil {
   492  		sw.addrBook.MarkGood(peer.ID())
   493  	}
   494  }
   495  
   496  //---------------------------------------------------------------------
   497  // Dialing
   498  
   499  type privateAddr interface {
   500  	PrivateAddr() bool
   501  }
   502  
   503  func isPrivateAddr(err error) bool {
   504  	te, ok := err.(privateAddr)
   505  	return ok && te.PrivateAddr()
   506  }
   507  
   508  // DialPeersAsync dials a list of peers asynchronously in random order.
   509  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   510  // It ignores ErrNetAddressLookup. However, if there are other errors, the
   511  // first one encountered is returned.
   512  // It is a no-op if there are no peers.
   513  func (sw *Switch) DialPeersAsync(peers []string) error {
   514  	netAddrs, errs := NewNetAddressStrings(peers)
   515  	// report all the errors
   516  	for _, err := range errs {
   517  		sw.Logger.Error("Error in peer's address", "err", err)
   518  	}
   519  	// return first non-ErrNetAddressLookup error
   520  	for _, err := range errs {
   521  		if _, ok := err.(types.ErrNetAddressLookup); ok {
   522  			continue
   523  		}
   524  		return err
   525  	}
   526  	sw.dialPeersAsync(netAddrs)
   527  	return nil
   528  }
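
// Usage sketch (illustrative): dialing operator-supplied peers on startup.
// The address below is a placeholder in the "<nodeID>@host:port" form.
//
//	peers := []string{
//		"0011223344556677889900112233445566778899@10.0.0.1:26656", // placeholder
//	}
//	if err := sw.DialPeersAsync(peers); err != nil {
//		sw.Logger.Error("failed to dial peers", "err", err)
//	}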
   529  
   530  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   531  	ourAddr := sw.NetAddress()
   532  
   533  	// TODO: this code feels like it's in the wrong place.
   534  	// The integration tests depend on the addrBook being saved
   535  	// right away but maybe we can change that. Recall that
   536  	// the addrBook is only written to disk every 2 minutes.
   537  	if sw.addrBook != nil {
   538  		// add peers to `addrBook`
   539  		for _, netAddr := range netAddrs {
   540  			// do not add our address or ID
   541  			if !netAddr.Same(ourAddr) {
   542  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   543  					if isPrivateAddr(err) {
   544  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   545  					} else {
   546  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   547  					}
   548  				}
   549  			}
   550  		}
   551  		// Persist some peers to disk right away.
   552  		// NOTE: integration tests depend on this
   553  		sw.addrBook.Save()
   554  	}
   555  
   556  	// permute the list, dial them in random order.
   557  	perm := mrand.Perm(len(netAddrs))
   558  	for i := 0; i < len(perm); i++ {
   559  		go func(i int) {
   560  			j := perm[i]
   561  			addr := netAddrs[j]
   562  
   563  			if addr.Same(ourAddr) {
   564  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   565  				return
   566  			}
   567  
   568  			sw.randomSleep(0)
   569  
   570  			err := sw.DialPeerWithAddress(addr)
   571  			if err != nil {
   572  				switch err.(type) {
   573  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   574  					sw.Logger.Debug("Error dialing peer", "err", err)
   575  				default:
   576  					sw.Logger.Error("Error dialing peer", "err", err)
   577  				}
   578  			}
   579  		}(i)
   580  	}
   581  }
   582  
   583  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   584  // and authenticates successfully.
   585  // If we're currently dialing this address or it belongs to an existing peer,
   586  // ErrCurrentlyDialingOrExistingAddress is returned.
   587  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   588  	if sw.IsDialingOrExistingAddress(addr) {
   589  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   590  	}
   591  
   592  	sw.dialing.Set(string(addr.ID), addr)
   593  	defer sw.dialing.Delete(string(addr.ID))
   594  
   595  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   596  }
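
// Dialing sketch (illustrative): attempting a single outbound connection and
// tolerating the already-dialing case. The addr value is assumed to be a
// previously resolved *NetAddress.
//
//	if err := sw.DialPeerWithAddress(addr); err != nil {
//		if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
//			// already connected to or dialing this address; nothing to do
//		} else {
//			sw.Logger.Error("dial failed", "addr", addr, "err", err)
//		}
//	}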
   597  
   598  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   599  func (sw *Switch) randomSleep(interval time.Duration) {
   600  	// nolint:gosec // G404: Use of weak random number generator
   601  	r := time.Duration(mrand.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   602  	time.Sleep(r + interval)
   603  }
   604  
   605  // IsDialingOrExistingAddress returns true if the switch has a peer with the given
   606  // address or is dialing it at the moment.
   607  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   608  	return sw.dialing.Has(string(addr.ID)) ||
   609  		sw.peers.Has(addr.ID) ||
   610  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   611  }
   612  
   613  // AddPersistentPeers allows you to set persistent peers. It ignores
   614  // ErrNetAddressLookup. However, if there are other errors, the first one
   615  // encountered is returned.
   616  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   617  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   618  	netAddrs, errs := NewNetAddressStrings(addrs)
   619  	// report all the errors
   620  	for _, err := range errs {
   621  		sw.Logger.Error("Error in peer's address", "err", err)
   622  	}
   623  	// return first non-ErrNetAddressLookup error
   624  	for _, err := range errs {
   625  		if _, ok := err.(types.ErrNetAddressLookup); ok {
   626  			continue
   627  		}
   628  		return err
   629  	}
   630  	sw.persistentPeersAddrs = netAddrs
   631  	return nil
   632  }
   633  
   634  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   635  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   636  	for i, id := range ids {
   637  		err := types.NodeID(id).Validate()
   638  		if err != nil {
   639  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   640  		}
   641  		sw.unconditionalPeerIDs[types.NodeID(id)] = struct{}{}
   642  	}
   643  	return nil
   644  }
   645  
   646  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   647  	validIDs := make([]string, 0, len(ids))
   648  	for i, id := range ids {
   649  		err := types.NodeID(id).Validate()
   650  		if err != nil {
   651  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   652  		}
   653  		validIDs = append(validIDs, id)
   654  	}
   655  
   656  	sw.addrBook.AddPrivateIDs(validIDs)
   657  
   658  	return nil
   659  }
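
// Configuration sketch (illustrative): the peer lists are normally fed from
// node configuration before the switch starts. The slices below are
// hypothetical placeholders.
//
//	if err := sw.AddPersistentPeers(persistentAddrs); err != nil { // "<nodeID>@host:port" strings
//		return err
//	}
//	if err := sw.AddUnconditionalPeerIDs(unconditionalIDs); err != nil { // hex-encoded node IDs
//		return err
//	}
//	if err := sw.AddPrivatePeerIDs(privateIDs); err != nil { // node IDs withheld from the address book
//		return err
//	}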
   660  
   661  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   662  	for _, pa := range sw.persistentPeersAddrs {
   663  		if pa.Equals(na) {
   664  			return true
   665  		}
   666  	}
   667  	return false
   668  }
   669  
   670  func (sw *Switch) acceptRoutine() {
   671  	for {
   672  		var peerNodeInfo types.NodeInfo
   673  		c, err := sw.transport.Accept()
   674  		if err == nil {
   675  			// NOTE: The legacy MConn transport did handshaking in Accept(),
   676  			// which was asynchronous and avoided head-of-line-blocking.
   677  			// However, as handshakes are being migrated out from the transport,
   678  			// we just do it synchronously here for now.
   679  			peerNodeInfo, _, err = sw.handshakePeer(c, "")
   680  		}
   681  		if err == nil {
   682  			err = sw.filterConn(c.(*mConnConnection).conn)
   683  		}
   684  		if err != nil {
   685  			if c != nil {
   686  				_ = c.Close()
   687  			}
   688  			if err == io.EOF {
   689  				err = ErrTransportClosed{}
   690  			}
   691  			switch err := err.(type) {
   692  			case ErrRejected:
   693  				addr := err.Addr()
   694  				if err.IsSelf() {
   695  					// Remove the given address from the address book and add to our addresses
   696  					// to avoid dialing in the future.
   697  					sw.addrBook.RemoveAddress(&addr)
   698  					sw.addrBook.AddOurAddress(&addr)
   699  				}
   700  				if err.IsIncompatible() {
   701  					sw.addrBook.RemoveAddress(&addr)
   702  				}
   703  
   704  				sw.Logger.Info(
   705  					"Inbound Peer rejected",
   706  					"err", err,
   707  					"numPeers", sw.peers.Size(),
   708  				)
   709  
   710  				continue
   711  			case ErrFilterTimeout:
   712  				sw.Logger.Error(
   713  					"Peer filter timed out",
   714  					"err", err,
   715  				)
   716  
   717  				continue
   718  			case ErrTransportClosed:
   719  				sw.Logger.Error(
   720  					"Stopped accept routine, as transport is closed",
   721  					"numPeers", sw.peers.Size(),
   722  				)
   723  			default:
   724  				sw.Logger.Error(
   725  					"Accept on transport errored",
   726  					"err", err,
   727  					"numPeers", sw.peers.Size(),
   728  				)
   729  				// We could instead have a retry loop around the acceptRoutine,
   730  				// but that would need to stop and let the node shutdown eventually.
   731  				// So might as well panic and let process managers restart the node.
   732  				// There's no point in letting the node run without the acceptRoutine,
   733  				// since it won't be able to accept new connections.
   734  				panic(fmt.Errorf("accept routine exited: %v", err))
   735  			}
   736  
   737  			break
   738  		}
   739  
   740  		isPersistent := false
   741  		addr, err := peerNodeInfo.NetAddress()
   742  		if err == nil {
   743  			isPersistent = sw.IsPeerPersistent(addr)
   744  		}
   745  
   746  		p := newPeer(
   747  			peerNodeInfo,
   748  			newPeerConn(false, isPersistent, c),
   749  			sw.reactorsByCh,
   750  			sw.StopPeerForError,
   751  			PeerMetrics(sw.metrics),
   752  		)
   753  
   754  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   755  			// Ignore connection if we already have enough peers.
   756  			_, in, _ := sw.NumPeers()
   757  			if in >= sw.config.MaxNumInboundPeers {
   758  				sw.Logger.Info(
   759  					"Ignoring inbound connection: already have enough inbound peers",
   760  					"address", p.SocketAddr(),
   761  					"have", in,
   762  					"max", sw.config.MaxNumInboundPeers,
   763  				)
   764  				_ = p.CloseConn()
   765  				continue
   766  			}
   767  
   768  		}
   769  
   770  		if err := sw.addPeer(p); err != nil {
   771  			_ = p.CloseConn()
   772  			if p.IsRunning() {
   773  				_ = p.Stop()
   774  			}
   775  			sw.conns.RemoveAddr(p.RemoteAddr())
   776  			sw.Logger.Info(
   777  				"Ignoring inbound connection: error while adding peer",
   778  				"err", err,
   779  				"id", p.ID(),
   780  			)
   781  		}
   782  	}
   783  }
   784  
   785  // dial the peer; make a secret connection; authenticate against the dialed ID;
   786  // add the peer.
   787  // If dialing fails, start the reconnect loop. If the handshake fails, it's over.
   788  // If the peer is started successfully, the reconnect loop will start when
   789  // StopPeerForError is called.
   790  func (sw *Switch) addOutboundPeerWithConfig(
   791  	addr *NetAddress,
   792  	cfg *config.P2PConfig,
   793  ) error {
   794  	sw.Logger.Info("Dialing peer", "address", addr)
   795  
   796  	// XXX(xla): Remove the leakage of test concerns in implementation.
   797  	if cfg.TestDialFail {
   798  		go sw.reconnectToPeer(addr)
   799  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   800  	}
   801  
   802  	// Hardcoded timeout moved from MConn transport during refactoring.
   803  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   804  	defer cancel()
   805  
   806  	var peerNodeInfo types.NodeInfo
   807  	c, err := sw.transport.Dial(ctx, Endpoint{
   808  		Protocol: MConnProtocol,
   809  		IP:       addr.IP,
   810  		Port:     addr.Port,
   811  	})
   812  	if err == nil {
   813  		peerNodeInfo, _, err = sw.handshakePeer(c, addr.ID)
   814  	}
   815  	if err == nil {
   816  		err = sw.filterConn(c.(*mConnConnection).conn)
   817  	}
   818  	if err != nil {
   819  		if c != nil {
   820  			_ = c.Close()
   821  		}
   822  		if e, ok := err.(ErrRejected); ok {
   823  			if e.IsSelf() {
   824  				// Remove the given address from the address book and add to our addresses
   825  				// to avoid dialing in the future.
   826  				sw.addrBook.RemoveAddress(addr)
   827  				sw.addrBook.AddOurAddress(addr)
   828  			}
   829  			if e.IsIncompatible() {
   830  				sw.addrBook.RemoveAddress(addr)
   831  			}
   832  
   833  			return err
   834  		}
   835  
   836  		// retry persistent peers after
   837  		// any dial error besides IsSelf()
   838  		if sw.IsPeerPersistent(addr) {
   839  			go sw.reconnectToPeer(addr)
   840  		}
   841  
   842  		return err
   843  	}
   844  
   845  	p := newPeer(
   846  		peerNodeInfo,
   847  		newPeerConn(true, sw.IsPeerPersistent(addr), c),
   848  		sw.reactorsByCh,
   849  		sw.StopPeerForError,
   850  		PeerMetrics(sw.metrics),
   851  	)
   852  
   853  	if err := sw.addPeer(p); err != nil {
   854  		_ = p.CloseConn()
   855  		if p.IsRunning() {
   856  			_ = p.Stop()
   857  		}
   858  		sw.conns.RemoveAddr(p.RemoteAddr())
   859  		return err
   860  	}
   861  
   862  	return nil
   863  }
   864  
   865  func (sw *Switch) handshakePeer(
   866  	c Connection,
   867  	expectPeerID types.NodeID,
   868  ) (types.NodeInfo, crypto.PubKey, error) {
   869  	// Moved from transport and hardcoded until legacy P2P stack removal.
   870  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   871  	defer cancel()
   872  
   873  	peerInfo, peerKey, err := c.Handshake(ctx, sw.nodeInfo, sw.nodeKey.PrivKey)
   874  	if err != nil {
   875  		return peerInfo, peerKey, ErrRejected{
   876  			conn:          c.(*mConnConnection).conn,
   877  			err:           fmt.Errorf("handshake failed: %v", err),
   878  			isAuthFailure: true,
   879  		}
   880  	}
   881  
   882  	if err = peerInfo.Validate(); err != nil {
   883  		return peerInfo, peerKey, ErrRejected{
   884  			conn:              c.(*mConnConnection).conn,
   885  			err:               err,
   886  			isNodeInfoInvalid: true,
   887  		}
   888  	}
   889  
   890  	// For outgoing conns, ensure connection key matches dialed key.
   891  	if expectPeerID != "" {
   892  		peerID := types.NodeIDFromPubKey(peerKey)
   893  		if expectPeerID != peerID {
   894  			return peerInfo, peerKey, ErrRejected{
   895  				conn: c.(*mConnConnection).conn,
   896  				id:   peerID,
   897  				err: fmt.Errorf(
   898  					"conn.ID (%v) dialed ID (%v) mismatch",
   899  					peerID,
   900  					expectPeerID,
   901  				),
   902  				isAuthFailure: true,
   903  			}
   904  		}
   905  	}
   906  
   907  	if sw.nodeInfo.ID() == peerInfo.ID() {
   908  		return peerInfo, peerKey, ErrRejected{
   909  			addr:   *types.NewNetAddress(peerInfo.ID(), c.(*mConnConnection).conn.RemoteAddr()),
   910  			conn:   c.(*mConnConnection).conn,
   911  			id:     peerInfo.ID(),
   912  			isSelf: true,
   913  		}
   914  	}
   915  
   916  	if err = sw.nodeInfo.CompatibleWith(peerInfo); err != nil {
   917  		return peerInfo, peerKey, ErrRejected{
   918  			conn:           c.(*mConnConnection).conn,
   919  			err:            err,
   920  			id:             peerInfo.ID(),
   921  			isIncompatible: true,
   922  		}
   923  	}
   924  
   925  	return peerInfo, peerKey, nil
   926  }
   927  
   928  func (sw *Switch) filterPeer(p Peer) error {
   929  	// Avoid duplicate
   930  	if sw.peers.Has(p.ID()) {
   931  		return ErrRejected{id: p.ID(), isDuplicate: true}
   932  	}
   933  
   934  	errc := make(chan error, len(sw.peerFilters))
   935  
   936  	for _, f := range sw.peerFilters {
   937  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   938  			errc <- f(sw.peers, p)
   939  		}(f, p, errc)
   940  	}
   941  
   942  	for i := 0; i < cap(errc); i++ {
   943  		select {
   944  		case err := <-errc:
   945  			if err != nil {
   946  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   947  			}
   948  		case <-time.After(sw.filterTimeout):
   949  			return ErrFilterTimeout{}
   950  		}
   951  	}
   952  
   953  	return nil
   954  }
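
// Sketch of a PeerFilterFunc (illustrative only): reject peers whose ID is not
// in a caller-supplied allowlist. The allowed map is a hypothetical value;
// such a filter would be installed via SwitchPeerFilters.
//
//	func allowlistPeerFilter(allowed map[types.NodeID]bool) PeerFilterFunc {
//		return func(_ IPeerSet, p Peer) error {
//			if !allowed[p.ID()] {
//				return fmt.Errorf("peer %v is not in the allowlist", p.ID())
//			}
//			return nil
//		}
//	}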
   955  
   956  // filterConn filters a connection, rejecting it if this function errors.
   957  //
   958  // FIXME: This is only here for compatibility with the current Switch code. In
   959  // the new P2P stack, peer/connection filtering should be moved into the Router
   960  // or PeerManager and removed from here.
   961  func (sw *Switch) filterConn(conn net.Conn) error {
   962  	if sw.conns.Has(conn) {
   963  		return ErrRejected{conn: conn, isDuplicate: true}
   964  	}
   965  
   966  	host, _, err := net.SplitHostPort(conn.RemoteAddr().String())
   967  	if err != nil {
   968  		return err
   969  	}
   970  	ip := net.ParseIP(host)
   971  	if ip == nil {
   972  		return fmt.Errorf("connection address has invalid IP address %q", host)
   973  	}
   974  
   975  	// Apply filter callbacks.
   976  	chErr := make(chan error, len(sw.connFilters))
   977  	for _, connFilter := range sw.connFilters {
   978  		go func(connFilter ConnFilterFunc) {
   979  			chErr <- connFilter(sw.conns, conn, []net.IP{ip})
   980  		}(connFilter)
   981  	}
   982  
   983  	for i := 0; i < cap(chErr); i++ {
   984  		select {
   985  		case err := <-chErr:
   986  			if err != nil {
   987  				return ErrRejected{conn: conn, err: err, isFiltered: true}
   988  			}
   989  		case <-time.After(sw.filterTimeout):
   990  			return ErrFilterTimeout{}
   991  		}
   992  
   993  	}
   994  
   995  	// FIXME: Doesn't really make sense to set this here, but we preserve the
   996  	// behavior from the previous P2P transport implementation.
   997  	sw.conns.Set(conn, []net.IP{ip})
   998  	return nil
   999  }
  1000  
  1001  // addPeer starts up the Peer and adds it to the Switch. An error is returned
  1002  // if the peer is filtered out, fails to start, or can't be added.
  1003  func (sw *Switch) addPeer(p Peer) error {
  1004  	if err := sw.filterPeer(p); err != nil {
  1005  		return err
  1006  	}
  1007  
  1008  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
  1009  
  1010  	// Handle the shutdown case where the switch has stopped but we're
  1011  	// concurrently trying to add a peer.
  1012  	if !sw.IsRunning() {
  1013  		// XXX should this return an error or just log and terminate?
  1014  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
  1015  		return nil
  1016  	}
  1017  
  1018  	// Add some data to the peer, which is required by reactors.
  1019  	for _, reactor := range sw.reactors {
  1020  		p = reactor.InitPeer(p)
  1021  	}
  1022  
  1023  	// Start the peer's send/recv routines.
  1024  	// Must start it before adding it to the peer set
  1025  	// to prevent Start and Stop from being called concurrently.
  1026  	err := p.Start()
  1027  	if err != nil {
  1028  		// Should never happen
  1029  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
  1030  		return err
  1031  	}
  1032  
  1033  	// Add the peer to PeerSet. Do this before starting the reactors
  1034  	// so that if Receive errors, we will find the peer and remove it.
  1035  	// Add should not err since we already checked peers.Has().
  1036  	if err := sw.peers.Add(p); err != nil {
  1037  		return err
  1038  	}
  1039  	sw.metrics.Peers.Add(float64(1))
  1040  
  1041  	// Start all the reactor protocols on the peer.
  1042  	for _, reactor := range sw.reactors {
  1043  		reactor.AddPeer(p)
  1044  	}
  1045  
  1046  	sw.Logger.Info("Added peer", "peer", p)
  1047  
  1048  	return nil
  1049  }
  1050  
  1051  // NewNetAddressStrings returns a slice of NetAddresses built from
  1052  // the provided strings.
  1053  func NewNetAddressStrings(addrs []string) ([]*NetAddress, []error) {
  1054  	netAddrs := make([]*NetAddress, 0)
  1055  	errs := make([]error, 0)
  1056  	for _, addr := range addrs {
  1057  		netAddr, err := types.NewNetAddressString(addr)
  1058  		if err != nil {
  1059  			errs = append(errs, err)
  1060  		} else {
  1061  			netAddrs = append(netAddrs, netAddr)
  1062  		}
  1063  	}
  1064  	return netAddrs, errs
  1065  }
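
// Usage sketch (illustrative): parsing operator-supplied addresses, logging
// the failures and keeping the valid ones. The second string is a placeholder
// in the "<nodeID>@host:port" form expected by the types package.
//
//	addrs, errs := NewNetAddressStrings([]string{
//		"not-an-address",
//		"0011223344556677889900112233445566778899@10.0.0.1:26656",
//	})
//	for _, err := range errs {
//		fmt.Println("skipping address:", err)
//	}
//	_ = addrs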