github.com/DFWallet/tendermint-cosmos@v0.0.2/p2p/switch.go (about)

     1  package p2p
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/DFWallet/tendermint-cosmos/config"
    10  	"github.com/DFWallet/tendermint-cosmos/libs/cmap"
    11  	"github.com/DFWallet/tendermint-cosmos/libs/rand"
    12  	"github.com/DFWallet/tendermint-cosmos/libs/service"
    13  	"github.com/DFWallet/tendermint-cosmos/p2p/conn"
    14  )
    15  
const (
	// wait a random amount of time from this interval
	// before dialing peers or reconnecting to help prevent DoS
	dialRandomizerIntervalMilliseconds = 3000

	// repeatedly try to reconnect for a few minutes
	// ie. 5 * 20 = 100s (plus the random dial jitter added to every sleep)
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// then move into exponential backoff mode: attempt i (counting from 0)
	// sleeps 3**i seconds (plus jitter) before dialing, so the last of the
	// 10 attempts waits 3**9s (~5.5hrs) and the whole phase lasts about 8hrs
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)
    31  
    32  // MConnConfig returns an MConnConfig with fields updated
    33  // from the P2PConfig.
    34  func MConnConfig(cfg *config.P2PConfig) conn.MConnConfig {
    35  	mConfig := conn.DefaultMConnConfig()
    36  	mConfig.FlushThrottle = cfg.FlushThrottleTimeout
    37  	mConfig.SendRate = cfg.SendRate
    38  	mConfig.RecvRate = cfg.RecvRate
    39  	mConfig.MaxPacketMsgPayloadSize = cfg.MaxPacketMsgPayloadSize
    40  	return mConfig
    41  }
    42  
    43  //-----------------------------------------------------------------------------
    44  
// An AddrBook represents an address book from the pex package, which is used
// to store peer addresses.
type AddrBook interface {
	// AddAddress records addr; src identifies where the address came from
	// (dialPeersAsync passes the switch's own address as src).
	AddAddress(addr *NetAddress, src *NetAddress) error
	// AddPrivateIDs receives the validated IDs from Switch.AddPrivatePeerIDs.
	AddPrivateIDs([]string)
	// AddOurAddress is called, together with RemoveAddress, when a
	// connection turns out to be to ourselves, to avoid dialing it again.
	AddOurAddress(*NetAddress)
	// NOTE(review): not called in this file; presumably reports whether the
	// address was registered through AddOurAddress - confirm in pex.
	OurAddress(*NetAddress) bool
	// MarkGood is called by Switch.MarkPeerAsGood with the peer's ID.
	MarkGood(ID)
	// RemoveAddress drops an address from the book (see AddOurAddress).
	RemoveAddress(*NetAddress)
	// NOTE(review): not called in this file; presumably reports whether the
	// address is stored in the book - confirm in pex.
	HasAddress(*NetAddress) bool
	// Save is called by dialPeersAsync right after adding addresses so that
	// they are persisted immediately.
	Save()
}
    57  
// PeerFilterFunc to be implemented by filter hooks after a new Peer has been
// fully setup. filterPeer invokes every registered filter with the switch's
// current peer set and the candidate peer; a non-nil error rejects the
// candidate.
type PeerFilterFunc func(IPeerSet, Peer) error
    61  
    62  //-----------------------------------------------------------------------------
    63  
// Switch handles peer connections and exposes an API to receive incoming messages
// on `Reactors`.  Each `Reactor` is responsible for handling incoming messages of one
// or more `Channels`.  So while sending outgoing messages is typically performed on the peer,
// incoming messages are received on the reactor.
type Switch struct {
	service.BaseService

	config       *config.P2PConfig         // p2p configuration handed to NewSwitch
	reactors     map[string]Reactor        // registered reactors, keyed by name
	chDescs      []*conn.ChannelDescriptor // channel descriptors collected from all reactors
	reactorsByCh map[byte]Reactor          // channel ID -> reactor owning that channel
	peers        *PeerSet                  // peers currently added to the switch
	dialing      *cmap.CMap                // peer ID string -> address of outbound dials in progress
	reconnecting *cmap.CMap                // peer ID string -> address of active reconnect loops
	nodeInfo     NodeInfo                  // our node info
	nodeKey      *NodeKey                  // our node privkey
	addrBook     AddrBook                  // nil until SetAddrBook is called
	// peers addresses with whom we'll maintain constant connection
	persistentPeersAddrs []*NetAddress
	// IDs of peers that NumPeers does not count and that bypass the inbound
	// peer limit in acceptRoutine
	unconditionalPeerIDs map[ID]struct{}

	// transport is used to accept inbound and dial outbound connections
	transport Transport

	filterTimeout time.Duration    // how long filterPeer waits for each filter result
	peerFilters   []PeerFilterFunc // filters every new peer has to pass

	rng *rand.Rand // seed for randomizing dial times and orders

	metrics *Metrics
}
    94  
    95  // NetAddress returns the address the switch is listening on.
    96  func (sw *Switch) NetAddress() *NetAddress {
    97  	addr := sw.transport.NetAddress()
    98  	return &addr
    99  }
   100  
// SwitchOption sets an optional parameter on the Switch.
// Options are passed to NewSwitch, which applies them in order after the
// switch's defaults have been initialized.
type SwitchOption func(*Switch)
   103  
   104  // NewSwitch creates a new Switch with the given config.
   105  func NewSwitch(
   106  	cfg *config.P2PConfig,
   107  	transport Transport,
   108  	options ...SwitchOption,
   109  ) *Switch {
   110  	sw := &Switch{
   111  		config:               cfg,
   112  		reactors:             make(map[string]Reactor),
   113  		chDescs:              make([]*conn.ChannelDescriptor, 0),
   114  		reactorsByCh:         make(map[byte]Reactor),
   115  		peers:                NewPeerSet(),
   116  		dialing:              cmap.NewCMap(),
   117  		reconnecting:         cmap.NewCMap(),
   118  		metrics:              NopMetrics(),
   119  		transport:            transport,
   120  		filterTimeout:        defaultFilterTimeout,
   121  		persistentPeersAddrs: make([]*NetAddress, 0),
   122  		unconditionalPeerIDs: make(map[ID]struct{}),
   123  	}
   124  
   125  	// Ensure we have a completely undeterministic PRNG.
   126  	sw.rng = rand.NewRand()
   127  
   128  	sw.BaseService = *service.NewBaseService(nil, "P2P Switch", sw)
   129  
   130  	for _, option := range options {
   131  		option(sw)
   132  	}
   133  
   134  	return sw
   135  }
   136  
   137  // SwitchFilterTimeout sets the timeout used for peer filters.
   138  func SwitchFilterTimeout(timeout time.Duration) SwitchOption {
   139  	return func(sw *Switch) { sw.filterTimeout = timeout }
   140  }
   141  
   142  // SwitchPeerFilters sets the filters for rejection of new peers.
   143  func SwitchPeerFilters(filters ...PeerFilterFunc) SwitchOption {
   144  	return func(sw *Switch) { sw.peerFilters = filters }
   145  }
   146  
   147  // WithMetrics sets the metrics.
   148  func WithMetrics(metrics *Metrics) SwitchOption {
   149  	return func(sw *Switch) { sw.metrics = metrics }
   150  }
   151  
   152  //---------------------------------------------------------------------
   153  // Switch setup
   154  
   155  // AddReactor adds the given reactor to the switch.
   156  // NOTE: Not goroutine safe.
   157  func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
   158  	for _, chDesc := range reactor.GetChannels() {
   159  		chID := chDesc.ID
   160  		// No two reactors can share the same channel.
   161  		if sw.reactorsByCh[chID] != nil {
   162  			panic(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
   163  		}
   164  		sw.chDescs = append(sw.chDescs, chDesc)
   165  		sw.reactorsByCh[chID] = reactor
   166  	}
   167  	sw.reactors[name] = reactor
   168  	reactor.SetSwitch(sw)
   169  	return reactor
   170  }
   171  
   172  // RemoveReactor removes the given Reactor from the Switch.
   173  // NOTE: Not goroutine safe.
   174  func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
   175  	for _, chDesc := range reactor.GetChannels() {
   176  		// remove channel description
   177  		for i := 0; i < len(sw.chDescs); i++ {
   178  			if chDesc.ID == sw.chDescs[i].ID {
   179  				sw.chDescs = append(sw.chDescs[:i], sw.chDescs[i+1:]...)
   180  				break
   181  			}
   182  		}
   183  		delete(sw.reactorsByCh, chDesc.ID)
   184  	}
   185  	delete(sw.reactors, name)
   186  	reactor.SetSwitch(nil)
   187  }
   188  
   189  // Reactors returns a map of reactors registered on the switch.
   190  // NOTE: Not goroutine safe.
   191  func (sw *Switch) Reactors() map[string]Reactor {
   192  	return sw.reactors
   193  }
   194  
   195  // Reactor returns the reactor with the given name.
   196  // NOTE: Not goroutine safe.
   197  func (sw *Switch) Reactor(name string) Reactor {
   198  	return sw.reactors[name]
   199  }
   200  
   201  // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
   202  // NOTE: Not goroutine safe.
   203  func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
   204  	sw.nodeInfo = nodeInfo
   205  }
   206  
   207  // NodeInfo returns the switch's NodeInfo.
   208  // NOTE: Not goroutine safe.
   209  func (sw *Switch) NodeInfo() NodeInfo {
   210  	return sw.nodeInfo
   211  }
   212  
   213  // SetNodeKey sets the switch's private key for authenticated encryption.
   214  // NOTE: Not goroutine safe.
   215  func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
   216  	sw.nodeKey = nodeKey
   217  }
   218  
   219  //---------------------------------------------------------------------
   220  // Service start/stop
   221  
   222  // OnStart implements BaseService. It starts all the reactors and peers.
   223  func (sw *Switch) OnStart() error {
   224  	// Start reactors
   225  	for _, reactor := range sw.reactors {
   226  		err := reactor.Start()
   227  		if err != nil {
   228  			return fmt.Errorf("failed to start %v: %w", reactor, err)
   229  		}
   230  	}
   231  
   232  	// Start accepting Peers.
   233  	go sw.acceptRoutine()
   234  
   235  	return nil
   236  }
   237  
   238  // OnStop implements BaseService. It stops all peers and reactors.
   239  func (sw *Switch) OnStop() {
   240  	// Stop peers
   241  	for _, p := range sw.peers.List() {
   242  		sw.stopAndRemovePeer(p, nil)
   243  	}
   244  
   245  	// Stop reactors
   246  	sw.Logger.Debug("Switch: Stopping reactors")
   247  	for _, reactor := range sw.reactors {
   248  		if err := reactor.Stop(); err != nil {
   249  			sw.Logger.Error("error while stopped reactor", "reactor", reactor, "error", err)
   250  		}
   251  	}
   252  }
   253  
   254  //---------------------------------------------------------------------
   255  // Peers
   256  
   257  // Broadcast runs a go routine for each attempted send, which will block trying
   258  // to send for defaultSendTimeoutSeconds. Returns a channel which receives
   259  // success values for each attempted send (false if times out). Channel will be
   260  // closed once msg bytes are sent to all peers (or time out).
   261  //
   262  // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
   263  func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
   264  	sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", fmt.Sprintf("%X", msgBytes))
   265  
   266  	peers := sw.peers.List()
   267  	var wg sync.WaitGroup
   268  	wg.Add(len(peers))
   269  	successChan := make(chan bool, len(peers))
   270  
   271  	for _, peer := range peers {
   272  		go func(p Peer) {
   273  			defer wg.Done()
   274  			success := p.Send(chID, msgBytes)
   275  			successChan <- success
   276  		}(peer)
   277  	}
   278  
   279  	go func() {
   280  		wg.Wait()
   281  		close(successChan)
   282  	}()
   283  
   284  	return successChan
   285  }
   286  
   287  // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
   288  // unconditional peers are not counted here.
   289  func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
   290  	peers := sw.peers.List()
   291  	for _, peer := range peers {
   292  		if peer.IsOutbound() {
   293  			if !sw.IsPeerUnconditional(peer.ID()) {
   294  				outbound++
   295  			}
   296  		} else {
   297  			if !sw.IsPeerUnconditional(peer.ID()) {
   298  				inbound++
   299  			}
   300  		}
   301  	}
   302  	dialing = sw.dialing.Size()
   303  	return
   304  }
   305  
   306  func (sw *Switch) IsPeerUnconditional(id ID) bool {
   307  	_, ok := sw.unconditionalPeerIDs[id]
   308  	return ok
   309  }
   310  
   311  // MaxNumOutboundPeers returns a maximum number of outbound peers.
   312  func (sw *Switch) MaxNumOutboundPeers() int {
   313  	return sw.config.MaxNumOutboundPeers
   314  }
   315  
   316  // Peers returns the set of peers that are connected to the switch.
   317  func (sw *Switch) Peers() IPeerSet {
   318  	return sw.peers
   319  }
   320  
   321  // StopPeerForError disconnects from a peer due to external error.
   322  // If the peer is persistent, it will attempt to reconnect.
   323  // TODO: make record depending on reason.
   324  func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
   325  	if !peer.IsRunning() {
   326  		return
   327  	}
   328  
   329  	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
   330  	sw.stopAndRemovePeer(peer, reason)
   331  
   332  	if peer.IsPersistent() {
   333  		var addr *NetAddress
   334  		if peer.IsOutbound() { // socket address for outbound peers
   335  			addr = peer.SocketAddr()
   336  		} else { // self-reported address for inbound peers
   337  			var err error
   338  			addr, err = peer.NodeInfo().NetAddress()
   339  			if err != nil {
   340  				sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
   341  					"peer", peer, "err", err)
   342  				return
   343  			}
   344  		}
   345  		go sw.reconnectToPeer(addr)
   346  	}
   347  }
   348  
   349  // StopPeerGracefully disconnects from a peer gracefully.
   350  // TODO: handle graceful disconnects.
   351  func (sw *Switch) StopPeerGracefully(peer Peer) {
   352  	sw.Logger.Info("Stopping peer gracefully")
   353  	sw.stopAndRemovePeer(peer, nil)
   354  }
   355  
   356  func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
   357  	sw.transport.Cleanup(peer)
   358  	if err := peer.Stop(); err != nil {
   359  		sw.Logger.Error("error while stopping peer", "error", err) // TODO: should return error to be handled accordingly
   360  	}
   361  
   362  	for _, reactor := range sw.reactors {
   363  		reactor.RemovePeer(peer, reason)
   364  	}
   365  
   366  	// Removing a peer should go last to avoid a situation where a peer
   367  	// reconnect to our node and the switch calls InitPeer before
   368  	// RemovePeer is finished.
   369  	// https://github.com/DFWallet/tendermint-cosmos/issues/3338
   370  	if sw.peers.Remove(peer) {
   371  		sw.metrics.Peers.Add(float64(-1))
   372  	}
   373  }
   374  
   375  // reconnectToPeer tries to reconnect to the addr, first repeatedly
   376  // with a fixed interval, then with exponential backoff.
   377  // If no success after all that, it stops trying, and leaves it
   378  // to the PEX/Addrbook to find the peer with the addr again
   379  // NOTE: this will keep trying even if the handshake or auth fails.
   380  // TODO: be more explicit with error types so we only retry on certain failures
   381  //  - ie. if we're getting ErrDuplicatePeer we can stop
   382  //  	because the addrbook got us the peer back already
   383  func (sw *Switch) reconnectToPeer(addr *NetAddress) {
   384  	if sw.reconnecting.Has(string(addr.ID)) {
   385  		return
   386  	}
   387  	sw.reconnecting.Set(string(addr.ID), addr)
   388  	defer sw.reconnecting.Delete(string(addr.ID))
   389  
   390  	start := time.Now()
   391  	sw.Logger.Info("Reconnecting to peer", "addr", addr)
   392  	for i := 0; i < reconnectAttempts; i++ {
   393  		if !sw.IsRunning() {
   394  			return
   395  		}
   396  
   397  		err := sw.DialPeerWithAddress(addr)
   398  		if err == nil {
   399  			return // success
   400  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   401  			return
   402  		}
   403  
   404  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   405  		// sleep a set amount
   406  		sw.randomSleep(reconnectInterval)
   407  		continue
   408  	}
   409  
   410  	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
   411  		"addr", addr, "elapsed", time.Since(start))
   412  	for i := 0; i < reconnectBackOffAttempts; i++ {
   413  		if !sw.IsRunning() {
   414  			return
   415  		}
   416  
   417  		// sleep an exponentially increasing amount
   418  		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
   419  		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
   420  
   421  		err := sw.DialPeerWithAddress(addr)
   422  		if err == nil {
   423  			return // success
   424  		} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
   425  			return
   426  		}
   427  		sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
   428  	}
   429  	sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
   430  }
   431  
   432  // SetAddrBook allows to set address book on Switch.
   433  func (sw *Switch) SetAddrBook(addrBook AddrBook) {
   434  	sw.addrBook = addrBook
   435  }
   436  
   437  // MarkPeerAsGood marks the given peer as good when it did something useful
   438  // like contributed to consensus.
   439  func (sw *Switch) MarkPeerAsGood(peer Peer) {
   440  	if sw.addrBook != nil {
   441  		sw.addrBook.MarkGood(peer.ID())
   442  	}
   443  }
   444  
   445  //---------------------------------------------------------------------
   446  // Dialing
   447  
   448  type privateAddr interface {
   449  	PrivateAddr() bool
   450  }
   451  
   452  func isPrivateAddr(err error) bool {
   453  	te, ok := err.(privateAddr)
   454  	return ok && te.PrivateAddr()
   455  }
   456  
   457  // DialPeersAsync dials a list of peers asynchronously in random order.
   458  // Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
   459  // It ignores ErrNetAddressLookup. However, if there are other errors, first
   460  // encounter is returned.
   461  // Nop if there are no peers.
   462  func (sw *Switch) DialPeersAsync(peers []string) error {
   463  	netAddrs, errs := NewNetAddressStrings(peers)
   464  	// report all the errors
   465  	for _, err := range errs {
   466  		sw.Logger.Error("Error in peer's address", "err", err)
   467  	}
   468  	// return first non-ErrNetAddressLookup error
   469  	for _, err := range errs {
   470  		if _, ok := err.(ErrNetAddressLookup); ok {
   471  			continue
   472  		}
   473  		return err
   474  	}
   475  	sw.dialPeersAsync(netAddrs)
   476  	return nil
   477  }
   478  
   479  func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
   480  	ourAddr := sw.NetAddress()
   481  
   482  	// TODO: this code feels like it's in the wrong place.
   483  	// The integration tests depend on the addrBook being saved
   484  	// right away but maybe we can change that. Recall that
   485  	// the addrBook is only written to disk every 2min
   486  	if sw.addrBook != nil {
   487  		// add peers to `addrBook`
   488  		for _, netAddr := range netAddrs {
   489  			// do not add our address or ID
   490  			if !netAddr.Same(ourAddr) {
   491  				if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
   492  					if isPrivateAddr(err) {
   493  						sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
   494  					} else {
   495  						sw.Logger.Error("Can't add peer's address to addrbook", "err", err)
   496  					}
   497  				}
   498  			}
   499  		}
   500  		// Persist some peers to disk right away.
   501  		// NOTE: integration tests depend on this
   502  		sw.addrBook.Save()
   503  	}
   504  
   505  	// permute the list, dial them in random order.
   506  	perm := sw.rng.Perm(len(netAddrs))
   507  	for i := 0; i < len(perm); i++ {
   508  		go func(i int) {
   509  			j := perm[i]
   510  			addr := netAddrs[j]
   511  
   512  			if addr.Same(ourAddr) {
   513  				sw.Logger.Debug("Ignore attempt to connect to ourselves", "addr", addr, "ourAddr", ourAddr)
   514  				return
   515  			}
   516  
   517  			sw.randomSleep(0)
   518  
   519  			err := sw.DialPeerWithAddress(addr)
   520  			if err != nil {
   521  				switch err.(type) {
   522  				case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
   523  					sw.Logger.Debug("Error dialing peer", "err", err)
   524  				default:
   525  					sw.Logger.Error("Error dialing peer", "err", err)
   526  				}
   527  			}
   528  		}(i)
   529  	}
   530  }
   531  
   532  // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
   533  // and authenticates successfully.
   534  // If we're currently dialing this address or it belongs to an existing peer,
   535  // ErrCurrentlyDialingOrExistingAddress is returned.
   536  func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
   537  	if sw.IsDialingOrExistingAddress(addr) {
   538  		return ErrCurrentlyDialingOrExistingAddress{addr.String()}
   539  	}
   540  
   541  	sw.dialing.Set(string(addr.ID), addr)
   542  	defer sw.dialing.Delete(string(addr.ID))
   543  
   544  	return sw.addOutboundPeerWithConfig(addr, sw.config)
   545  }
   546  
   547  // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
   548  func (sw *Switch) randomSleep(interval time.Duration) {
   549  	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
   550  	time.Sleep(r + interval)
   551  }
   552  
   553  // IsDialingOrExistingAddress returns true if switch has a peer with the given
   554  // address or dialing it at the moment.
   555  func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
   556  	return sw.dialing.Has(string(addr.ID)) ||
   557  		sw.peers.Has(addr.ID) ||
   558  		(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
   559  }
   560  
   561  // AddPersistentPeers allows you to set persistent peers. It ignores
   562  // ErrNetAddressLookup. However, if there are other errors, first encounter is
   563  // returned.
   564  func (sw *Switch) AddPersistentPeers(addrs []string) error {
   565  	sw.Logger.Info("Adding persistent peers", "addrs", addrs)
   566  	netAddrs, errs := NewNetAddressStrings(addrs)
   567  	// report all the errors
   568  	for _, err := range errs {
   569  		sw.Logger.Error("Error in peer's address", "err", err)
   570  	}
   571  	// return first non-ErrNetAddressLookup error
   572  	for _, err := range errs {
   573  		if _, ok := err.(ErrNetAddressLookup); ok {
   574  			continue
   575  		}
   576  		return err
   577  	}
   578  	sw.persistentPeersAddrs = netAddrs
   579  	return nil
   580  }
   581  
   582  func (sw *Switch) AddUnconditionalPeerIDs(ids []string) error {
   583  	sw.Logger.Info("Adding unconditional peer ids", "ids", ids)
   584  	for i, id := range ids {
   585  		err := validateID(ID(id))
   586  		if err != nil {
   587  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   588  		}
   589  		sw.unconditionalPeerIDs[ID(id)] = struct{}{}
   590  	}
   591  	return nil
   592  }
   593  
   594  func (sw *Switch) AddPrivatePeerIDs(ids []string) error {
   595  	validIDs := make([]string, 0, len(ids))
   596  	for i, id := range ids {
   597  		err := validateID(ID(id))
   598  		if err != nil {
   599  			return fmt.Errorf("wrong ID #%d: %w", i, err)
   600  		}
   601  		validIDs = append(validIDs, id)
   602  	}
   603  
   604  	sw.addrBook.AddPrivateIDs(validIDs)
   605  
   606  	return nil
   607  }
   608  
   609  func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
   610  	for _, pa := range sw.persistentPeersAddrs {
   611  		if pa.Equals(na) {
   612  			return true
   613  		}
   614  	}
   615  	return false
   616  }
   617  
   618  func (sw *Switch) acceptRoutine() {
   619  	for {
   620  		p, err := sw.transport.Accept(peerConfig{
   621  			chDescs:      sw.chDescs,
   622  			onPeerError:  sw.StopPeerForError,
   623  			reactorsByCh: sw.reactorsByCh,
   624  			metrics:      sw.metrics,
   625  			isPersistent: sw.IsPeerPersistent,
   626  		})
   627  		if err != nil {
   628  			switch err := err.(type) {
   629  			case ErrRejected:
   630  				if err.IsSelf() {
   631  					// Remove the given address from the address book and add to our addresses
   632  					// to avoid dialing in the future.
   633  					addr := err.Addr()
   634  					sw.addrBook.RemoveAddress(&addr)
   635  					sw.addrBook.AddOurAddress(&addr)
   636  				}
   637  
   638  				sw.Logger.Info(
   639  					"Inbound Peer rejected",
   640  					"err", err,
   641  					"numPeers", sw.peers.Size(),
   642  				)
   643  
   644  				continue
   645  			case ErrFilterTimeout:
   646  				sw.Logger.Error(
   647  					"Peer filter timed out",
   648  					"err", err,
   649  				)
   650  
   651  				continue
   652  			case ErrTransportClosed:
   653  				sw.Logger.Error(
   654  					"Stopped accept routine, as transport is closed",
   655  					"numPeers", sw.peers.Size(),
   656  				)
   657  			default:
   658  				sw.Logger.Error(
   659  					"Accept on transport errored",
   660  					"err", err,
   661  					"numPeers", sw.peers.Size(),
   662  				)
   663  				// We could instead have a retry loop around the acceptRoutine,
   664  				// but that would need to stop and let the node shutdown eventually.
   665  				// So might as well panic and let process managers restart the node.
   666  				// There's no point in letting the node run without the acceptRoutine,
   667  				// since it won't be able to accept new connections.
   668  				panic(fmt.Errorf("accept routine exited: %v", err))
   669  			}
   670  
   671  			break
   672  		}
   673  
   674  		if !sw.IsPeerUnconditional(p.NodeInfo().ID()) {
   675  			// Ignore connection if we already have enough peers.
   676  			_, in, _ := sw.NumPeers()
   677  			if in >= sw.config.MaxNumInboundPeers {
   678  				sw.Logger.Info(
   679  					"Ignoring inbound connection: already have enough inbound peers",
   680  					"address", p.SocketAddr(),
   681  					"have", in,
   682  					"max", sw.config.MaxNumInboundPeers,
   683  				)
   684  
   685  				sw.transport.Cleanup(p)
   686  
   687  				continue
   688  			}
   689  
   690  		}
   691  
   692  		if err := sw.addPeer(p); err != nil {
   693  			sw.transport.Cleanup(p)
   694  			if p.IsRunning() {
   695  				_ = p.Stop()
   696  			}
   697  			sw.Logger.Info(
   698  				"Ignoring inbound connection: error while adding peer",
   699  				"err", err,
   700  				"id", p.ID(),
   701  			)
   702  		}
   703  	}
   704  }
   705  
   706  // dial the peer; make secret connection; authenticate against the dialed ID;
   707  // add the peer.
   708  // if dialing fails, start the reconnect loop. If handshake fails, it's over.
   709  // If peer is started successfully, reconnectLoop will start when
   710  // StopPeerForError is called.
   711  func (sw *Switch) addOutboundPeerWithConfig(
   712  	addr *NetAddress,
   713  	cfg *config.P2PConfig,
   714  ) error {
   715  	sw.Logger.Info("Dialing peer", "address", addr)
   716  
   717  	// XXX(xla): Remove the leakage of test concerns in implementation.
   718  	if cfg.TestDialFail {
   719  		go sw.reconnectToPeer(addr)
   720  		return fmt.Errorf("dial err (peerConfig.DialFail == true)")
   721  	}
   722  
   723  	p, err := sw.transport.Dial(*addr, peerConfig{
   724  		chDescs:      sw.chDescs,
   725  		onPeerError:  sw.StopPeerForError,
   726  		isPersistent: sw.IsPeerPersistent,
   727  		reactorsByCh: sw.reactorsByCh,
   728  		metrics:      sw.metrics,
   729  	})
   730  	if err != nil {
   731  		if e, ok := err.(ErrRejected); ok {
   732  			if e.IsSelf() {
   733  				// Remove the given address from the address book and add to our addresses
   734  				// to avoid dialing in the future.
   735  				sw.addrBook.RemoveAddress(addr)
   736  				sw.addrBook.AddOurAddress(addr)
   737  
   738  				return err
   739  			}
   740  		}
   741  
   742  		// retry persistent peers after
   743  		// any dial error besides IsSelf()
   744  		if sw.IsPeerPersistent(addr) {
   745  			go sw.reconnectToPeer(addr)
   746  		}
   747  
   748  		return err
   749  	}
   750  
   751  	if err := sw.addPeer(p); err != nil {
   752  		sw.transport.Cleanup(p)
   753  		if p.IsRunning() {
   754  			_ = p.Stop()
   755  		}
   756  		return err
   757  	}
   758  
   759  	return nil
   760  }
   761  
   762  func (sw *Switch) filterPeer(p Peer) error {
   763  	// Avoid duplicate
   764  	if sw.peers.Has(p.ID()) {
   765  		return ErrRejected{id: p.ID(), isDuplicate: true}
   766  	}
   767  
   768  	errc := make(chan error, len(sw.peerFilters))
   769  
   770  	for _, f := range sw.peerFilters {
   771  		go func(f PeerFilterFunc, p Peer, errc chan<- error) {
   772  			errc <- f(sw.peers, p)
   773  		}(f, p, errc)
   774  	}
   775  
   776  	for i := 0; i < cap(errc); i++ {
   777  		select {
   778  		case err := <-errc:
   779  			if err != nil {
   780  				return ErrRejected{id: p.ID(), err: err, isFiltered: true}
   781  			}
   782  		case <-time.After(sw.filterTimeout):
   783  			return ErrFilterTimeout{}
   784  		}
   785  	}
   786  
   787  	return nil
   788  }
   789  
   790  // addPeer starts up the Peer and adds it to the Switch. Error is returned if
   791  // the peer is filtered out or failed to start or can't be added.
   792  func (sw *Switch) addPeer(p Peer) error {
   793  	if err := sw.filterPeer(p); err != nil {
   794  		return err
   795  	}
   796  
   797  	p.SetLogger(sw.Logger.With("peer", p.SocketAddr()))
   798  
   799  	// Handle the shut down case where the switch has stopped but we're
   800  	// concurrently trying to add a peer.
   801  	if !sw.IsRunning() {
   802  		// XXX should this return an error or just log and terminate?
   803  		sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
   804  		return nil
   805  	}
   806  
   807  	// Add some data to the peer, which is required by reactors.
   808  	for _, reactor := range sw.reactors {
   809  		p = reactor.InitPeer(p)
   810  	}
   811  
   812  	// Start the peer's send/recv routines.
   813  	// Must start it before adding it to the peer set
   814  	// to prevent Start and Stop from being called concurrently.
   815  	err := p.Start()
   816  	if err != nil {
   817  		// Should never happen
   818  		sw.Logger.Error("Error starting peer", "err", err, "peer", p)
   819  		return err
   820  	}
   821  
   822  	// Add the peer to PeerSet. Do this before starting the reactors
   823  	// so that if Receive errors, we will find the peer and remove it.
   824  	// Add should not err since we already checked peers.Has().
   825  	if err := sw.peers.Add(p); err != nil {
   826  		return err
   827  	}
   828  	sw.metrics.Peers.Add(float64(1))
   829  
   830  	// Start all the reactor protocols on the peer.
   831  	for _, reactor := range sw.reactors {
   832  		reactor.AddPeer(p)
   833  	}
   834  
   835  	sw.Logger.Info("Added peer", "peer", p)
   836  
   837  	return nil
   838  }