github.com/unicornultrafoundation/go-u2u@v1.0.0-rc1.0.20240205080301-e74a83d3fadc/p2p/dial.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package p2p
    18  
    19  import (
    20  	"context"
    21  	crand "crypto/rand"
    22  	"encoding/binary"
    23  	"errors"
    24  	"fmt"
    25  	mrand "math/rand"
    26  	"net"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/unicornultrafoundation/go-u2u/common/mclock"
    31  	"github.com/unicornultrafoundation/go-u2u/log"
    32  	"github.com/unicornultrafoundation/go-u2u/p2p/enode"
    33  	"github.com/unicornultrafoundation/go-u2u/p2p/netutil"
    34  )
    35  
const (
	// dialHistoryExpiration is the amount of time spent waiting in between redialing
	// a certain node. The limit is a bit higher than inboundThrottleTime to prevent
	// failing dials in small private networks.
	dialHistoryExpiration = inboundThrottleTime + 5*time.Second

	// Config for the "Looking for peers" message.
	dialStatsLogInterval = 120 * time.Second // printed at most this often
	dialStatsPeerLimit   = 2                 // suppressed once at least this many dialed peers exist

	// Endpoint resolution is throttled with bounded backoff: the delay starts at
	// initialResolveDelay, doubles after every failed resolve and never exceeds
	// maxResolveDelay.
	initialResolveDelay = 60 * time.Second
	maxResolveDelay     = time.Hour
)
    50  
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
	// Dial opens a connection to the given node. The scheduler passes its own
	// context, which is canceled when the scheduler is stopped.
	Dial(context.Context, *enode.Node) (net.Conn, error)
}
    56  
// nodeResolver looks up the current record of a node. It is used by dial tasks
// to find the endpoint of static nodes.
type nodeResolver interface {
	// Resolve returns the resolved node. A nil result is treated as
	// "not found" by dialTask.resolve.
	Resolve(*enode.Node) *enode.Node
}
    60  
// tcpDialer implements NodeDialer using real TCP connections.
type tcpDialer struct {
	d *net.Dialer // dialer used to open the TCP connection
}
    65  
    66  func (t tcpDialer) Dial(ctx context.Context, dest *enode.Node) (net.Conn, error) {
    67  	return t.d.DialContext(ctx, "tcp", nodeAddr(dest).String())
    68  }
    69  
    70  func nodeAddr(n *enode.Node) net.Addr {
    71  	return &net.TCPAddr{IP: n.IP(), Port: n.TCP()}
    72  }
    73  
// checkDial errors. Each value names one reason for which a node is rejected
// as a dial candidate.
var (
	errSelf             = errors.New("is self")                           // node ID equals our own ID
	errAlreadyDialing   = errors.New("already dialing")                   // a dial task for the node is active
	errAlreadyConnected = errors.New("already connected")                 // node is in the peer set
	errAlreadyListened  = errors.New("already listened")                  // NOTE(review): not returned in this file; presumably used elsewhere in the package
	errRecentlyDialed   = errors.New("recently dialed")                   // node is still in the dial history
	errNetRestrict      = errors.New("not contained in netrestrict list") // IP is outside netRestrict
	errIPRestrict       = errors.New("not contained in iprestrict list")  // IP is not listed in ipRestrict
	errNoPort           = errors.New("node does not provide TCP port")    // node has an IP but TCP port 0
)
    85  
// dialScheduler creates outbound connections and submits them into Server.
// Two types of peer connections can be created:
//
//   - static dials are pre-configured connections. The dialer attempts to
//     keep these nodes connected at all times.
//
//   - dynamic dials are created from node discovery results. The dialer
//     continuously reads candidate nodes from its input iterator and attempts
//     to create peer connections to nodes arriving through the iterator.
type dialScheduler struct {
	dialConfig
	setupFunc   dialSetupFunc      // called by dial tasks for every established connection
	wg          sync.WaitGroup     // tracks the readNodes and loop goroutines
	cancel      context.CancelFunc // cancels ctx, called by stop
	ctx         context.Context    // passed to every Dial; its cancellation terminates loop
	nodesIn     chan *enode.Node   // dial candidates, sent by readNodes
	doneCh      chan *dialTask     // finished tasks, sent by the dial goroutines
	addStaticCh chan *enode.Node   // requests from addStatic
	remStaticCh chan *enode.Node   // requests from removeStatic
	addPeerCh   chan *conn         // notifications from peerAdded
	remPeerCh   chan *conn         // notifications from peerRemoved

	// Everything below here belongs to loop and
	// should only be accessed by code on the loop goroutine.
	dialing   map[enode.ID]*dialTask // active tasks
	peers     map[enode.ID]connFlag  // all connected peers
	dialPeers int                    // current number of dialed peers

	// The static map tracks all static dial tasks. The subset of usable static dial tasks
	// (i.e. those passing checkDial) is kept in staticPool. The scheduler prefers
	// launching random static tasks from the pool over launching dynamic dials from the
	// iterator.
	static     map[enode.ID]*dialTask
	staticPool []*dialTask

	// The dial history keeps recently dialed nodes. Members of history are not dialed.
	// historyTimer fires when the next history item expires; historyTimerTime is the
	// expiry time the timer is currently armed for (zero when it is not armed).
	history          expHeap
	historyTimer     mclock.Timer
	historyTimerTime mclock.AbsTime

	// for logStats
	lastStatsLog     mclock.AbsTime // time at which the statistics interval was last reset
	doneSinceLastLog int            // number of dial tasks finished since lastStatsLog
}
   130  
// dialSetupFunc is the callback that receives every successfully dialed
// connection together with the flags of the dial task and the dialed node.
// Its error is returned as the result of the dial.
type dialSetupFunc func(net.Conn, connFlag, *enode.Node) error
   132  
// dialConfig holds the settings of a dialScheduler. Zero values of
// maxActiveDials, log, clock and rand are replaced by withDefaults.
type dialConfig struct {
	self           enode.ID         // our own ID
	maxDialPeers   int              // maximum number of dialed peers
	maxActiveDials int              // maximum number of active dials
	netRestrict    *netutil.Netlist // IP netrestrict list, disabled if nil
	ipRestrict     []string         // IP address restrict list, disabled if empty
	resolver       nodeResolver     // used to resolve node endpoints; resolving is disabled if nil
	dialer         NodeDialer       // creates the actual connections
	log            log.Logger       // destination of dialer log messages
	clock          mclock.Clock     // time source for dial history, statistics and resolve backoff
	rand           *mrand.Rand      // picks the next static dial from the pool
}
   145  
   146  func (cfg dialConfig) withDefaults() dialConfig {
   147  	if cfg.maxActiveDials == 0 {
   148  		cfg.maxActiveDials = defaultMaxPendingPeers
   149  	}
   150  	if cfg.log == nil {
   151  		cfg.log = log.Root()
   152  	}
   153  	if cfg.clock == nil {
   154  		cfg.clock = mclock.System{}
   155  	}
   156  	if cfg.rand == nil {
   157  		seedb := make([]byte, 8)
   158  		crand.Read(seedb)
   159  		seed := int64(binary.BigEndian.Uint64(seedb))
   160  		cfg.rand = mrand.New(mrand.NewSource(seed))
   161  	}
   162  	return cfg
   163  }
   164  
   165  func newDialScheduler(config dialConfig, it enode.Iterator, setupFunc dialSetupFunc) *dialScheduler {
   166  	d := &dialScheduler{
   167  		dialConfig:  config.withDefaults(),
   168  		setupFunc:   setupFunc,
   169  		dialing:     make(map[enode.ID]*dialTask),
   170  		static:      make(map[enode.ID]*dialTask),
   171  		peers:       make(map[enode.ID]connFlag),
   172  		doneCh:      make(chan *dialTask),
   173  		nodesIn:     make(chan *enode.Node),
   174  		addStaticCh: make(chan *enode.Node),
   175  		remStaticCh: make(chan *enode.Node),
   176  		addPeerCh:   make(chan *conn),
   177  		remPeerCh:   make(chan *conn),
   178  	}
   179  	d.lastStatsLog = d.clock.Now()
   180  	d.ctx, d.cancel = context.WithCancel(context.Background())
   181  	d.wg.Add(2)
   182  	go d.readNodes(it)
   183  	go d.loop(it)
   184  	return d
   185  }
   186  
   187  // stop shuts down the dialer, canceling all current dial tasks.
   188  func (d *dialScheduler) stop() {
   189  	d.cancel()
   190  	d.wg.Wait()
   191  }
   192  
   193  // addStatic adds a static dial candidate.
   194  func (d *dialScheduler) addStatic(n *enode.Node) {
   195  	select {
   196  	case d.addStaticCh <- n:
   197  	case <-d.ctx.Done():
   198  	}
   199  }
   200  
   201  // removeStatic removes a static dial candidate.
   202  func (d *dialScheduler) removeStatic(n *enode.Node) {
   203  	select {
   204  	case d.remStaticCh <- n:
   205  	case <-d.ctx.Done():
   206  	}
   207  }
   208  
   209  // peerAdded updates the peer set.
   210  func (d *dialScheduler) peerAdded(c *conn) {
   211  	select {
   212  	case d.addPeerCh <- c:
   213  	case <-d.ctx.Done():
   214  	}
   215  }
   216  
   217  // peerRemoved updates the peer set.
   218  func (d *dialScheduler) peerRemoved(c *conn) {
   219  	select {
   220  	case d.remPeerCh <- c:
   221  	case <-d.ctx.Done():
   222  	}
   223  }
   224  
// loop is the main loop of the dialer. It owns all scheduling state (dialing,
// peers, static, staticPool, history) and is the only goroutine touching it.
//
// Every iteration first launches as many dials as there are free slots, static
// dials first, then re-arms the history timer and possibly prints statistics.
// It then waits for exactly one event: a discovered node, a finished task, a
// peer set change, a static node change, a history expiry or shutdown.
//
// On shutdown the iterator is closed, the history timer is stopped, and the
// loop waits for all running dial tasks to report completion before it
// signals the wait group.
func (d *dialScheduler) loop(it enode.Iterator) {
	var (
		// nodesCh is d.nodesIn while dynamic dials may be started and nil
		// otherwise; receiving from a nil channel blocks forever, which
		// disables that select case.
		nodesCh chan *enode.Node
		// historyExp is signaled by the history timer. It has capacity one so
		// the timer callback never blocks.
		historyExp = make(chan struct{}, 1)
	)

loop:
	for {
		// Launch new dials if slots are available.
		slots := d.freeDialSlots()
		slots -= d.startStaticDials(slots)
		if slots > 0 {
			nodesCh = d.nodesIn
		} else {
			nodesCh = nil
		}
		d.rearmHistoryTimer(historyExp)
		d.logStats()

		select {
		case node := <-nodesCh:
			// A dial candidate arrived from the iterator.
			if err := d.checkDial(node); err != nil {
				d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err)
			} else {
				d.startDial(newDialTask(node, dynDialedConn))
			}

		case task := <-d.doneCh:
			// A dial task finished. If it was a static task it may become
			// eligible for the static pool again.
			id := task.dest.ID()
			delete(d.dialing, id)
			d.updateStaticPool(id)
			d.doneSinceLastLog++

		case c := <-d.addPeerCh:
			// Only connections created by this dialer count against maxDialPeers.
			if c.is(dynDialedConn) || c.is(staticDialedConn) {
				d.dialPeers++
			}
			id := c.node.ID()
			d.peers[id] = c.flags
			// Remove from static pool because the node is now connected.
			task := d.static[id]
			if task != nil && task.staticPoolIndex >= 0 {
				d.removeFromStaticPool(task.staticPoolIndex)
			}
			// TODO: cancel dials to connected peers

		case c := <-d.remPeerCh:
			if c.is(dynDialedConn) || c.is(staticDialedConn) {
				d.dialPeers--
			}
			delete(d.peers, c.node.ID())
			// A disconnected static node may be dialed again.
			d.updateStaticPool(c.node.ID())

		case node := <-d.addStaticCh:
			id := node.ID()
			_, exists := d.static[id]
			d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists)
			if exists {
				// Already known, keep the existing task.
				continue loop
			}
			task := newDialTask(node, staticDialedConn)
			d.static[id] = task
			// The task only enters the pool if it can be dialed right now;
			// otherwise updateStaticPool adds it later.
			if d.checkDial(node) == nil {
				d.addToStaticPool(task)
			}

		case node := <-d.remStaticCh:
			id := node.ID()
			task := d.static[id]
			d.log.Trace("Removing static node", "id", id, "ok", task != nil)
			if task != nil {
				delete(d.static, id)
				if task.staticPoolIndex >= 0 {
					d.removeFromStaticPool(task.staticPoolIndex)
				}
			}

		case <-historyExp:
			// The history timer fired.
			d.expireHistory()

		case <-d.ctx.Done():
			// Shutdown was requested. Closing the iterator also lets readNodes exit.
			it.Close()
			break loop
		}
	}

	d.stopHistoryTimer(historyExp)
	// Wait for all running dial tasks; each sends on doneCh exactly once.
	for range d.dialing {
		<-d.doneCh
	}
	d.wg.Done()
}
   318  
   319  // readNodes runs in its own goroutine and delivers nodes from
   320  // the input iterator to the nodesIn channel.
   321  func (d *dialScheduler) readNodes(it enode.Iterator) {
   322  	defer d.wg.Done()
   323  
   324  	for it.Next() {
   325  		select {
   326  		case d.nodesIn <- it.Node():
   327  		case <-d.ctx.Done():
   328  		}
   329  	}
   330  }
   331  
   332  // logStats prints dialer statistics to the log. The message is suppressed when enough
   333  // peers are connected because users should only see it while their client is starting up
   334  // or comes back online.
   335  func (d *dialScheduler) logStats() {
   336  	now := d.clock.Now()
   337  	if d.lastStatsLog.Add(dialStatsLogInterval) > now {
   338  		return
   339  	}
   340  	if d.dialPeers < dialStatsPeerLimit && d.dialPeers < d.maxDialPeers {
   341  		d.log.Info("Looking for peers", "peercount", len(d.peers), "tried", d.doneSinceLastLog, "static", len(d.static))
   342  	}
   343  	d.doneSinceLastLog = 0
   344  	d.lastStatsLog = now
   345  }
   346  
   347  // rearmHistoryTimer configures d.historyTimer to fire when the
   348  // next item in d.history expires.
   349  func (d *dialScheduler) rearmHistoryTimer(ch chan struct{}) {
   350  	if len(d.history) == 0 || d.historyTimerTime == d.history.nextExpiry() {
   351  		return
   352  	}
   353  	d.stopHistoryTimer(ch)
   354  	d.historyTimerTime = d.history.nextExpiry()
   355  	timeout := time.Duration(d.historyTimerTime - d.clock.Now())
   356  	d.historyTimer = d.clock.AfterFunc(timeout, func() { ch <- struct{}{} })
   357  }
   358  
   359  // stopHistoryTimer stops the timer and drains the channel it sends on.
   360  func (d *dialScheduler) stopHistoryTimer(ch chan struct{}) {
   361  	if d.historyTimer != nil && !d.historyTimer.Stop() {
   362  		<-ch
   363  	}
   364  }
   365  
   366  // expireHistory removes expired items from d.history.
   367  func (d *dialScheduler) expireHistory() {
   368  	d.historyTimer.Stop()
   369  	d.historyTimer = nil
   370  	d.historyTimerTime = 0
   371  	d.history.expire(d.clock.Now(), func(hkey string) {
   372  		var id enode.ID
   373  		copy(id[:], hkey)
   374  		d.updateStaticPool(id)
   375  	})
   376  }
   377  
   378  // freeDialSlots returns the number of free dial slots. The result can be negative
   379  // when peers are connected while their task is still running.
   380  func (d *dialScheduler) freeDialSlots() int {
   381  	slots := (d.maxDialPeers - d.dialPeers) * 2
   382  	if slots > d.maxActiveDials {
   383  		slots = d.maxActiveDials
   384  	}
   385  	free := slots - len(d.dialing)
   386  	return free
   387  }
   388  
   389  // checkDial returns an error if node n should not be dialed.
   390  func (d *dialScheduler) checkDial(n *enode.Node) error {
   391  	if n.ID() == d.self {
   392  		return errSelf
   393  	}
   394  	if n.IP() != nil && n.TCP() == 0 {
   395  		// This check can trigger if a non-TCP node is found
   396  		// by discovery. If there is no IP, the node is a static
   397  		// node and the actual endpoint will be resolved later in dialTask.
   398  		return errNoPort
   399  	}
   400  	if _, ok := d.dialing[n.ID()]; ok {
   401  		return errAlreadyDialing
   402  	}
   403  	if _, ok := d.peers[n.ID()]; ok {
   404  		return errAlreadyConnected
   405  	}
   406  	if d.netRestrict != nil && !d.netRestrict.Contains(n.IP()) {
   407  		return errNetRestrict
   408  	}
   409  	if len(d.ipRestrict) > 0 && !contains(d.ipRestrict, n.IP().String()) {
   410  		return errIPRestrict
   411  	}
   412  	if d.history.contains(string(n.ID().Bytes())) {
   413  		return errRecentlyDialed
   414  	}
   415  	return nil
   416  }
   417  
   418  // startStaticDials starts n static dial tasks.
   419  func (d *dialScheduler) startStaticDials(n int) (started int) {
   420  	for started = 0; started < n && len(d.staticPool) > 0; started++ {
   421  		idx := d.rand.Intn(len(d.staticPool))
   422  		task := d.staticPool[idx]
   423  		d.startDial(task)
   424  		d.removeFromStaticPool(idx)
   425  	}
   426  	return started
   427  }
   428  
   429  // updateStaticPool attempts to move the given static dial back into staticPool.
   430  func (d *dialScheduler) updateStaticPool(id enode.ID) {
   431  	task, ok := d.static[id]
   432  	if ok && task.staticPoolIndex < 0 && d.checkDial(task.dest) == nil {
   433  		d.addToStaticPool(task)
   434  	}
   435  }
   436  
   437  func (d *dialScheduler) addToStaticPool(task *dialTask) {
   438  	if task.staticPoolIndex >= 0 {
   439  		panic("attempt to add task to staticPool twice")
   440  	}
   441  	d.staticPool = append(d.staticPool, task)
   442  	task.staticPoolIndex = len(d.staticPool) - 1
   443  }
   444  
   445  // removeFromStaticPool removes the task at idx from staticPool. It does that by moving the
   446  // current last element of the pool to idx and then shortening the pool by one.
   447  func (d *dialScheduler) removeFromStaticPool(idx int) {
   448  	task := d.staticPool[idx]
   449  	end := len(d.staticPool) - 1
   450  	d.staticPool[idx] = d.staticPool[end]
   451  	d.staticPool[idx].staticPoolIndex = idx
   452  	d.staticPool[end] = nil
   453  	d.staticPool = d.staticPool[:end]
   454  	task.staticPoolIndex = -1
   455  }
   456  
   457  // startDial runs the given dial task in a separate goroutine.
   458  func (d *dialScheduler) startDial(task *dialTask) {
   459  	d.log.Trace("Starting p2p dial", "id", task.dest.ID(), "ip", task.dest.IP(), "flag", task.flags)
   460  	hkey := string(task.dest.ID().Bytes())
   461  	d.history.add(hkey, d.clock.Now().Add(dialHistoryExpiration))
   462  	d.dialing[task.dest.ID()] = task
   463  	go func() {
   464  		task.run(d)
   465  		d.doneCh <- task
   466  	}()
   467  }
   468  
// A dialTask is generated for each node that is dialed.
type dialTask struct {
	staticPoolIndex int      // position in dialScheduler.staticPool, -1 when not in the pool
	flags           connFlag // connection flags, passed on to the setup function
	// These fields are private to the task and should not be
	// accessed by dialScheduler while the task is running.
	dest         *enode.Node    // node to dial; replaced when the node is resolved
	lastResolved mclock.AbsTime // time of the last resolve attempt, zero if none was made
	resolveDelay time.Duration  // minimum time between two resolve attempts
}
   479  
   480  func newDialTask(dest *enode.Node, flags connFlag) *dialTask {
   481  	return &dialTask{dest: dest, flags: flags, staticPoolIndex: -1}
   482  }
   483  
// dialError wraps errors returned by the NodeDialer. dialTask.run uses the
// type to tell a failed connection attempt apart from an error returned by
// the setup function.
type dialError struct {
	error
}
   487  
   488  func (t *dialTask) run(d *dialScheduler) {
   489  	if t.needResolve() && !t.resolve(d) {
   490  		return
   491  	}
   492  
   493  	err := t.dial(d, t.dest)
   494  	if err != nil {
   495  		// For static nodes, resolve one more time if dialing fails.
   496  		if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
   497  			if t.resolve(d) {
   498  				t.dial(d, t.dest)
   499  			}
   500  		}
   501  	}
   502  }
   503  
   504  func (t *dialTask) needResolve() bool {
   505  	return t.flags&staticDialedConn != 0 && t.dest.IP() == nil
   506  }
   507  
   508  // resolve attempts to find the current endpoint for the destination
   509  // using discovery.
   510  //
   511  // Resolve operations are throttled with backoff to avoid flooding the
   512  // discovery network with useless queries for nodes that don't exist.
   513  // The backoff delay resets when the node is found.
   514  func (t *dialTask) resolve(d *dialScheduler) bool {
   515  	if d.resolver == nil {
   516  		return false
   517  	}
   518  	if t.resolveDelay == 0 {
   519  		t.resolveDelay = initialResolveDelay
   520  	}
   521  	if t.lastResolved > 0 && time.Duration(d.clock.Now()-t.lastResolved) < t.resolveDelay {
   522  		return false
   523  	}
   524  	resolved := d.resolver.Resolve(t.dest)
   525  	t.lastResolved = d.clock.Now()
   526  	if resolved == nil {
   527  		t.resolveDelay *= 2
   528  		if t.resolveDelay > maxResolveDelay {
   529  			t.resolveDelay = maxResolveDelay
   530  		}
   531  		d.log.Debug("Resolving node failed", "id", t.dest.ID(), "newdelay", t.resolveDelay)
   532  		return false
   533  	}
   534  	// The node was found.
   535  	t.resolveDelay = initialResolveDelay
   536  	t.dest = resolved
   537  	d.log.Debug("Resolved node", "id", t.dest.ID(), "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
   538  	return true
   539  }
   540  
   541  // dial performs the actual connection attempt.
   542  func (t *dialTask) dial(d *dialScheduler, dest *enode.Node) error {
   543  	fd, err := d.dialer.Dial(d.ctx, t.dest)
   544  	if err != nil {
   545  		d.log.Trace("Dial error", "id", t.dest.ID(), "addr", nodeAddr(t.dest), "conn", t.flags, "err", cleanupDialErr(err))
   546  		return &dialError{err}
   547  	}
   548  	mfd := newMeteredConn(fd, false, &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()})
   549  	return d.setupFunc(mfd, t.flags, dest)
   550  }
   551  
   552  func (t *dialTask) String() string {
   553  	id := t.dest.ID()
   554  	return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], t.dest.IP(), t.dest.TCP())
   555  }
   556  
   557  func cleanupDialErr(err error) error {
   558  	if netErr, ok := err.(*net.OpError); ok && netErr.Op == "dial" {
   559  		return netErr.Err
   560  	}
   561  	return err
   562  }