github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/p2p/dial.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package p2p
    18  
    19  import (
    20  	"context"
    21  	crand "crypto/rand"
    22  	"encoding/binary"
    23  	"errors"
    24  	"fmt"
    25  	mrand "math/rand"
    26  	"net"
    27  	"sync"
    28  	"sync/atomic"
    29  	"time"
    30  
    31  	"github.com/ethereum/go-ethereum/common/mclock"
    32  	"github.com/ethereum/go-ethereum/log"
    33  	"github.com/ethereum/go-ethereum/p2p/enode"
    34  	"github.com/ethereum/go-ethereum/p2p/netutil"
    35  )
    36  
    37  const (
    38  	// This is the amount of time spent waiting in between redialing a certain node. The
    39  	// limit is a bit higher than inboundThrottleTime to prevent failing dials in small
    40  	// private networks.
    41  	dialHistoryExpiration = inboundThrottleTime + 5*time.Second
    42  
    43  	// Config for the "Looking for peers" message.
    44  	dialStatsLogInterval = 10 * time.Second // printed at most this often
    45  	dialStatsPeerLimit   = 3                // but not if more than this many dialed peers
    46  
    47  	// Endpoint resolution is throttled with bounded backoff.
    48  	initialResolveDelay = 60 * time.Second
    49  	maxResolveDelay     = time.Hour
    50  )
    51  
    52  // NodeDialer is used to connect to nodes in the network, typically by using
    53  // an underlying net.Dialer but also using net.Pipe in tests.
    54  type NodeDialer interface {
    55  	Dial(context.Context, *enode.Node) (net.Conn, error)
    56  }
    57  
    58  type nodeResolver interface {
    59  	Resolve(*enode.Node) *enode.Node
    60  }
    61  
    62  // tcpDialer implements NodeDialer using real TCP connections.
    63  type tcpDialer struct {
    64  	d *net.Dialer
    65  }
    66  
    67  func (t tcpDialer) Dial(ctx context.Context, dest *enode.Node) (net.Conn, error) {
    68  	return t.d.DialContext(ctx, "tcp", nodeAddr(dest).String())
    69  }
    70  
    71  func nodeAddr(n *enode.Node) net.Addr {
    72  	return &net.TCPAddr{IP: n.IP(), Port: n.TCP()}
    73  }
    74  
    75  // checkDial errors:
    76  var (
    77  	errSelf             = errors.New("is self")
    78  	errAlreadyDialing   = errors.New("already dialing")
    79  	errAlreadyConnected = errors.New("already connected")
    80  	errRecentlyDialed   = errors.New("recently dialed")
    81  	errNetRestrict      = errors.New("not contained in netrestrict list")
    82  	errNoPort           = errors.New("node does not provide TCP port")
    83  )
    84  
    85  // dialer creates outbound connections and submits them into Server.
    86  // Two types of peer connections can be created:
    87  //
    88  //   - static dials are pre-configured connections. The dialer attempts
    89  //     keep these nodes connected at all times.
    90  //
    91  //   - dynamic dials are created from node discovery results. The dialer
    92  //     continuously reads candidate nodes from its input iterator and attempts
    93  //     to create peer connections to nodes arriving through the iterator.
    94  type dialScheduler struct {
    95  	dialConfig
    96  	setupFunc   dialSetupFunc
    97  	wg          sync.WaitGroup
    98  	cancel      context.CancelFunc
    99  	ctx         context.Context
   100  	nodesIn     chan *enode.Node
   101  	doneCh      chan *dialTask
   102  	addStaticCh chan *enode.Node
   103  	remStaticCh chan *enode.Node
   104  	addPeerCh   chan *conn
   105  	remPeerCh   chan *conn
   106  
   107  	// Everything below here belongs to loop and
   108  	// should only be accessed by code on the loop goroutine.
   109  	dialing   map[enode.ID]*dialTask // active tasks
   110  	peers     map[enode.ID]struct{}  // all connected peers
   111  	dialPeers int                    // current number of dialed peers
   112  
   113  	// The static map tracks all static dial tasks. The subset of usable static dial tasks
   114  	// (i.e. those passing checkDial) is kept in staticPool. The scheduler prefers
   115  	// launching random static tasks from the pool over launching dynamic dials from the
   116  	// iterator.
   117  	static     map[enode.ID]*dialTask
   118  	staticPool []*dialTask
   119  
   120  	// The dial history keeps recently dialed nodes. Members of history are not dialed.
   121  	history      expHeap
   122  	historyTimer *mclock.Alarm
   123  
   124  	// for logStats
   125  	lastStatsLog     mclock.AbsTime
   126  	doneSinceLastLog int
   127  }
   128  
   129  type dialSetupFunc func(net.Conn, connFlag, *enode.Node) error
   130  
   131  type dialConfig struct {
   132  	self           enode.ID         // our own ID
   133  	maxDialPeers   int              // maximum number of dialed peers
   134  	maxActiveDials int              // maximum number of active dials
   135  	netRestrict    *netutil.Netlist // IP netrestrict list, disabled if nil
   136  	resolver       nodeResolver
   137  	dialer         NodeDialer
   138  	log            log.Logger
   139  	clock          mclock.Clock
   140  	rand           *mrand.Rand
   141  }
   142  
   143  func (cfg dialConfig) withDefaults() dialConfig {
   144  	if cfg.maxActiveDials == 0 {
   145  		cfg.maxActiveDials = defaultMaxPendingPeers
   146  	}
   147  	if cfg.log == nil {
   148  		cfg.log = log.Root()
   149  	}
   150  	if cfg.clock == nil {
   151  		cfg.clock = mclock.System{}
   152  	}
   153  	if cfg.rand == nil {
   154  		seedb := make([]byte, 8)
   155  		crand.Read(seedb)
   156  		seed := int64(binary.BigEndian.Uint64(seedb))
   157  		cfg.rand = mrand.New(mrand.NewSource(seed))
   158  	}
   159  	return cfg
   160  }
   161  
   162  func newDialScheduler(config dialConfig, it enode.Iterator, setupFunc dialSetupFunc) *dialScheduler {
   163  	cfg := config.withDefaults()
   164  	d := &dialScheduler{
   165  		dialConfig:   cfg,
   166  		historyTimer: mclock.NewAlarm(cfg.clock),
   167  		setupFunc:    setupFunc,
   168  		dialing:      make(map[enode.ID]*dialTask),
   169  		static:       make(map[enode.ID]*dialTask),
   170  		peers:        make(map[enode.ID]struct{}),
   171  		doneCh:       make(chan *dialTask),
   172  		nodesIn:      make(chan *enode.Node),
   173  		addStaticCh:  make(chan *enode.Node),
   174  		remStaticCh:  make(chan *enode.Node),
   175  		addPeerCh:    make(chan *conn),
   176  		remPeerCh:    make(chan *conn),
   177  	}
   178  	d.lastStatsLog = d.clock.Now()
   179  	d.ctx, d.cancel = context.WithCancel(context.Background())
   180  	d.wg.Add(2)
   181  	go d.readNodes(it)
   182  	go d.loop(it)
   183  	return d
   184  }
   185  
   186  // stop shuts down the dialer, canceling all current dial tasks.
   187  func (d *dialScheduler) stop() {
   188  	d.cancel()
   189  	d.wg.Wait()
   190  }
   191  
   192  // addStatic adds a static dial candidate.
   193  func (d *dialScheduler) addStatic(n *enode.Node) {
   194  	select {
   195  	case d.addStaticCh <- n:
   196  	case <-d.ctx.Done():
   197  	}
   198  }
   199  
   200  // removeStatic removes a static dial candidate.
   201  func (d *dialScheduler) removeStatic(n *enode.Node) {
   202  	select {
   203  	case d.remStaticCh <- n:
   204  	case <-d.ctx.Done():
   205  	}
   206  }
   207  
   208  // peerAdded updates the peer set.
   209  func (d *dialScheduler) peerAdded(c *conn) {
   210  	select {
   211  	case d.addPeerCh <- c:
   212  	case <-d.ctx.Done():
   213  	}
   214  }
   215  
   216  // peerRemoved updates the peer set.
   217  func (d *dialScheduler) peerRemoved(c *conn) {
   218  	select {
   219  	case d.remPeerCh <- c:
   220  	case <-d.ctx.Done():
   221  	}
   222  }
   223  
   224  // loop is the main loop of the dialer.
   225  func (d *dialScheduler) loop(it enode.Iterator) {
   226  	var (
   227  		nodesCh chan *enode.Node
   228  	)
   229  
   230  loop:
   231  	for {
   232  		// Launch new dials if slots are available.
   233  		slots := d.freeDialSlots()
   234  		slots -= d.startStaticDials(slots)
   235  		if slots > 0 {
   236  			nodesCh = d.nodesIn
   237  		} else {
   238  			nodesCh = nil
   239  		}
   240  		d.rearmHistoryTimer()
   241  		d.logStats()
   242  
   243  		select {
   244  		case node := <-nodesCh:
   245  			if err := d.checkDial(node); err != nil {
   246  				d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err)
   247  			} else {
   248  				d.startDial(newDialTask(node, dynDialedConn))
   249  			}
   250  
   251  		case task := <-d.doneCh:
   252  			id := task.dest().ID()
   253  			delete(d.dialing, id)
   254  			d.updateStaticPool(id)
   255  			d.doneSinceLastLog++
   256  
   257  		case c := <-d.addPeerCh:
   258  			if c.is(dynDialedConn) || c.is(staticDialedConn) {
   259  				d.dialPeers++
   260  			}
   261  			id := c.node.ID()
   262  			d.peers[id] = struct{}{}
   263  			// Remove from static pool because the node is now connected.
   264  			task := d.static[id]
   265  			if task != nil && task.staticPoolIndex >= 0 {
   266  				d.removeFromStaticPool(task.staticPoolIndex)
   267  			}
   268  			// TODO: cancel dials to connected peers
   269  
   270  		case c := <-d.remPeerCh:
   271  			if c.is(dynDialedConn) || c.is(staticDialedConn) {
   272  				d.dialPeers--
   273  			}
   274  			delete(d.peers, c.node.ID())
   275  			d.updateStaticPool(c.node.ID())
   276  
   277  		case node := <-d.addStaticCh:
   278  			id := node.ID()
   279  			_, exists := d.static[id]
   280  			d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists)
   281  			if exists {
   282  				continue loop
   283  			}
   284  			task := newDialTask(node, staticDialedConn)
   285  			d.static[id] = task
   286  			if d.checkDial(node) == nil {
   287  				d.addToStaticPool(task)
   288  			}
   289  
   290  		case node := <-d.remStaticCh:
   291  			id := node.ID()
   292  			task := d.static[id]
   293  			d.log.Trace("Removing static node", "id", id, "ok", task != nil)
   294  			if task != nil {
   295  				delete(d.static, id)
   296  				if task.staticPoolIndex >= 0 {
   297  					d.removeFromStaticPool(task.staticPoolIndex)
   298  				}
   299  			}
   300  
   301  		case <-d.historyTimer.C():
   302  			d.expireHistory()
   303  
   304  		case <-d.ctx.Done():
   305  			it.Close()
   306  			break loop
   307  		}
   308  	}
   309  
   310  	d.historyTimer.Stop()
   311  	for range d.dialing {
   312  		<-d.doneCh
   313  	}
   314  	d.wg.Done()
   315  }
   316  
   317  // readNodes runs in its own goroutine and delivers nodes from
   318  // the input iterator to the nodesIn channel.
   319  func (d *dialScheduler) readNodes(it enode.Iterator) {
   320  	defer d.wg.Done()
   321  
   322  	for it.Next() {
   323  		select {
   324  		case d.nodesIn <- it.Node():
   325  		case <-d.ctx.Done():
   326  		}
   327  	}
   328  }
   329  
   330  // logStats prints dialer statistics to the log. The message is suppressed when enough
   331  // peers are connected because users should only see it while their client is starting up
   332  // or comes back online.
   333  func (d *dialScheduler) logStats() {
   334  	now := d.clock.Now()
   335  	if d.lastStatsLog.Add(dialStatsLogInterval) > now {
   336  		return
   337  	}
   338  	if d.dialPeers < dialStatsPeerLimit && d.dialPeers < d.maxDialPeers {
   339  		d.log.Info("Looking for peers", "peercount", len(d.peers), "tried", d.doneSinceLastLog, "static", len(d.static))
   340  	}
   341  	d.doneSinceLastLog = 0
   342  	d.lastStatsLog = now
   343  }
   344  
   345  // rearmHistoryTimer configures d.historyTimer to fire when the
   346  // next item in d.history expires.
   347  func (d *dialScheduler) rearmHistoryTimer() {
   348  	if len(d.history) == 0 {
   349  		return
   350  	}
   351  	d.historyTimer.Schedule(d.history.nextExpiry())
   352  }
   353  
   354  // expireHistory removes expired items from d.history.
   355  func (d *dialScheduler) expireHistory() {
   356  	d.history.expire(d.clock.Now(), func(hkey string) {
   357  		var id enode.ID
   358  		copy(id[:], hkey)
   359  		d.updateStaticPool(id)
   360  	})
   361  }
   362  
   363  // freeDialSlots returns the number of free dial slots. The result can be negative
   364  // when peers are connected while their task is still running.
   365  func (d *dialScheduler) freeDialSlots() int {
   366  	slots := (d.maxDialPeers - d.dialPeers) * 2
   367  	if slots > d.maxActiveDials {
   368  		slots = d.maxActiveDials
   369  	}
   370  	free := slots - len(d.dialing)
   371  	return free
   372  }
   373  
   374  // checkDial returns an error if node n should not be dialed.
   375  func (d *dialScheduler) checkDial(n *enode.Node) error {
   376  	if n.ID() == d.self {
   377  		return errSelf
   378  	}
   379  	if n.IP() != nil && n.TCP() == 0 {
   380  		// This check can trigger if a non-TCP node is found
   381  		// by discovery. If there is no IP, the node is a static
   382  		// node and the actual endpoint will be resolved later in dialTask.
   383  		return errNoPort
   384  	}
   385  	if _, ok := d.dialing[n.ID()]; ok {
   386  		return errAlreadyDialing
   387  	}
   388  	if _, ok := d.peers[n.ID()]; ok {
   389  		return errAlreadyConnected
   390  	}
   391  	if d.netRestrict != nil && !d.netRestrict.Contains(n.IP()) {
   392  		return errNetRestrict
   393  	}
   394  	if d.history.contains(string(n.ID().Bytes())) {
   395  		return errRecentlyDialed
   396  	}
   397  	return nil
   398  }
   399  
   400  // startStaticDials starts n static dial tasks.
   401  func (d *dialScheduler) startStaticDials(n int) (started int) {
   402  	for started = 0; started < n && len(d.staticPool) > 0; started++ {
   403  		idx := d.rand.Intn(len(d.staticPool))
   404  		task := d.staticPool[idx]
   405  		d.startDial(task)
   406  		d.removeFromStaticPool(idx)
   407  	}
   408  	return started
   409  }
   410  
   411  // updateStaticPool attempts to move the given static dial back into staticPool.
   412  func (d *dialScheduler) updateStaticPool(id enode.ID) {
   413  	task, ok := d.static[id]
   414  	if ok && task.staticPoolIndex < 0 && d.checkDial(task.dest()) == nil {
   415  		d.addToStaticPool(task)
   416  	}
   417  }
   418  
   419  func (d *dialScheduler) addToStaticPool(task *dialTask) {
   420  	if task.staticPoolIndex >= 0 {
   421  		panic("attempt to add task to staticPool twice")
   422  	}
   423  	d.staticPool = append(d.staticPool, task)
   424  	task.staticPoolIndex = len(d.staticPool) - 1
   425  }
   426  
   427  // removeFromStaticPool removes the task at idx from staticPool. It does that by moving the
   428  // current last element of the pool to idx and then shortening the pool by one.
   429  func (d *dialScheduler) removeFromStaticPool(idx int) {
   430  	task := d.staticPool[idx]
   431  	end := len(d.staticPool) - 1
   432  	d.staticPool[idx] = d.staticPool[end]
   433  	d.staticPool[idx].staticPoolIndex = idx
   434  	d.staticPool[end] = nil
   435  	d.staticPool = d.staticPool[:end]
   436  	task.staticPoolIndex = -1
   437  }
   438  
   439  // startDial runs the given dial task in a separate goroutine.
   440  func (d *dialScheduler) startDial(task *dialTask) {
   441  	node := task.dest()
   442  	d.log.Trace("Starting p2p dial", "id", node.ID(), "ip", node.IP(), "flag", task.flags)
   443  	hkey := string(node.ID().Bytes())
   444  	d.history.add(hkey, d.clock.Now().Add(dialHistoryExpiration))
   445  	d.dialing[node.ID()] = task
   446  	go func() {
   447  		task.run(d)
   448  		d.doneCh <- task
   449  	}()
   450  }
   451  
   452  // A dialTask generated for each node that is dialed.
   453  type dialTask struct {
   454  	staticPoolIndex int
   455  	flags           connFlag
   456  
   457  	// These fields are private to the task and should not be
   458  	// accessed by dialScheduler while the task is running.
   459  	destPtr      atomic.Pointer[enode.Node]
   460  	lastResolved mclock.AbsTime
   461  	resolveDelay time.Duration
   462  }
   463  
   464  func newDialTask(dest *enode.Node, flags connFlag) *dialTask {
   465  	t := &dialTask{flags: flags, staticPoolIndex: -1}
   466  	t.destPtr.Store(dest)
   467  	return t
   468  }
   469  
   470  type dialError struct {
   471  	error
   472  }
   473  
   474  func (t *dialTask) dest() *enode.Node {
   475  	return t.destPtr.Load()
   476  }
   477  
   478  func (t *dialTask) run(d *dialScheduler) {
   479  	if t.needResolve() && !t.resolve(d) {
   480  		return
   481  	}
   482  
   483  	err := t.dial(d, t.dest())
   484  	if err != nil {
   485  		// For static nodes, resolve one more time if dialing fails.
   486  		if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
   487  			if t.resolve(d) {
   488  				t.dial(d, t.dest())
   489  			}
   490  		}
   491  	}
   492  }
   493  
   494  func (t *dialTask) needResolve() bool {
   495  	return t.flags&staticDialedConn != 0 && t.dest().IP() == nil
   496  }
   497  
   498  // resolve attempts to find the current endpoint for the destination
   499  // using discovery.
   500  //
   501  // Resolve operations are throttled with backoff to avoid flooding the
   502  // discovery network with useless queries for nodes that don't exist.
   503  // The backoff delay resets when the node is found.
   504  func (t *dialTask) resolve(d *dialScheduler) bool {
   505  	if d.resolver == nil {
   506  		return false
   507  	}
   508  	if t.resolveDelay == 0 {
   509  		t.resolveDelay = initialResolveDelay
   510  	}
   511  	if t.lastResolved > 0 && time.Duration(d.clock.Now()-t.lastResolved) < t.resolveDelay {
   512  		return false
   513  	}
   514  
   515  	node := t.dest()
   516  	resolved := d.resolver.Resolve(node)
   517  	t.lastResolved = d.clock.Now()
   518  	if resolved == nil {
   519  		t.resolveDelay *= 2
   520  		if t.resolveDelay > maxResolveDelay {
   521  			t.resolveDelay = maxResolveDelay
   522  		}
   523  		d.log.Debug("Resolving node failed", "id", node.ID(), "newdelay", t.resolveDelay)
   524  		return false
   525  	}
   526  	// The node was found.
   527  	t.resolveDelay = initialResolveDelay
   528  	t.destPtr.Store(resolved)
   529  	d.log.Debug("Resolved node", "id", resolved.ID(), "addr", &net.TCPAddr{IP: resolved.IP(), Port: resolved.TCP()})
   530  	return true
   531  }
   532  
   533  // dial performs the actual connection attempt.
   534  func (t *dialTask) dial(d *dialScheduler, dest *enode.Node) error {
   535  	dialMeter.Mark(1)
   536  	fd, err := d.dialer.Dial(d.ctx, dest)
   537  	if err != nil {
   538  		d.log.Trace("Dial error", "id", dest.ID(), "addr", nodeAddr(dest), "conn", t.flags, "err", cleanupDialErr(err))
   539  		dialConnectionError.Mark(1)
   540  		return &dialError{err}
   541  	}
   542  	return d.setupFunc(newMeteredConn(fd), t.flags, dest)
   543  }
   544  
   545  func (t *dialTask) String() string {
   546  	node := t.dest()
   547  	id := node.ID()
   548  	return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], node.IP(), node.TCP())
   549  }
   550  
   551  func cleanupDialErr(err error) error {
   552  	if netErr, ok := err.(*net.OpError); ok && netErr.Op == "dial" {
   553  		return netErr.Err
   554  	}
   555  	return err
   556  }