github.com/pslzym/go-ethereum@v1.8.17-0.20180926104442-4b6824e07b1b/p2p/dial.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package p2p
    18  
    19  import (
    20  	"container/heap"
    21  	"errors"
    22  	"fmt"
    23  	"net"
    24  	"time"
    25  
    26  	"github.com/ethereum/go-ethereum/log"
    27  	"github.com/ethereum/go-ethereum/p2p/enode"
    28  	"github.com/ethereum/go-ethereum/p2p/netutil"
    29  )
    30  
    31  const (
    32  	// This is the amount of time spent waiting in between
    33  	// redialing a certain node.
    34  	dialHistoryExpiration = 30 * time.Second
    35  
    36  	// Discovery lookups are throttled and can only run
    37  	// once every few seconds.
    38  	lookupInterval = 4 * time.Second
    39  
    40  	// If no peers are found for this amount of time, the initial bootnodes are
    41  	// attempted to be connected.
    42  	fallbackInterval = 20 * time.Second
    43  
    44  	// Endpoint resolution is throttled with bounded backoff.
    45  	initialResolveDelay = 60 * time.Second
    46  	maxResolveDelay     = time.Hour
    47  )
    48  
    49  // NodeDialer is used to connect to nodes in the network, typically by using
    50  // an underlying net.Dialer but also using net.Pipe in tests
    51  type NodeDialer interface {
    52  	Dial(*enode.Node) (net.Conn, error)
    53  }
    54  
    55  // TCPDialer implements the NodeDialer interface by using a net.Dialer to
    56  // create TCP connections to nodes in the network
    57  type TCPDialer struct {
    58  	*net.Dialer
    59  }
    60  
    61  // Dial creates a TCP connection to the node
    62  func (t TCPDialer) Dial(dest *enode.Node) (net.Conn, error) {
    63  	addr := &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}
    64  	return t.Dialer.Dial("tcp", addr.String())
    65  }
    66  
    67  // dialstate schedules dials and discovery lookups.
    68  // it get's a chance to compute new tasks on every iteration
    69  // of the main loop in Server.run.
    70  type dialstate struct {
    71  	maxDynDials int
    72  	ntab        discoverTable
    73  	netrestrict *netutil.Netlist
    74  
    75  	lookupRunning bool
    76  	dialing       map[enode.ID]connFlag
    77  	lookupBuf     []*enode.Node // current discovery lookup results
    78  	randomNodes   []*enode.Node // filled from Table
    79  	static        map[enode.ID]*dialTask
    80  	hist          *dialHistory
    81  
    82  	start     time.Time     // time when the dialer was first used
    83  	bootnodes []*enode.Node // default dials when there are no peers
    84  }
    85  
    86  type discoverTable interface {
    87  	Self() *enode.Node
    88  	Close()
    89  	Resolve(*enode.Node) *enode.Node
    90  	LookupRandom() []*enode.Node
    91  	ReadRandomNodes([]*enode.Node) int
    92  }
    93  
    94  // the dial history remembers recent dials.
    95  type dialHistory []pastDial
    96  
    97  // pastDial is an entry in the dial history.
    98  type pastDial struct {
    99  	id  enode.ID
   100  	exp time.Time
   101  }
   102  
   103  type task interface {
   104  	Do(*Server)
   105  }
   106  
   107  // A dialTask is generated for each node that is dialed. Its
   108  // fields cannot be accessed while the task is running.
   109  type dialTask struct {
   110  	flags        connFlag
   111  	dest         *enode.Node
   112  	lastResolved time.Time
   113  	resolveDelay time.Duration
   114  }
   115  
   116  // discoverTask runs discovery table operations.
   117  // Only one discoverTask is active at any time.
   118  // discoverTask.Do performs a random lookup.
   119  type discoverTask struct {
   120  	results []*enode.Node
   121  }
   122  
   123  // A waitExpireTask is generated if there are no other tasks
   124  // to keep the loop in Server.run ticking.
   125  type waitExpireTask struct {
   126  	time.Duration
   127  }
   128  
   129  func newDialState(static []*enode.Node, bootnodes []*enode.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
   130  	s := &dialstate{
   131  		maxDynDials: maxdyn,
   132  		ntab:        ntab,
   133  		netrestrict: netrestrict,
   134  		static:      make(map[enode.ID]*dialTask),
   135  		dialing:     make(map[enode.ID]connFlag),
   136  		bootnodes:   make([]*enode.Node, len(bootnodes)),
   137  		randomNodes: make([]*enode.Node, maxdyn/2),
   138  		hist:        new(dialHistory),
   139  	}
   140  	copy(s.bootnodes, bootnodes)
   141  	for _, n := range static {
   142  		s.addStatic(n)
   143  	}
   144  	return s
   145  }
   146  
   147  func (s *dialstate) addStatic(n *enode.Node) {
   148  	// This overwrites the task instead of updating an existing
   149  	// entry, giving users the opportunity to force a resolve operation.
   150  	s.static[n.ID()] = &dialTask{flags: staticDialedConn, dest: n}
   151  }
   152  
   153  func (s *dialstate) removeStatic(n *enode.Node) {
   154  	// This removes a task so future attempts to connect will not be made.
   155  	delete(s.static, n.ID())
   156  	// This removes a previous dial timestamp so that application
   157  	// can force a server to reconnect with chosen peer immediately.
   158  	s.hist.remove(n.ID())
   159  }
   160  
   161  func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Time) []task {
   162  	if s.start.IsZero() {
   163  		s.start = now
   164  	}
   165  
   166  	var newtasks []task
   167  	addDial := func(flag connFlag, n *enode.Node) bool {
   168  		if err := s.checkDial(n, peers); err != nil {
   169  			log.Trace("Skipping dial candidate", "id", n.ID(), "addr", &net.TCPAddr{IP: n.IP(), Port: n.TCP()}, "err", err)
   170  			return false
   171  		}
   172  		s.dialing[n.ID()] = flag
   173  		newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
   174  		return true
   175  	}
   176  
   177  	// Compute number of dynamic dials necessary at this point.
   178  	needDynDials := s.maxDynDials
   179  	for _, p := range peers {
   180  		if p.rw.is(dynDialedConn) {
   181  			needDynDials--
   182  		}
   183  	}
   184  	for _, flag := range s.dialing {
   185  		if flag&dynDialedConn != 0 {
   186  			needDynDials--
   187  		}
   188  	}
   189  
   190  	// Expire the dial history on every invocation.
   191  	s.hist.expire(now)
   192  
   193  	// Create dials for static nodes if they are not connected.
   194  	for id, t := range s.static {
   195  		err := s.checkDial(t.dest, peers)
   196  		switch err {
   197  		case errNotWhitelisted, errSelf:
   198  			log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}, "err", err)
   199  			delete(s.static, t.dest.ID())
   200  		case nil:
   201  			s.dialing[id] = t.flags
   202  			newtasks = append(newtasks, t)
   203  		}
   204  	}
   205  	// If we don't have any peers whatsoever, try to dial a random bootnode. This
   206  	// scenario is useful for the testnet (and private networks) where the discovery
   207  	// table might be full of mostly bad peers, making it hard to find good ones.
   208  	if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
   209  		bootnode := s.bootnodes[0]
   210  		s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
   211  		s.bootnodes = append(s.bootnodes, bootnode)
   212  
   213  		if addDial(dynDialedConn, bootnode) {
   214  			needDynDials--
   215  		}
   216  	}
   217  	// Use random nodes from the table for half of the necessary
   218  	// dynamic dials.
   219  	randomCandidates := needDynDials / 2
   220  	if randomCandidates > 0 {
   221  		n := s.ntab.ReadRandomNodes(s.randomNodes)
   222  		for i := 0; i < randomCandidates && i < n; i++ {
   223  			if addDial(dynDialedConn, s.randomNodes[i]) {
   224  				needDynDials--
   225  			}
   226  		}
   227  	}
   228  	// Create dynamic dials from random lookup results, removing tried
   229  	// items from the result buffer.
   230  	i := 0
   231  	for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
   232  		if addDial(dynDialedConn, s.lookupBuf[i]) {
   233  			needDynDials--
   234  		}
   235  	}
   236  	s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
   237  	// Launch a discovery lookup if more candidates are needed.
   238  	if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
   239  		s.lookupRunning = true
   240  		newtasks = append(newtasks, &discoverTask{})
   241  	}
   242  
   243  	// Launch a timer to wait for the next node to expire if all
   244  	// candidates have been tried and no task is currently active.
   245  	// This should prevent cases where the dialer logic is not ticked
   246  	// because there are no pending events.
   247  	if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
   248  		t := &waitExpireTask{s.hist.min().exp.Sub(now)}
   249  		newtasks = append(newtasks, t)
   250  	}
   251  	return newtasks
   252  }
   253  
   254  var (
   255  	errSelf             = errors.New("is self")
   256  	errAlreadyDialing   = errors.New("already dialing")
   257  	errAlreadyConnected = errors.New("already connected")
   258  	errRecentlyDialed   = errors.New("recently dialed")
   259  	errNotWhitelisted   = errors.New("not contained in netrestrict whitelist")
   260  )
   261  
   262  func (s *dialstate) checkDial(n *enode.Node, peers map[enode.ID]*Peer) error {
   263  	_, dialing := s.dialing[n.ID()]
   264  	switch {
   265  	case dialing:
   266  		return errAlreadyDialing
   267  	case peers[n.ID()] != nil:
   268  		return errAlreadyConnected
   269  	case s.ntab != nil && n.ID() == s.ntab.Self().ID():
   270  		return errSelf
   271  	case s.netrestrict != nil && !s.netrestrict.Contains(n.IP()):
   272  		return errNotWhitelisted
   273  	case s.hist.contains(n.ID()):
   274  		return errRecentlyDialed
   275  	}
   276  	return nil
   277  }
   278  
   279  func (s *dialstate) taskDone(t task, now time.Time) {
   280  	switch t := t.(type) {
   281  	case *dialTask:
   282  		s.hist.add(t.dest.ID(), now.Add(dialHistoryExpiration))
   283  		delete(s.dialing, t.dest.ID())
   284  	case *discoverTask:
   285  		s.lookupRunning = false
   286  		s.lookupBuf = append(s.lookupBuf, t.results...)
   287  	}
   288  }
   289  
   290  func (t *dialTask) Do(srv *Server) {
   291  	if t.dest.Incomplete() {
   292  		if !t.resolve(srv) {
   293  			return
   294  		}
   295  	}
   296  	err := t.dial(srv, t.dest)
   297  	if err != nil {
   298  		log.Trace("Dial error", "task", t, "err", err)
   299  		// Try resolving the ID of static nodes if dialing failed.
   300  		if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
   301  			if t.resolve(srv) {
   302  				t.dial(srv, t.dest)
   303  			}
   304  		}
   305  	}
   306  }
   307  
   308  // resolve attempts to find the current endpoint for the destination
   309  // using discovery.
   310  //
   311  // Resolve operations are throttled with backoff to avoid flooding the
   312  // discovery network with useless queries for nodes that don't exist.
   313  // The backoff delay resets when the node is found.
   314  func (t *dialTask) resolve(srv *Server) bool {
   315  	if srv.ntab == nil {
   316  		log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
   317  		return false
   318  	}
   319  	if t.resolveDelay == 0 {
   320  		t.resolveDelay = initialResolveDelay
   321  	}
   322  	if time.Since(t.lastResolved) < t.resolveDelay {
   323  		return false
   324  	}
   325  	resolved := srv.ntab.Resolve(t.dest)
   326  	t.lastResolved = time.Now()
   327  	if resolved == nil {
   328  		t.resolveDelay *= 2
   329  		if t.resolveDelay > maxResolveDelay {
   330  			t.resolveDelay = maxResolveDelay
   331  		}
   332  		log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
   333  		return false
   334  	}
   335  	// The node was found.
   336  	t.resolveDelay = initialResolveDelay
   337  	t.dest = resolved
   338  	log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
   339  	return true
   340  }
   341  
   342  type dialError struct {
   343  	error
   344  }
   345  
   346  // dial performs the actual connection attempt.
   347  func (t *dialTask) dial(srv *Server, dest *enode.Node) error {
   348  	fd, err := srv.Dialer.Dial(dest)
   349  	if err != nil {
   350  		return &dialError{err}
   351  	}
   352  	mfd := newMeteredConn(fd, false)
   353  	return srv.SetupConn(mfd, t.flags, dest)
   354  }
   355  
   356  func (t *dialTask) String() string {
   357  	id := t.dest.ID()
   358  	return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], t.dest.IP(), t.dest.TCP())
   359  }
   360  
   361  func (t *discoverTask) Do(srv *Server) {
   362  	// newTasks generates a lookup task whenever dynamic dials are
   363  	// necessary. Lookups need to take some time, otherwise the
   364  	// event loop spins too fast.
   365  	next := srv.lastLookup.Add(lookupInterval)
   366  	if now := time.Now(); now.Before(next) {
   367  		time.Sleep(next.Sub(now))
   368  	}
   369  	srv.lastLookup = time.Now()
   370  	t.results = srv.ntab.LookupRandom()
   371  }
   372  
   373  func (t *discoverTask) String() string {
   374  	s := "discovery lookup"
   375  	if len(t.results) > 0 {
   376  		s += fmt.Sprintf(" (%d results)", len(t.results))
   377  	}
   378  	return s
   379  }
   380  
   381  func (t waitExpireTask) Do(*Server) {
   382  	time.Sleep(t.Duration)
   383  }
   384  func (t waitExpireTask) String() string {
   385  	return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
   386  }
   387  
   388  // Use only these methods to access or modify dialHistory.
   389  func (h dialHistory) min() pastDial {
   390  	return h[0]
   391  }
   392  func (h *dialHistory) add(id enode.ID, exp time.Time) {
   393  	heap.Push(h, pastDial{id, exp})
   394  
   395  }
   396  func (h *dialHistory) remove(id enode.ID) bool {
   397  	for i, v := range *h {
   398  		if v.id == id {
   399  			heap.Remove(h, i)
   400  			return true
   401  		}
   402  	}
   403  	return false
   404  }
   405  func (h dialHistory) contains(id enode.ID) bool {
   406  	for _, v := range h {
   407  		if v.id == id {
   408  			return true
   409  		}
   410  	}
   411  	return false
   412  }
   413  func (h *dialHistory) expire(now time.Time) {
   414  	for h.Len() > 0 && h.min().exp.Before(now) {
   415  		heap.Pop(h)
   416  	}
   417  }
   418  
   419  // heap.Interface boilerplate
   420  func (h dialHistory) Len() int           { return len(h) }
   421  func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) }
   422  func (h dialHistory) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
   423  func (h *dialHistory) Push(x interface{}) {
   424  	*h = append(*h, x.(pastDial))
   425  }
   426  func (h *dialHistory) Pop() interface{} {
   427  	old := *h
   428  	n := len(old)
   429  	x := old[n-1]
   430  	*h = old[0 : n-1]
   431  	return x
   432  }