github.com/csquan/dpos-go-ethereum@v1.9.7/p2p/dial.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package p2p
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"net"
    23  	"time"
    24  
    25  	"github.com/ethereum/go-ethereum/log"
    26  	"github.com/ethereum/go-ethereum/p2p/enode"
    27  	"github.com/ethereum/go-ethereum/p2p/netutil"
    28  )
    29  
const (
	// dialHistoryExpiration is the amount of time spent waiting in between
	// redialing a certain node. The limit is a bit higher than
	// inboundThrottleTime to prevent failing dials in small private networks.
	dialHistoryExpiration = inboundThrottleTime + 5*time.Second

	// fallbackInterval is the time after the dialer was first used before the
	// initial bootnodes are dialed when no peers have been found.
	fallbackInterval = 20 * time.Second

	// Endpoint resolution is throttled with bounded backoff: the delay between
	// resolve attempts starts at initialResolveDelay, doubles after every
	// failed attempt and is capped at maxResolveDelay.
	initialResolveDelay = 60 * time.Second
	maxResolveDelay     = time.Hour
)
    43  
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
	// Dial opens a connection to the given node.
	Dial(*enode.Node) (net.Conn, error)
}
    49  
// nodeResolver looks up the current endpoint of a node. It is used by
// dialTask.resolve, which treats a nil result as "node not found".
type nodeResolver interface {
	Resolve(*enode.Node) *enode.Node
}
    53  
// TCPDialer implements the NodeDialer interface by using a net.Dialer to
// create TCP connections to nodes in the network.
type TCPDialer struct {
	*net.Dialer
}
    59  
    60  // Dial creates a TCP connection to the node
    61  func (t TCPDialer) Dial(dest *enode.Node) (net.Conn, error) {
    62  	addr := &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}
    63  	return t.Dialer.Dial("tcp", addr.String())
    64  }
    65  
// dialstate schedules dials and discovery lookups.
// It gets a chance to compute new tasks on every iteration
// of the main loop in Server.run.
type dialstate struct {
	maxDynDials int              // upper bound on concurrent dynamic dials plus dynamically dialed peers
	netrestrict *netutil.Netlist // if non-nil, only nodes whose IP is contained are dialed
	self        enode.ID         // ID that is never dialed (checkDial reports errSelf for it)
	bootnodes   []*enode.Node    // default dials when there are no peers
	log         log.Logger

	start         time.Time             // time when the dialer was first used
	lookupRunning bool                  // true while a discoverTask is outstanding
	dialing       map[enode.ID]connFlag // nodes with a dial task in flight, with the task's flags
	lookupBuf     []*enode.Node         // current discovery lookup results
	static        map[enode.ID]*dialTask // dial tasks for static nodes, keyed by node ID
	hist          expHeap                // recently dialed nodes; entries expire after dialHistoryExpiration
}
    83  
// task is a unit of work produced by dialstate.newTasks. Do receives the
// Server the task operates on.
type task interface {
	Do(*Server)
}
    87  
    88  func newDialState(self enode.ID, maxdyn int, cfg *Config) *dialstate {
    89  	s := &dialstate{
    90  		maxDynDials: maxdyn,
    91  		self:        self,
    92  		netrestrict: cfg.NetRestrict,
    93  		log:         cfg.Logger,
    94  		static:      make(map[enode.ID]*dialTask),
    95  		dialing:     make(map[enode.ID]connFlag),
    96  		bootnodes:   make([]*enode.Node, len(cfg.BootstrapNodes)),
    97  	}
    98  	copy(s.bootnodes, cfg.BootstrapNodes)
    99  	if s.log == nil {
   100  		s.log = log.Root()
   101  	}
   102  	for _, n := range cfg.StaticNodes {
   103  		s.addStatic(n)
   104  	}
   105  	return s
   106  }
   107  
   108  func (s *dialstate) addStatic(n *enode.Node) {
   109  	// This overwrites the task instead of updating an existing
   110  	// entry, giving users the opportunity to force a resolve operation.
   111  	s.static[n.ID()] = &dialTask{flags: staticDialedConn, dest: n}
   112  }
   113  
   114  func (s *dialstate) removeStatic(n *enode.Node) {
   115  	// This removes a task so future attempts to connect will not be made.
   116  	delete(s.static, n.ID())
   117  }
   118  
   119  func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Time) []task {
   120  	var newtasks []task
   121  	addDial := func(flag connFlag, n *enode.Node) bool {
   122  		if err := s.checkDial(n, peers); err != nil {
   123  			s.log.Trace("Skipping dial candidate", "id", n.ID(), "addr", &net.TCPAddr{IP: n.IP(), Port: n.TCP()}, "err", err)
   124  			return false
   125  		}
   126  		s.dialing[n.ID()] = flag
   127  		newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
   128  		return true
   129  	}
   130  
   131  	if s.start.IsZero() {
   132  		s.start = now
   133  	}
   134  	s.hist.expire(now)
   135  
   136  	// Create dials for static nodes if they are not connected.
   137  	for id, t := range s.static {
   138  		err := s.checkDial(t.dest, peers)
   139  		switch err {
   140  		case errNotWhitelisted, errSelf:
   141  			s.log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}, "err", err)
   142  			delete(s.static, t.dest.ID())
   143  		case nil:
   144  			s.dialing[id] = t.flags
   145  			newtasks = append(newtasks, t)
   146  		}
   147  	}
   148  
   149  	// Compute number of dynamic dials needed.
   150  	needDynDials := s.maxDynDials
   151  	for _, p := range peers {
   152  		if p.rw.is(dynDialedConn) {
   153  			needDynDials--
   154  		}
   155  	}
   156  	for _, flag := range s.dialing {
   157  		if flag&dynDialedConn != 0 {
   158  			needDynDials--
   159  		}
   160  	}
   161  
   162  	// If we don't have any peers whatsoever, try to dial a random bootnode. This
   163  	// scenario is useful for the testnet (and private networks) where the discovery
   164  	// table might be full of mostly bad peers, making it hard to find good ones.
   165  	if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
   166  		bootnode := s.bootnodes[0]
   167  		s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
   168  		s.bootnodes = append(s.bootnodes, bootnode)
   169  		if addDial(dynDialedConn, bootnode) {
   170  			needDynDials--
   171  		}
   172  	}
   173  
   174  	// Create dynamic dials from discovery results.
   175  	i := 0
   176  	for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
   177  		if addDial(dynDialedConn, s.lookupBuf[i]) {
   178  			needDynDials--
   179  		}
   180  	}
   181  	s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
   182  
   183  	// Launch a discovery lookup if more candidates are needed.
   184  	if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
   185  		s.lookupRunning = true
   186  		newtasks = append(newtasks, &discoverTask{want: needDynDials - len(s.lookupBuf)})
   187  	}
   188  
   189  	// Launch a timer to wait for the next node to expire if all
   190  	// candidates have been tried and no task is currently active.
   191  	// This should prevent cases where the dialer logic is not ticked
   192  	// because there are no pending events.
   193  	if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
   194  		t := &waitExpireTask{s.hist.nextExpiry().Sub(now)}
   195  		newtasks = append(newtasks, t)
   196  	}
   197  	return newtasks
   198  }
   199  
// Errors returned by dialstate.checkDial describing why a node is not
// eligible for dialing.
var (
	errSelf             = errors.New("is self")
	errAlreadyDialing   = errors.New("already dialing")
	errAlreadyConnected = errors.New("already connected")
	errRecentlyDialed   = errors.New("recently dialed")
	errNotWhitelisted   = errors.New("not contained in netrestrict whitelist")
)
   207  
   208  func (s *dialstate) checkDial(n *enode.Node, peers map[enode.ID]*Peer) error {
   209  	_, dialing := s.dialing[n.ID()]
   210  	switch {
   211  	case dialing:
   212  		return errAlreadyDialing
   213  	case peers[n.ID()] != nil:
   214  		return errAlreadyConnected
   215  	case n.ID() == s.self:
   216  		return errSelf
   217  	case s.netrestrict != nil && !s.netrestrict.Contains(n.IP()):
   218  		return errNotWhitelisted
   219  	case s.hist.contains(string(n.ID().Bytes())):
   220  		return errRecentlyDialed
   221  	}
   222  	return nil
   223  }
   224  
   225  func (s *dialstate) taskDone(t task, now time.Time) {
   226  	switch t := t.(type) {
   227  	case *dialTask:
   228  		s.hist.add(string(t.dest.ID().Bytes()), now.Add(dialHistoryExpiration))
   229  		delete(s.dialing, t.dest.ID())
   230  	case *discoverTask:
   231  		s.lookupRunning = false
   232  		s.lookupBuf = append(s.lookupBuf, t.results...)
   233  	}
   234  }
   235  
// A dialTask is generated for each node that is dialed. Its
// fields cannot be accessed while the task is running.
type dialTask struct {
	flags        connFlag      // connection flags passed on to Server.SetupConn
	dest         *enode.Node   // node to dial; replaced when resolve finds a newer endpoint
	lastResolved time.Time     // time of the most recent resolve attempt
	resolveDelay time.Duration // current minimum wait between resolve attempts
}
   244  
   245  func (t *dialTask) Do(srv *Server) {
   246  	if t.dest.Incomplete() {
   247  		if !t.resolve(srv) {
   248  			return
   249  		}
   250  	}
   251  	err := t.dial(srv, t.dest)
   252  	if err != nil {
   253  		srv.log.Trace("Dial error", "task", t, "err", err)
   254  		// Try resolving the ID of static nodes if dialing failed.
   255  		if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
   256  			if t.resolve(srv) {
   257  				t.dial(srv, t.dest)
   258  			}
   259  		}
   260  	}
   261  }
   262  
   263  // resolve attempts to find the current endpoint for the destination
   264  // using discovery.
   265  //
   266  // Resolve operations are throttled with backoff to avoid flooding the
   267  // discovery network with useless queries for nodes that don't exist.
   268  // The backoff delay resets when the node is found.
   269  func (t *dialTask) resolve(srv *Server) bool {
   270  	if srv.staticNodeResolver == nil {
   271  		srv.log.Debug("Can't resolve node", "id", t.dest.ID(), "err", "discovery is disabled")
   272  		return false
   273  	}
   274  	if t.resolveDelay == 0 {
   275  		t.resolveDelay = initialResolveDelay
   276  	}
   277  	if time.Since(t.lastResolved) < t.resolveDelay {
   278  		return false
   279  	}
   280  	resolved := srv.staticNodeResolver.Resolve(t.dest)
   281  	t.lastResolved = time.Now()
   282  	if resolved == nil {
   283  		t.resolveDelay *= 2
   284  		if t.resolveDelay > maxResolveDelay {
   285  			t.resolveDelay = maxResolveDelay
   286  		}
   287  		srv.log.Debug("Resolving node failed", "id", t.dest.ID(), "newdelay", t.resolveDelay)
   288  		return false
   289  	}
   290  	// The node was found.
   291  	t.resolveDelay = initialResolveDelay
   292  	t.dest = resolved
   293  	srv.log.Debug("Resolved node", "id", t.dest.ID(), "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
   294  	return true
   295  }
   296  
// dialError wraps errors returned by the NodeDialer so that dialTask.Do can
// tell a failed dial apart from an error returned by Server.SetupConn.
type dialError struct {
	error
}
   300  
   301  // dial performs the actual connection attempt.
   302  func (t *dialTask) dial(srv *Server, dest *enode.Node) error {
   303  	fd, err := srv.Dialer.Dial(dest)
   304  	if err != nil {
   305  		return &dialError{err}
   306  	}
   307  	mfd := newMeteredConn(fd, false, dest.IP())
   308  	return srv.SetupConn(mfd, t.flags, dest)
   309  }
   310  
   311  func (t *dialTask) String() string {
   312  	id := t.dest.ID()
   313  	return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], t.dest.IP(), t.dest.TCP())
   314  }
   315  
// discoverTask runs discovery table operations.
// Only one discoverTask is active at any time.
// discoverTask.Do performs a random lookup.
type discoverTask struct {
	want    int           // number of nodes requested from discovery
	results []*enode.Node // nodes read by Do; consumed by dialstate.taskDone
}
   323  
   324  func (t *discoverTask) Do(srv *Server) {
   325  	t.results = enode.ReadNodes(srv.discmix, t.want)
   326  }
   327  
   328  func (t *discoverTask) String() string {
   329  	s := "discovery query"
   330  	if len(t.results) > 0 {
   331  		s += fmt.Sprintf(" (%d results)", len(t.results))
   332  	} else {
   333  		s += fmt.Sprintf(" (want %d)", t.want)
   334  	}
   335  	return s
   336  }
   337  
// A waitExpireTask is generated if there are no other tasks
// to keep the loop in Server.run ticking. The embedded Duration
// is how long the task sleeps.
type waitExpireTask struct {
	time.Duration
}
   343  
   344  func (t waitExpireTask) Do(*Server) {
   345  	time.Sleep(t.Duration)
   346  }
   347  func (t waitExpireTask) String() string {
   348  	return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
   349  }