github.com/Elemental-core/elementalcore@v0.0.0-20191206075037-63891242267a/p2p/dial.go (about)

     1  // Copyright 2015 The elementalcore Authors
     2  // This file is part of the elementalcore library.
     3  //
     4  // The elementalcore library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The elementalcore library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the elementalcore library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package p2p
    18  
    19  import (
    20  	"container/heap"
    21  	"crypto/rand"
    22  	"errors"
    23  	"fmt"
    24  	"net"
    25  	"time"
    26  
    27  	"github.com/Elemental-core/elementalcore/log"
    28  	"github.com/Elemental-core/elementalcore/p2p/discover"
    29  	"github.com/Elemental-core/elementalcore/p2p/netutil"
    30  )
    31  
const (
	// dialHistoryExpiration is the amount of time spent waiting in between
	// redialing a certain node. taskDone records every finished dial in the
	// dial history with this lifetime, and checkDial rejects nodes that are
	// still present in the history.
	dialHistoryExpiration = 30 * time.Second

	// lookupInterval throttles discovery lookups: discoverTask.Do sleeps
	// until at least this much time has passed since the previous lookup.
	lookupInterval = 4 * time.Second

	// fallbackInterval is how long the dialer must have been in use before
	// newTasks starts dialing the initial bootnodes while there are no peers.
	fallbackInterval = 20 * time.Second

	// Endpoint resolution is throttled with bounded backoff: the delay starts
	// at initialResolveDelay, doubles after every failed resolve and is capped
	// at maxResolveDelay.
	initialResolveDelay = 60 * time.Second
	maxResolveDelay     = time.Hour
)
    49  
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
	// Dial opens a connection to the given node and returns it together
	// with any error encountered.
	Dial(*discover.Node) (net.Conn, error)
}
    55  
// TCPDialer implements the NodeDialer interface by using a net.Dialer to
// create TCP connections to nodes in the network. The embedded *net.Dialer
// performs the actual dialing.
type TCPDialer struct {
	*net.Dialer
}
    61  
    62  // Dial creates a TCP connection to the node
    63  func (t TCPDialer) Dial(dest *discover.Node) (net.Conn, error) {
    64  	addr := &net.TCPAddr{IP: dest.IP, Port: int(dest.TCP)}
    65  	return t.Dialer.Dial("tcp", addr.String())
    66  }
    67  
// dialstate schedules dials and discovery lookups.
// It gets a chance to compute new tasks on every iteration
// of the main loop in Server.run.
type dialstate struct {
	// Configuration supplied to newDialState: the budget of dynamic dials,
	// the discovery table and an optional IP whitelist. checkDial skips the
	// self check when ntab is nil and the whitelist check when netrestrict
	// is nil.
	maxDynDials int
	ntab        discoverTable
	netrestrict *netutil.Netlist

	// Scheduling state. lookupRunning is set while a discoverTask is
	// outstanding, dialing holds the nodes with a dial in flight, static
	// holds the dial tasks of static nodes and hist remembers recently
	// dialed nodes.
	lookupRunning bool
	dialing       map[discover.NodeID]connFlag
	lookupBuf     []*discover.Node // current discovery lookup results
	randomNodes   []*discover.Node // filled from Table
	static        map[discover.NodeID]*dialTask
	hist          *dialHistory

	start     time.Time        // time when the dialer was first used
	bootnodes []*discover.Node // default dials when there are no peers
}
    86  
// discoverTable is the interface of the discovery table that the dialer
// depends on. In this file Self is used by checkDial, Resolve by
// dialTask.resolve, Lookup by discoverTask.Do and ReadRandomNodes by
// dialstate.newTasks.
type discoverTable interface {
	Self() *discover.Node
	Close()
	// Resolve looks up the node with the given ID; callers in this file
	// treat a nil result as "not found".
	Resolve(target discover.NodeID) *discover.Node
	Lookup(target discover.NodeID) []*discover.Node
	// ReadRandomNodes fills the given slice and returns a count; newTasks
	// uses the count as the number of valid leading entries.
	ReadRandomNodes([]*discover.Node) int
}
    94  
// dialHistory remembers recent dials. It is maintained as a heap ordered by
// expiration time through the container/heap package, so the entry that
// expires first is always at index 0.
type dialHistory []pastDial

// pastDial is an entry in the dial history.
type pastDial struct {
	id  discover.NodeID // node that was dialed
	exp time.Time       // time at which the entry is dropped from the history
}
   103  
// task is a unit of work produced by dialstate.newTasks. It is implemented
// by dialTask, discoverTask and waitExpireTask.
type task interface {
	// Do performs the task's work using the given Server.
	Do(*Server)
}
   107  
// A dialTask is generated for each node that is dialed. Its
// fields cannot be accessed while the task is running.
//
// flags is passed on to Server.SetupConn and tells static dials apart from
// dynamic ones. dest is the node to dial and is replaced by resolve when the
// node is found through discovery. lastResolved and resolveDelay implement
// the resolve backoff.
type dialTask struct {
	flags        connFlag
	dest         *discover.Node
	lastResolved time.Time
	resolveDelay time.Duration
}
   116  
// discoverTask runs discovery table operations.
// Only one discoverTask is active at any time.
// discoverTask.Do performs a random lookup.
type discoverTask struct {
	// results holds the nodes returned by the lookup. It is written by Do
	// and appended to the dialer's lookup buffer in dialstate.taskDone.
	results []*discover.Node
}
   123  
// A waitExpireTask is generated if there are no other tasks
// to keep the loop in Server.run ticking. Its Do method sleeps
// for the embedded duration.
type waitExpireTask struct {
	time.Duration
}
   129  
   130  func newDialState(static []*discover.Node, bootnodes []*discover.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
   131  	s := &dialstate{
   132  		maxDynDials: maxdyn,
   133  		ntab:        ntab,
   134  		netrestrict: netrestrict,
   135  		static:      make(map[discover.NodeID]*dialTask),
   136  		dialing:     make(map[discover.NodeID]connFlag),
   137  		bootnodes:   make([]*discover.Node, len(bootnodes)),
   138  		randomNodes: make([]*discover.Node, maxdyn/2),
   139  		hist:        new(dialHistory),
   140  	}
   141  	copy(s.bootnodes, bootnodes)
   142  	for _, n := range static {
   143  		s.addStatic(n)
   144  	}
   145  	return s
   146  }
   147  
   148  func (s *dialstate) addStatic(n *discover.Node) {
   149  	// This overwites the task instead of updating an existing
   150  	// entry, giving users the opportunity to force a resolve operation.
   151  	s.static[n.ID] = &dialTask{flags: staticDialedConn, dest: n}
   152  }
   153  
   154  func (s *dialstate) removeStatic(n *discover.Node) {
   155  	// This removes a task so future attempts to connect will not be made.
   156  	delete(s.static, n.ID)
   157  }
   158  
   159  func (s *dialstate) newTasks(nRunning int, peers map[discover.NodeID]*Peer, now time.Time) []task {
   160  	if s.start == (time.Time{}) {
   161  		s.start = now
   162  	}
   163  
   164  	var newtasks []task
   165  	addDial := func(flag connFlag, n *discover.Node) bool {
   166  		if err := s.checkDial(n, peers); err != nil {
   167  			log.Trace("Skipping dial candidate", "id", n.ID, "addr", &net.TCPAddr{IP: n.IP, Port: int(n.TCP)}, "err", err)
   168  			return false
   169  		}
   170  		s.dialing[n.ID] = flag
   171  		newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
   172  		return true
   173  	}
   174  
   175  	// Compute number of dynamic dials necessary at this point.
   176  	needDynDials := s.maxDynDials
   177  	for _, p := range peers {
   178  		if p.rw.is(dynDialedConn) {
   179  			needDynDials--
   180  		}
   181  	}
   182  	for _, flag := range s.dialing {
   183  		if flag&dynDialedConn != 0 {
   184  			needDynDials--
   185  		}
   186  	}
   187  
   188  	// Expire the dial history on every invocation.
   189  	s.hist.expire(now)
   190  
   191  	// Create dials for static nodes if they are not connected.
   192  	for id, t := range s.static {
   193  		err := s.checkDial(t.dest, peers)
   194  		switch err {
   195  		case errNotWhitelisted, errSelf:
   196  			log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)}, "err", err)
   197  			delete(s.static, t.dest.ID)
   198  		case nil:
   199  			s.dialing[id] = t.flags
   200  			newtasks = append(newtasks, t)
   201  		}
   202  	}
   203  	// If we don't have any peers whatsoever, try to dial a random bootnode. This
   204  	// scenario is useful for the testnet (and private networks) where the discovery
   205  	// table might be full of mostly bad peers, making it hard to find good ones.
   206  	if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
   207  		bootnode := s.bootnodes[0]
   208  		s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
   209  		s.bootnodes = append(s.bootnodes, bootnode)
   210  
   211  		if addDial(dynDialedConn, bootnode) {
   212  			needDynDials--
   213  		}
   214  	}
   215  	// Use random nodes from the table for half of the necessary
   216  	// dynamic dials.
   217  	randomCandidates := needDynDials / 2
   218  	if randomCandidates > 0 {
   219  		n := s.ntab.ReadRandomNodes(s.randomNodes)
   220  		for i := 0; i < randomCandidates && i < n; i++ {
   221  			if addDial(dynDialedConn, s.randomNodes[i]) {
   222  				needDynDials--
   223  			}
   224  		}
   225  	}
   226  	// Create dynamic dials from random lookup results, removing tried
   227  	// items from the result buffer.
   228  	i := 0
   229  	for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
   230  		if addDial(dynDialedConn, s.lookupBuf[i]) {
   231  			needDynDials--
   232  		}
   233  	}
   234  	s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
   235  	// Launch a discovery lookup if more candidates are needed.
   236  	if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
   237  		s.lookupRunning = true
   238  		newtasks = append(newtasks, &discoverTask{})
   239  	}
   240  
   241  	// Launch a timer to wait for the next node to expire if all
   242  	// candidates have been tried and no task is currently active.
   243  	// This should prevent cases where the dialer logic is not ticked
   244  	// because there are no pending events.
   245  	if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
   246  		t := &waitExpireTask{s.hist.min().exp.Sub(now)}
   247  		newtasks = append(newtasks, t)
   248  	}
   249  	return newtasks
   250  }
   251  
// Reasons returned by dialstate.checkDial for rejecting a dial candidate.
// newTasks removes a static node when the reason is errSelf or
// errNotWhitelisted; the other reasons only skip the node for the current
// round.
var (
	errSelf             = errors.New("is self")
	errAlreadyDialing   = errors.New("already dialing")
	errAlreadyConnected = errors.New("already connected")
	errRecentlyDialed   = errors.New("recently dialed")
	errNotWhitelisted   = errors.New("not contained in netrestrict whitelist")
)
   259  
   260  func (s *dialstate) checkDial(n *discover.Node, peers map[discover.NodeID]*Peer) error {
   261  	_, dialing := s.dialing[n.ID]
   262  	switch {
   263  	case dialing:
   264  		return errAlreadyDialing
   265  	case peers[n.ID] != nil:
   266  		return errAlreadyConnected
   267  	case s.ntab != nil && n.ID == s.ntab.Self().ID:
   268  		return errSelf
   269  	case s.netrestrict != nil && !s.netrestrict.Contains(n.IP):
   270  		return errNotWhitelisted
   271  	case s.hist.contains(n.ID):
   272  		return errRecentlyDialed
   273  	}
   274  	return nil
   275  }
   276  
   277  func (s *dialstate) taskDone(t task, now time.Time) {
   278  	switch t := t.(type) {
   279  	case *dialTask:
   280  		s.hist.add(t.dest.ID, now.Add(dialHistoryExpiration))
   281  		delete(s.dialing, t.dest.ID)
   282  	case *discoverTask:
   283  		s.lookupRunning = false
   284  		s.lookupBuf = append(s.lookupBuf, t.results...)
   285  	}
   286  }
   287  
   288  func (t *dialTask) Do(srv *Server) {
   289  	if t.dest.Incomplete() {
   290  		if !t.resolve(srv) {
   291  			return
   292  		}
   293  	}
   294  	success := t.dial(srv, t.dest)
   295  	// Try resolving the ID of static nodes if dialing failed.
   296  	if !success && t.flags&staticDialedConn != 0 {
   297  		if t.resolve(srv) {
   298  			t.dial(srv, t.dest)
   299  		}
   300  	}
   301  }
   302  
   303  // resolve attempts to find the current endpoint for the destination
   304  // using discovery.
   305  //
   306  // Resolve operations are throttled with backoff to avoid flooding the
   307  // discovery network with useless queries for nodes that don't exist.
   308  // The backoff delay resets when the node is found.
   309  func (t *dialTask) resolve(srv *Server) bool {
   310  	if srv.ntab == nil {
   311  		log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
   312  		return false
   313  	}
   314  	if t.resolveDelay == 0 {
   315  		t.resolveDelay = initialResolveDelay
   316  	}
   317  	if time.Since(t.lastResolved) < t.resolveDelay {
   318  		return false
   319  	}
   320  	resolved := srv.ntab.Resolve(t.dest.ID)
   321  	t.lastResolved = time.Now()
   322  	if resolved == nil {
   323  		t.resolveDelay *= 2
   324  		if t.resolveDelay > maxResolveDelay {
   325  			t.resolveDelay = maxResolveDelay
   326  		}
   327  		log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
   328  		return false
   329  	}
   330  	// The node was found.
   331  	t.resolveDelay = initialResolveDelay
   332  	t.dest = resolved
   333  	log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)})
   334  	return true
   335  }
   336  
   337  // dial performs the actual connection attempt.
   338  func (t *dialTask) dial(srv *Server, dest *discover.Node) bool {
   339  	fd, err := srv.Dialer.Dial(dest)
   340  	if err != nil {
   341  		log.Trace("Dial error", "task", t, "err", err)
   342  		return false
   343  	}
   344  	mfd := newMeteredConn(fd, false)
   345  	srv.SetupConn(mfd, t.flags, dest)
   346  	return true
   347  }
   348  
   349  func (t *dialTask) String() string {
   350  	return fmt.Sprintf("%v %x %v:%d", t.flags, t.dest.ID[:8], t.dest.IP, t.dest.TCP)
   351  }
   352  
   353  func (t *discoverTask) Do(srv *Server) {
   354  	// newTasks generates a lookup task whenever dynamic dials are
   355  	// necessary. Lookups need to take some time, otherwise the
   356  	// event loop spins too fast.
   357  	next := srv.lastLookup.Add(lookupInterval)
   358  	if now := time.Now(); now.Before(next) {
   359  		time.Sleep(next.Sub(now))
   360  	}
   361  	srv.lastLookup = time.Now()
   362  	var target discover.NodeID
   363  	rand.Read(target[:])
   364  	t.results = srv.ntab.Lookup(target)
   365  }
   366  
   367  func (t *discoverTask) String() string {
   368  	s := "discovery lookup"
   369  	if len(t.results) > 0 {
   370  		s += fmt.Sprintf(" (%d results)", len(t.results))
   371  	}
   372  	return s
   373  }
   374  
   375  func (t waitExpireTask) Do(*Server) {
   376  	time.Sleep(t.Duration)
   377  }
   378  func (t waitExpireTask) String() string {
   379  	return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
   380  }
   381  
   382  // Use only these methods to access or modify dialHistory.
   383  func (h dialHistory) min() pastDial {
   384  	return h[0]
   385  }
   386  func (h *dialHistory) add(id discover.NodeID, exp time.Time) {
   387  	heap.Push(h, pastDial{id, exp})
   388  }
   389  func (h dialHistory) contains(id discover.NodeID) bool {
   390  	for _, v := range h {
   391  		if v.id == id {
   392  			return true
   393  		}
   394  	}
   395  	return false
   396  }
   397  func (h *dialHistory) expire(now time.Time) {
   398  	for h.Len() > 0 && h.min().exp.Before(now) {
   399  		heap.Pop(h)
   400  	}
   401  }
   402  
   403  // heap.Interface boilerplate
   404  func (h dialHistory) Len() int           { return len(h) }
   405  func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) }
   406  func (h dialHistory) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
   407  func (h *dialHistory) Push(x interface{}) {
   408  	*h = append(*h, x.(pastDial))
   409  }
   410  func (h *dialHistory) Pop() interface{} {
   411  	old := *h
   412  	n := len(old)
   413  	x := old[n-1]
   414  	*h = old[0 : n-1]
   415  	return x
   416  }