github.com/quinndk/ethereum_read@v0.0.0-20181211143958-29c55eec3237/go-ethereum-master_read/p2p/dial.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package p2p
    18  
    19  import (
    20  	"container/heap"
    21  	"crypto/rand"
    22  	"errors"
    23  	"fmt"
    24  	"net"
    25  	"time"
    26  
    27  	"github.com/ethereum/go-ethereum/log"
    28  	"github.com/ethereum/go-ethereum/p2p/discover"
    29  	"github.com/ethereum/go-ethereum/p2p/netutil"
    30  )
    31  
    32  const (
    33  	// This is the amount of time spent waiting in between
    34  	// redialing a certain node.
    35  	dialHistoryExpiration = 30 * time.Second
    36  
    37  	// Discovery lookups are throttled and can only run
    38  	// once every few seconds.
    39  	lookupInterval = 4 * time.Second
    40  
    41  	// If no peers are found for this amount of time, the initial bootnodes are
    42  	// attempted to be connected.
    43  	fallbackInterval = 20 * time.Second
    44  
    45  	// Endpoint resolution is throttled with bounded backoff.
    46  	initialResolveDelay = 60 * time.Second
    47  	maxResolveDelay     = time.Hour
    48  )
    49  
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
	// Dial opens a connection to the given node and returns it, or an error
	// if the connection could not be established.
	Dial(*discover.Node) (net.Conn, error)
}
    56  
// TCPDialer implements the NodeDialer interface by using a net.Dialer to
// create TCP connections to nodes in the network.
type TCPDialer struct {
	// The embedded dialer performs the actual connection attempt.
	*net.Dialer
}
    63  
    64  // Dial creates a TCP connection to the node
    65  // 与节点创建一个tcp连接
    66  func (t TCPDialer) Dial(dest *discover.Node) (net.Conn, error) {
    67  	addr := &net.TCPAddr{IP: dest.IP, Port: int(dest.TCP)}
    68  	return t.Dialer.Dial("tcp", addr.String())
    69  }
    70  
// dialstate schedules dials and discovery lookups.
// It gets a chance to compute new tasks on every iteration
// of the main loop in Server.run.
type dialstate struct {

	// Maximum number of dynamically dialed connections.
	maxDynDials int
	// Discovery table used to query for nodes; checkDial tolerates a nil value.
	ntab        discoverTable
	// Optional whitelist: when non-nil, nodes whose IP it does not contain
	// are rejected by checkDial.
	netrestrict *netutil.Netlist

	// True while a discoverTask has been issued and has not yet completed.
	lookupRunning bool
	// Nodes that are currently being dialed, with the flags of the dial.
	dialing       map[discover.NodeID]connFlag
	// Results of discovery lookups that have not been tried yet.
	lookupBuf     []*discover.Node // current discovery lookup results
	// Scratch buffer for nodes read at random from the discovery table.
	randomNodes   []*discover.Node // filled from Table
	// Static nodes, keyed by node ID, with the task used to dial each one.
	static        map[discover.NodeID]*dialTask
	// History of recent dials, used to avoid redialing a node too soon.
	hist          *dialHistory

	// Time at which the dialer was first used.
	start     time.Time        // time when the dialer was first used
	// Built-in nodes that are dialed when no other peers have been found.
	bootnodes []*discover.Node // default dials when there are no peers
}
    99  
// discoverTable is the subset of the discovery table's functionality that the
// dialer relies on.
type discoverTable interface {
	// Self returns the local node; checkDial uses its ID to avoid dialing ourselves.
	Self() *discover.Node
	Close()
	// Resolve looks up the node with the given ID. dialTask.resolve treats a
	// nil result as "not found".
	Resolve(target discover.NodeID) *discover.Node
	// Lookup runs a discovery lookup for target and returns the nodes found.
	Lookup(target discover.NodeID) []*discover.Node
	// ReadRandomNodes fills the given slice with nodes from the table and
	// returns how many entries were written.
	ReadRandomNodes([]*discover.Node) int
}
   107  
// dialHistory remembers recent dials. It is maintained as a heap (see the
// heap.Interface methods on this type) ordered by expiry time.
type dialHistory []pastDial
   110  
// pastDial is an entry in the dial history.
type pastDial struct {
	id  discover.NodeID // node that was dialed
	exp time.Time       // time at which the entry expires from the history
}
   116  
// task is a unit of work produced by dialstate.newTasks. Do is given the
// Server the task operates on.
type task interface {
	Do(*Server)
}
   120  
// A dialTask is generated for each node that is dialed. Its
// fields cannot be accessed while the task is running.
type dialTask struct {
	flags        connFlag       // connection flags passed on to Server.SetupConn
	dest         *discover.Node // node to dial; replaced when resolve finds a new endpoint
	lastResolved time.Time      // time of the most recent resolve attempt
	resolveDelay time.Duration  // current backoff between resolve attempts
}
   130  
// discoverTask runs discovery table operations.
// Only one discoverTask is active at any time.
// discoverTask.Do performs a random lookup.
type discoverTask struct {
	// results holds the nodes returned by the lookup performed in Do.
	results []*discover.Node
}
   138  
// A waitExpireTask is generated if there are no other tasks
// to keep the loop in Server.run ticking.
type waitExpireTask struct {
	// The embedded duration is how long Do sleeps.
	time.Duration
}
   145  
   146  func newDialState(static []*discover.Node, bootnodes []*discover.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
   147  	s := &dialstate{
   148  		maxDynDials: maxdyn,
   149  		ntab:        ntab,
   150  		netrestrict: netrestrict,
   151  		static:      make(map[discover.NodeID]*dialTask),
   152  		dialing:     make(map[discover.NodeID]connFlag),
   153  		bootnodes:   make([]*discover.Node, len(bootnodes)),
   154  		randomNodes: make([]*discover.Node, maxdyn/2),
   155  		hist:        new(dialHistory),
   156  	}
   157  	copy(s.bootnodes, bootnodes)
   158  	for _, n := range static {
   159  		s.addStatic(n)
   160  	}
   161  	return s
   162  }
   163  
   164  func (s *dialstate) addStatic(n *discover.Node) {
   165  	// This overwites the task instead of updating an existing
   166  	// entry, giving users the opportunity to force a resolve operation.
   167  	s.static[n.ID] = &dialTask{flags: staticDialedConn, dest: n}
   168  }
   169  
   170  func (s *dialstate) removeStatic(n *discover.Node) {
   171  	// This removes a task so future attempts to connect will not be made.
   172  	delete(s.static, n.ID)
   173  	// This removes a previous dial timestamp so that application
   174  	// can force a server to reconnect with chosen peer immediately.
   175  	s.hist.remove(n.ID)
   176  }
   177  
// newTasks computes the tasks that should be started next. peers is the set
// of currently connected peers keyed by node ID and now is the current time.
// nRunning is only consulted to decide whether a waitExpireTask is needed.
// Nodes for which a dial task is returned are recorded in s.dialing.
func (s *dialstate) newTasks(nRunning int, peers map[discover.NodeID]*Peer, now time.Time) []task {
	if s.start.IsZero() {
		s.start = now
	}

	var newtasks []task
	// addDial checks whether the node may be dialed; if so it marks the node
	// as dialing, queues a dial task for it and reports true.
	addDial := func(flag connFlag, n *discover.Node) bool {
		if err := s.checkDial(n, peers); err != nil {
			log.Trace("Skipping dial candidate", "id", n.ID, "addr", &net.TCPAddr{IP: n.IP, Port: int(n.TCP)}, "err", err)
			return false
		}
		s.dialing[n.ID] = flag
		newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
		return true
	}

	// Compute number of dynamic dials necessary at this point.
	needDynDials := s.maxDynDials
	// First subtract the dynamically dialed connections among the
	// established peers.
	for _, p := range peers {
		// Only dynamically dialed connections count against the budget.
		if p.rw.is(dynDialedConn) {
			needDynDials--
		}
	}
	// Then subtract the dynamic dials that are still in progress.
	for _, flag := range s.dialing {
		if flag&dynDialedConn != 0 {
			needDynDials--
		}
	}

	// Expire the dial history on every invocation.
	s.hist.expire(now)

	// Create dials for static nodes if they are not connected.
	for id, t := range s.static {
		err := s.checkDial(t.dest, peers)
		switch err {
		case errNotWhitelisted, errSelf:
			// These conditions are permanent, so the static entry is dropped.
			log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)}, "err", err)
			delete(s.static, t.dest.ID)
		case nil:
			s.dialing[id] = t.flags
			newtasks = append(newtasks, t)
		}
		// Any other error leaves the entry in place for a later invocation.
	}
	// If we don't have any peers whatsoever, try to dial a random bootnode. This
	// scenario is useful for the testnet (and private networks) where the discovery
	// table might be full of mostly bad peers, making it hard to find good ones.
	// The fallback only kicks in once fallbackInterval has passed since the
	// dialer was first used.
	if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
		// Take the first bootnode and rotate it to the end of the list.
		bootnode := s.bootnodes[0]
		s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
		s.bootnodes = append(s.bootnodes, bootnode)

		if addDial(dynDialedConn, bootnode) {
			needDynDials--
		}
	}
	// Use random nodes from the table for half of the necessary
	// dynamic dials.
	randomCandidates := needDynDials / 2
	if randomCandidates > 0 {
		n := s.ntab.ReadRandomNodes(s.randomNodes)
		for i := 0; i < randomCandidates && i < n; i++ {
			if addDial(dynDialedConn, s.randomNodes[i]) {
				needDynDials--
			}
		}
	}
	// Create dynamic dials from random lookup results, removing tried
	// items from the result buffer.
	i := 0
	for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
		if addDial(dynDialedConn, s.lookupBuf[i]) {
			needDynDials--
		}
	}
	// Drop the i entries examined above, shifting the remainder to the front.
	s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
	// Launch a discovery lookup if more candidates are needed.
	if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
		s.lookupRunning = true
		newtasks = append(newtasks, &discoverTask{})
	}

	// Launch a timer to wait for the next node to expire if all
	// candidates have been tried and no task is currently active.
	// This should prevent cases where the dialer logic is not ticked
	// because there are no pending events.
	if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
		t := &waitExpireTask{s.hist.min().exp.Sub(now)}
		newtasks = append(newtasks, t)
	}
	return newtasks
}
   283  
// Reasons for which checkDial refuses to dial a node.
var (
	errSelf             = errors.New("is self")
	errAlreadyDialing   = errors.New("already dialing")
	errAlreadyConnected = errors.New("already connected")
	errRecentlyDialed   = errors.New("recently dialed")
	errNotWhitelisted   = errors.New("not contained in netrestrict whitelist")
)
   291  
   292  // 检查dial状态(是否需要创建连接)
   293  func (s *dialstate) checkDial(n *discover.Node, peers map[discover.NodeID]*Peer) error {
   294  	_, dialing := s.dialing[n.ID]
   295  	switch {
   296  	case dialing:
   297  		// 正在创建
   298  		return errAlreadyDialing
   299  	case peers[n.ID] != nil
   300  		// 已经创建过连接
   301  		return errAlreadyConnected
   302  	case s.ntab != nil && n.ID == s.ntab.Self().ID:
   303  		// 创建的对象不是自己
   304  		return errSelf
   305  	case s.netrestrict != nil && !s.netrestrict.Contains(n.IP):
   306  		// 网络限制。对方IP不在白名单
   307  		return errNotWhitelisted
   308  	case s.hist.contains(n.ID):
   309  		return errRecentlyDialed
   310  	}
   311  	return nil
   312  }
   313  
   314  func (s *dialstate) taskDone(t task, now time.Time) {
   315  	switch t := t.(type) {
   316  	case *dialTask:
   317  		s.hist.add(t.dest.ID, now.Add(dialHistoryExpiration))
   318  		delete(s.dialing, t.dest.ID)
   319  	case *discoverTask:
   320  		s.lookupRunning = false
   321  		s.lookupBuf = append(s.lookupBuf, t.results...)
   322  	}
   323  }
   324  
   325  func (t *dialTask) Do(srv *Server) {
   326  	// 目标节点dest ip地址为空 使用resolve方法去查找目标节点并解析出ip地址
   327  	if t.dest.Incomplete() {
   328  		if !t.resolve(srv) {
   329  			return
   330  		}
   331  	}
   332  	// 建立连接
   333  	err := t.dial(srv, t.dest)
   334  	if err != nil {
   335  		log.Trace("Dial error", "task", t, "err", err)
   336  		// Try resolving the ID of static nodes if dialing failed.
   337  		// 如果是静态节点连接失败,尝试重新解析其节点ip地址  因为静态节点的ip是配置的,可能发生变动
   338  		if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
   339  			if t.resolve(srv) {
   340  				t.dial(srv, t.dest)
   341  			}
   342  		}
   343  	}
   344  }
   345  
   346  // resolve attempts to find the current endpoint for the destination
   347  // using discovery.
   348  //
   349  // Resolve operations are throttled with backoff to avoid flooding the
   350  // discovery network with useless queries for nodes that don't exist.
   351  // The backoff delay resets when the node is found.
   352  // 当目标节点ip地址为空时使用该方法发现节点并解析ip地址
   353  func (t *dialTask) resolve(srv *Server) bool {
   354  	if srv.ntab == nil {
   355  		log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
   356  		return false
   357  	}
   358  	if t.resolveDelay == 0 {
   359  		t.resolveDelay = initialResolveDelay
   360  	}
   361  	if time.Since(t.lastResolved) < t.resolveDelay {
   362  		return false
   363  	}
   364  	// 查找到节点
   365  	resolved := srv.ntab.Resolve(t.dest.ID)
   366  	t.lastResolved = time.Now()
   367  	if resolved == nil {
   368  		t.resolveDelay *= 2
   369  		if t.resolveDelay > maxResolveDelay {
   370  			t.resolveDelay = maxResolveDelay
   371  		}
   372  		log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
   373  		return false
   374  	}
   375  	// The node was found.
   376  	t.resolveDelay = initialResolveDelay
   377  	t.dest = resolved
   378  	log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)})
   379  	return true
   380  }
   381  
// dialError wraps an error returned by the NodeDialer, so that dialTask.Do
// can tell a failed connection attempt apart from a failure in Server.SetupConn.
type dialError struct {
	error
}
   385  
   386  // dial performs the actual connection attempt.
   387  // 节点连接的实现
   388  func (t *dialTask) dial(srv *Server, dest *discover.Node) error {
   389  	fd, err := srv.Dialer.Dial(dest)
   390  	if err != nil {
   391  		return &dialError{err}
   392  	}
   393  	// 新建一个计量连接
   394  	mfd := newMeteredConn(fd, false)
   395  	// 执行握手并尝试将连接方作为一个peer
   396  	return srv.SetupConn(mfd, t.flags, dest)
   397  }
   398  
   399  func (t *dialTask) String() string {
   400  	return fmt.Sprintf("%v %x %v:%d", t.flags, t.dest.ID[:8], t.dest.IP, t.dest.TCP)
   401  }
   402  
   403  // discoverTask的Do处理
   404  func (t *discoverTask) Do(srv *Server) {
   405  	// newTasks generates a lookup task whenever dynamic dials are
   406  	// necessary. Lookups need to take some time, otherwise the
   407  	// event loop spins too fast.
   408  	// 查找任务
   409  	next := srv.lastLookup.Add(lookupInterval)
   410  	if now := time.Now(); now.Before(next) {
   411  		time.Sleep(next.Sub(now))
   412  	}
   413  	srv.lastLookup = time.Now()
   414  	var target discover.NodeID
   415  	rand.Read(target[:])
   416  	// 查找发现节点的函数
   417  	t.results = srv.ntab.Lookup(target)
   418  }
   419  
   420  func (t *discoverTask) String() string {
   421  	s := "discovery lookup"
   422  	if len(t.results) > 0 {
   423  		s += fmt.Sprintf(" (%d results)", len(t.results))
   424  	}
   425  	return s
   426  }
   427  
// Do sleeps for the task's duration. The Server argument is unused.
func (t waitExpireTask) Do(*Server) {
	time.Sleep(t.Duration)
}
// String describes the task together with the duration it waits for.
func (t waitExpireTask) String() string {
	return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
}
   434  
// Use only these methods to access or modify dialHistory.

// min returns the first entry of the history. Because the history is kept as
// a heap ordered by expiry time, that is the entry that expires soonest.
// The history must not be empty: indexing an empty slice panics.
func (h dialHistory) min() pastDial {
	return h[0]
}
   439  func (h *dialHistory) add(id discover.NodeID, exp time.Time) {
   440  	heap.Push(h, pastDial{id, exp})
   441  
   442  }
   443  func (h *dialHistory) remove(id discover.NodeID) bool {
   444  	for i, v := range *h {
   445  		if v.id == id {
   446  			heap.Remove(h, i)
   447  			return true
   448  		}
   449  	}
   450  	return false
   451  }
   452  func (h dialHistory) contains(id discover.NodeID) bool {
   453  	for _, v := range h {
   454  		if v.id == id {
   455  			return true
   456  		}
   457  	}
   458  	return false
   459  }
   460  func (h *dialHistory) expire(now time.Time) {
   461  	for h.Len() > 0 && h.min().exp.Before(now) {
   462  		heap.Pop(h)
   463  	}
   464  }
   465  
   466  // heap.Interface boilerplate
   467  func (h dialHistory) Len() int           { return len(h) }
   468  func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) }
   469  func (h dialHistory) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
   470  func (h *dialHistory) Push(x interface{}) {
   471  	*h = append(*h, x.(pastDial))
   472  }
   473  func (h *dialHistory) Pop() interface{} {
   474  	old := *h
   475  	n := len(old)
   476  	x := old[n-1]
   477  	*h = old[0 : n-1]
   478  	return x
   479  }