github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/p2p/dial.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package p2p 18 19 import ( 20 "context" 21 crand "crypto/rand" 22 "encoding/binary" 23 "errors" 24 "fmt" 25 mrand "math/rand" 26 "net" 27 "sync" 28 "sync/atomic" 29 "time" 30 31 "github.com/ethereum/go-ethereum/common/mclock" 32 "github.com/ethereum/go-ethereum/log" 33 "github.com/ethereum/go-ethereum/p2p/enode" 34 "github.com/ethereum/go-ethereum/p2p/netutil" 35 ) 36 37 const ( 38 // This is the amount of time spent waiting in between redialing a certain node. The 39 // limit is a bit higher than inboundThrottleTime to prevent failing dials in small 40 // private networks. 41 dialHistoryExpiration = inboundThrottleTime + 5*time.Second 42 43 // Config for the "Looking for peers" message. 44 dialStatsLogInterval = 10 * time.Second // printed at most this often 45 dialStatsPeerLimit = 3 // but not if more than this many dialed peers 46 47 // Endpoint resolution is throttled with bounded backoff. 48 initialResolveDelay = 60 * time.Second 49 maxResolveDelay = time.Hour 50 ) 51 52 // NodeDialer is used to connect to nodes in the network, typically by using 53 // an underlying net.Dialer but also using net.Pipe in tests. 54 type NodeDialer interface { 55 Dial(context.Context, *enode.Node) (net.Conn, error) 56 } 57 58 type nodeResolver interface { 59 Resolve(*enode.Node) *enode.Node 60 } 61 62 // tcpDialer implements NodeDialer using real TCP connections. 63 type tcpDialer struct { 64 d *net.Dialer 65 } 66 67 func (t tcpDialer) Dial(ctx context.Context, dest *enode.Node) (net.Conn, error) { 68 return t.d.DialContext(ctx, "tcp", nodeAddr(dest).String()) 69 } 70 71 func nodeAddr(n *enode.Node) net.Addr { 72 return &net.TCPAddr{IP: n.IP(), Port: n.TCP()} 73 } 74 75 // checkDial errors: 76 var ( 77 errSelf = errors.New("is self") 78 errAlreadyDialing = errors.New("already dialing") 79 errAlreadyConnected = errors.New("already connected") 80 errRecentlyDialed = errors.New("recently dialed") 81 errNetRestrict = errors.New("not contained in netrestrict list") 82 errNoPort = errors.New("node does not provide TCP port") 83 ) 84 85 // dialer creates outbound connections and submits them into Server. 86 // Two types of peer connections can be created: 87 // 88 // - static dials are pre-configured connections. The dialer attempts 89 // keep these nodes connected at all times. 90 // 91 // - dynamic dials are created from node discovery results. The dialer 92 // continuously reads candidate nodes from its input iterator and attempts 93 // to create peer connections to nodes arriving through the iterator. 94 type dialScheduler struct { 95 dialConfig 96 setupFunc dialSetupFunc 97 wg sync.WaitGroup 98 cancel context.CancelFunc 99 ctx context.Context 100 nodesIn chan *enode.Node 101 doneCh chan *dialTask 102 addStaticCh chan *enode.Node 103 remStaticCh chan *enode.Node 104 addPeerCh chan *conn 105 remPeerCh chan *conn 106 107 // Everything below here belongs to loop and 108 // should only be accessed by code on the loop goroutine. 109 dialing map[enode.ID]*dialTask // active tasks 110 peers map[enode.ID]struct{} // all connected peers 111 dialPeers int // current number of dialed peers 112 113 // The static map tracks all static dial tasks. The subset of usable static dial tasks 114 // (i.e. those passing checkDial) is kept in staticPool. The scheduler prefers 115 // launching random static tasks from the pool over launching dynamic dials from the 116 // iterator. 117 static map[enode.ID]*dialTask 118 staticPool []*dialTask 119 120 // The dial history keeps recently dialed nodes. Members of history are not dialed. 121 history expHeap 122 historyTimer *mclock.Alarm 123 124 // for logStats 125 lastStatsLog mclock.AbsTime 126 doneSinceLastLog int 127 } 128 129 type dialSetupFunc func(net.Conn, connFlag, *enode.Node) error 130 131 type dialConfig struct { 132 self enode.ID // our own ID 133 maxDialPeers int // maximum number of dialed peers 134 maxActiveDials int // maximum number of active dials 135 netRestrict *netutil.Netlist // IP netrestrict list, disabled if nil 136 resolver nodeResolver 137 dialer NodeDialer 138 log log.Logger 139 clock mclock.Clock 140 rand *mrand.Rand 141 } 142 143 func (cfg dialConfig) withDefaults() dialConfig { 144 if cfg.maxActiveDials == 0 { 145 cfg.maxActiveDials = defaultMaxPendingPeers 146 } 147 if cfg.log == nil { 148 cfg.log = log.Root() 149 } 150 if cfg.clock == nil { 151 cfg.clock = mclock.System{} 152 } 153 if cfg.rand == nil { 154 seedb := make([]byte, 8) 155 crand.Read(seedb) 156 seed := int64(binary.BigEndian.Uint64(seedb)) 157 cfg.rand = mrand.New(mrand.NewSource(seed)) 158 } 159 return cfg 160 } 161 162 func newDialScheduler(config dialConfig, it enode.Iterator, setupFunc dialSetupFunc) *dialScheduler { 163 cfg := config.withDefaults() 164 d := &dialScheduler{ 165 dialConfig: cfg, 166 historyTimer: mclock.NewAlarm(cfg.clock), 167 setupFunc: setupFunc, 168 dialing: make(map[enode.ID]*dialTask), 169 static: make(map[enode.ID]*dialTask), 170 peers: make(map[enode.ID]struct{}), 171 doneCh: make(chan *dialTask), 172 nodesIn: make(chan *enode.Node), 173 addStaticCh: make(chan *enode.Node), 174 remStaticCh: make(chan *enode.Node), 175 addPeerCh: make(chan *conn), 176 remPeerCh: make(chan *conn), 177 } 178 d.lastStatsLog = d.clock.Now() 179 d.ctx, d.cancel = context.WithCancel(context.Background()) 180 d.wg.Add(2) 181 go d.readNodes(it) 182 go d.loop(it) 183 return d 184 } 185 186 // stop shuts down the dialer, canceling all current dial tasks. 187 func (d *dialScheduler) stop() { 188 d.cancel() 189 d.wg.Wait() 190 } 191 192 // addStatic adds a static dial candidate. 193 func (d *dialScheduler) addStatic(n *enode.Node) { 194 select { 195 case d.addStaticCh <- n: 196 case <-d.ctx.Done(): 197 } 198 } 199 200 // removeStatic removes a static dial candidate. 201 func (d *dialScheduler) removeStatic(n *enode.Node) { 202 select { 203 case d.remStaticCh <- n: 204 case <-d.ctx.Done(): 205 } 206 } 207 208 // peerAdded updates the peer set. 209 func (d *dialScheduler) peerAdded(c *conn) { 210 select { 211 case d.addPeerCh <- c: 212 case <-d.ctx.Done(): 213 } 214 } 215 216 // peerRemoved updates the peer set. 217 func (d *dialScheduler) peerRemoved(c *conn) { 218 select { 219 case d.remPeerCh <- c: 220 case <-d.ctx.Done(): 221 } 222 } 223 224 // loop is the main loop of the dialer. 225 func (d *dialScheduler) loop(it enode.Iterator) { 226 var ( 227 nodesCh chan *enode.Node 228 ) 229 230 loop: 231 for { 232 // Launch new dials if slots are available. 233 slots := d.freeDialSlots() 234 slots -= d.startStaticDials(slots) 235 if slots > 0 { 236 nodesCh = d.nodesIn 237 } else { 238 nodesCh = nil 239 } 240 d.rearmHistoryTimer() 241 d.logStats() 242 243 select { 244 case node := <-nodesCh: 245 if err := d.checkDial(node); err != nil { 246 d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err) 247 } else { 248 d.startDial(newDialTask(node, dynDialedConn)) 249 } 250 251 case task := <-d.doneCh: 252 id := task.dest().ID() 253 delete(d.dialing, id) 254 d.updateStaticPool(id) 255 d.doneSinceLastLog++ 256 257 case c := <-d.addPeerCh: 258 if c.is(dynDialedConn) || c.is(staticDialedConn) { 259 d.dialPeers++ 260 } 261 id := c.node.ID() 262 d.peers[id] = struct{}{} 263 // Remove from static pool because the node is now connected. 264 task := d.static[id] 265 if task != nil && task.staticPoolIndex >= 0 { 266 d.removeFromStaticPool(task.staticPoolIndex) 267 } 268 // TODO: cancel dials to connected peers 269 270 case c := <-d.remPeerCh: 271 if c.is(dynDialedConn) || c.is(staticDialedConn) { 272 d.dialPeers-- 273 } 274 delete(d.peers, c.node.ID()) 275 d.updateStaticPool(c.node.ID()) 276 277 case node := <-d.addStaticCh: 278 id := node.ID() 279 _, exists := d.static[id] 280 d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists) 281 if exists { 282 continue loop 283 } 284 task := newDialTask(node, staticDialedConn) 285 d.static[id] = task 286 if d.checkDial(node) == nil { 287 d.addToStaticPool(task) 288 } 289 290 case node := <-d.remStaticCh: 291 id := node.ID() 292 task := d.static[id] 293 d.log.Trace("Removing static node", "id", id, "ok", task != nil) 294 if task != nil { 295 delete(d.static, id) 296 if task.staticPoolIndex >= 0 { 297 d.removeFromStaticPool(task.staticPoolIndex) 298 } 299 } 300 301 case <-d.historyTimer.C(): 302 d.expireHistory() 303 304 case <-d.ctx.Done(): 305 it.Close() 306 break loop 307 } 308 } 309 310 d.historyTimer.Stop() 311 for range d.dialing { 312 <-d.doneCh 313 } 314 d.wg.Done() 315 } 316 317 // readNodes runs in its own goroutine and delivers nodes from 318 // the input iterator to the nodesIn channel. 319 func (d *dialScheduler) readNodes(it enode.Iterator) { 320 defer d.wg.Done() 321 322 for it.Next() { 323 select { 324 case d.nodesIn <- it.Node(): 325 case <-d.ctx.Done(): 326 } 327 } 328 } 329 330 // logStats prints dialer statistics to the log. The message is suppressed when enough 331 // peers are connected because users should only see it while their client is starting up 332 // or comes back online. 333 func (d *dialScheduler) logStats() { 334 now := d.clock.Now() 335 if d.lastStatsLog.Add(dialStatsLogInterval) > now { 336 return 337 } 338 if d.dialPeers < dialStatsPeerLimit && d.dialPeers < d.maxDialPeers { 339 d.log.Info("Looking for peers", "peercount", len(d.peers), "tried", d.doneSinceLastLog, "static", len(d.static)) 340 } 341 d.doneSinceLastLog = 0 342 d.lastStatsLog = now 343 } 344 345 // rearmHistoryTimer configures d.historyTimer to fire when the 346 // next item in d.history expires. 347 func (d *dialScheduler) rearmHistoryTimer() { 348 if len(d.history) == 0 { 349 return 350 } 351 d.historyTimer.Schedule(d.history.nextExpiry()) 352 } 353 354 // expireHistory removes expired items from d.history. 355 func (d *dialScheduler) expireHistory() { 356 d.history.expire(d.clock.Now(), func(hkey string) { 357 var id enode.ID 358 copy(id[:], hkey) 359 d.updateStaticPool(id) 360 }) 361 } 362 363 // freeDialSlots returns the number of free dial slots. The result can be negative 364 // when peers are connected while their task is still running. 365 func (d *dialScheduler) freeDialSlots() int { 366 slots := (d.maxDialPeers - d.dialPeers) * 2 367 if slots > d.maxActiveDials { 368 slots = d.maxActiveDials 369 } 370 free := slots - len(d.dialing) 371 return free 372 } 373 374 // checkDial returns an error if node n should not be dialed. 375 func (d *dialScheduler) checkDial(n *enode.Node) error { 376 if n.ID() == d.self { 377 return errSelf 378 } 379 if n.IP() != nil && n.TCP() == 0 { 380 // This check can trigger if a non-TCP node is found 381 // by discovery. If there is no IP, the node is a static 382 // node and the actual endpoint will be resolved later in dialTask. 383 return errNoPort 384 } 385 if _, ok := d.dialing[n.ID()]; ok { 386 return errAlreadyDialing 387 } 388 if _, ok := d.peers[n.ID()]; ok { 389 return errAlreadyConnected 390 } 391 if d.netRestrict != nil && !d.netRestrict.Contains(n.IP()) { 392 return errNetRestrict 393 } 394 if d.history.contains(string(n.ID().Bytes())) { 395 return errRecentlyDialed 396 } 397 return nil 398 } 399 400 // startStaticDials starts n static dial tasks. 401 func (d *dialScheduler) startStaticDials(n int) (started int) { 402 for started = 0; started < n && len(d.staticPool) > 0; started++ { 403 idx := d.rand.Intn(len(d.staticPool)) 404 task := d.staticPool[idx] 405 d.startDial(task) 406 d.removeFromStaticPool(idx) 407 } 408 return started 409 } 410 411 // updateStaticPool attempts to move the given static dial back into staticPool. 412 func (d *dialScheduler) updateStaticPool(id enode.ID) { 413 task, ok := d.static[id] 414 if ok && task.staticPoolIndex < 0 && d.checkDial(task.dest()) == nil { 415 d.addToStaticPool(task) 416 } 417 } 418 419 func (d *dialScheduler) addToStaticPool(task *dialTask) { 420 if task.staticPoolIndex >= 0 { 421 panic("attempt to add task to staticPool twice") 422 } 423 d.staticPool = append(d.staticPool, task) 424 task.staticPoolIndex = len(d.staticPool) - 1 425 } 426 427 // removeFromStaticPool removes the task at idx from staticPool. It does that by moving the 428 // current last element of the pool to idx and then shortening the pool by one. 429 func (d *dialScheduler) removeFromStaticPool(idx int) { 430 task := d.staticPool[idx] 431 end := len(d.staticPool) - 1 432 d.staticPool[idx] = d.staticPool[end] 433 d.staticPool[idx].staticPoolIndex = idx 434 d.staticPool[end] = nil 435 d.staticPool = d.staticPool[:end] 436 task.staticPoolIndex = -1 437 } 438 439 // startDial runs the given dial task in a separate goroutine. 440 func (d *dialScheduler) startDial(task *dialTask) { 441 node := task.dest() 442 d.log.Trace("Starting p2p dial", "id", node.ID(), "ip", node.IP(), "flag", task.flags) 443 hkey := string(node.ID().Bytes()) 444 d.history.add(hkey, d.clock.Now().Add(dialHistoryExpiration)) 445 d.dialing[node.ID()] = task 446 go func() { 447 task.run(d) 448 d.doneCh <- task 449 }() 450 } 451 452 // A dialTask generated for each node that is dialed. 453 type dialTask struct { 454 staticPoolIndex int 455 flags connFlag 456 457 // These fields are private to the task and should not be 458 // accessed by dialScheduler while the task is running. 459 destPtr atomic.Pointer[enode.Node] 460 lastResolved mclock.AbsTime 461 resolveDelay time.Duration 462 } 463 464 func newDialTask(dest *enode.Node, flags connFlag) *dialTask { 465 t := &dialTask{flags: flags, staticPoolIndex: -1} 466 t.destPtr.Store(dest) 467 return t 468 } 469 470 type dialError struct { 471 error 472 } 473 474 func (t *dialTask) dest() *enode.Node { 475 return t.destPtr.Load() 476 } 477 478 func (t *dialTask) run(d *dialScheduler) { 479 if t.needResolve() && !t.resolve(d) { 480 return 481 } 482 483 err := t.dial(d, t.dest()) 484 if err != nil { 485 // For static nodes, resolve one more time if dialing fails. 486 if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 { 487 if t.resolve(d) { 488 t.dial(d, t.dest()) 489 } 490 } 491 } 492 } 493 494 func (t *dialTask) needResolve() bool { 495 return t.flags&staticDialedConn != 0 && t.dest().IP() == nil 496 } 497 498 // resolve attempts to find the current endpoint for the destination 499 // using discovery. 500 // 501 // Resolve operations are throttled with backoff to avoid flooding the 502 // discovery network with useless queries for nodes that don't exist. 503 // The backoff delay resets when the node is found. 504 func (t *dialTask) resolve(d *dialScheduler) bool { 505 if d.resolver == nil { 506 return false 507 } 508 if t.resolveDelay == 0 { 509 t.resolveDelay = initialResolveDelay 510 } 511 if t.lastResolved > 0 && time.Duration(d.clock.Now()-t.lastResolved) < t.resolveDelay { 512 return false 513 } 514 515 node := t.dest() 516 resolved := d.resolver.Resolve(node) 517 t.lastResolved = d.clock.Now() 518 if resolved == nil { 519 t.resolveDelay *= 2 520 if t.resolveDelay > maxResolveDelay { 521 t.resolveDelay = maxResolveDelay 522 } 523 d.log.Debug("Resolving node failed", "id", node.ID(), "newdelay", t.resolveDelay) 524 return false 525 } 526 // The node was found. 527 t.resolveDelay = initialResolveDelay 528 t.destPtr.Store(resolved) 529 d.log.Debug("Resolved node", "id", resolved.ID(), "addr", &net.TCPAddr{IP: resolved.IP(), Port: resolved.TCP()}) 530 return true 531 } 532 533 // dial performs the actual connection attempt. 534 func (t *dialTask) dial(d *dialScheduler, dest *enode.Node) error { 535 dialMeter.Mark(1) 536 fd, err := d.dialer.Dial(d.ctx, dest) 537 if err != nil { 538 d.log.Trace("Dial error", "id", dest.ID(), "addr", nodeAddr(dest), "conn", t.flags, "err", cleanupDialErr(err)) 539 dialConnectionError.Mark(1) 540 return &dialError{err} 541 } 542 return d.setupFunc(newMeteredConn(fd), t.flags, dest) 543 } 544 545 func (t *dialTask) String() string { 546 node := t.dest() 547 id := node.ID() 548 return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], node.IP(), node.TCP()) 549 } 550 551 func cleanupDialErr(err error) error { 552 if netErr, ok := err.(*net.OpError); ok && netErr.Op == "dial" { 553 return netErr.Err 554 } 555 return err 556 }