github.com/Debrief-BC/go-debrief@v0.0.0-20200420203408-0c26ca968123/p2p/dial.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package p2p 18 19 import ( 20 "context" 21 crand "crypto/rand" 22 "encoding/binary" 23 "errors" 24 "fmt" 25 mrand "math/rand" 26 "net" 27 "sync" 28 "time" 29 30 "github.com/Debrief-BC/go-debrief/common/mclock" 31 "github.com/Debrief-BC/go-debrief/log" 32 "github.com/Debrief-BC/go-debrief/p2p/enode" 33 "github.com/Debrief-BC/go-debrief/p2p/netutil" 34 ) 35 36 const ( 37 // This is the amount of time spent waiting in between redialing a certain node. The 38 // limit is a bit higher than inboundThrottleTime to prevent failing dials in small 39 // private networks. 40 dialHistoryExpiration = inboundThrottleTime + 5*time.Second 41 42 // Config for the "Looking for peers" message. 43 dialStatsLogInterval = 10 * time.Second // printed at most this often 44 dialStatsPeerLimit = 3 // but not if more than this many dialed peers 45 46 // Endpoint resolution is throttled with bounded backoff. 47 initialResolveDelay = 60 * time.Second 48 maxResolveDelay = time.Hour 49 ) 50 51 // NodeDialer is used to connect to nodes in the network, typically by using 52 // an underlying net.Dialer but also using net.Pipe in tests. 53 type NodeDialer interface { 54 Dial(context.Context, *enode.Node) (net.Conn, error) 55 } 56 57 type nodeResolver interface { 58 Resolve(*enode.Node) *enode.Node 59 } 60 61 // tcpDialer implements NodeDialer using real TCP connections. 62 type tcpDialer struct { 63 d *net.Dialer 64 } 65 66 func (t tcpDialer) Dial(ctx context.Context, dest *enode.Node) (net.Conn, error) { 67 return t.d.DialContext(ctx, "tcp", nodeAddr(dest).String()) 68 } 69 70 func nodeAddr(n *enode.Node) net.Addr { 71 return &net.TCPAddr{IP: n.IP(), Port: n.TCP()} 72 } 73 74 // checkDial errors: 75 var ( 76 errSelf = errors.New("is self") 77 errAlreadyDialing = errors.New("already dialing") 78 errAlreadyConnected = errors.New("already connected") 79 errRecentlyDialed = errors.New("recently dialed") 80 errNotWhitelisted = errors.New("not contained in netrestrict whitelist") 81 ) 82 83 // dialer creates outbound connections and submits them into Server. 84 // Two types of peer connections can be created: 85 // 86 // - static dials are pre-configured connections. The dialer attempts 87 // keep these nodes connected at all times. 88 // 89 // - dynamic dials are created from node discovery results. The dialer 90 // continuously reads candidate nodes from its input iterator and attempts 91 // to create peer connections to nodes arriving through the iterator. 92 // 93 type dialScheduler struct { 94 dialConfig 95 setupFunc dialSetupFunc 96 wg sync.WaitGroup 97 cancel context.CancelFunc 98 ctx context.Context 99 nodesIn chan *enode.Node 100 doneCh chan *dialTask 101 addStaticCh chan *enode.Node 102 remStaticCh chan *enode.Node 103 addPeerCh chan *conn 104 remPeerCh chan *conn 105 106 // Everything below here belongs to loop and 107 // should only be accessed by code on the loop goroutine. 108 dialing map[enode.ID]*dialTask // active tasks 109 peers map[enode.ID]connFlag // all connected peers 110 dialPeers int // current number of dialed peers 111 112 // The static map tracks all static dial tasks. The subset of usable static dial tasks 113 // (i.e. those passing checkDial) is kept in staticPool. The scheduler prefers 114 // launching random static tasks from the pool over launching dynamic dials from the 115 // iterator. 116 static map[enode.ID]*dialTask 117 staticPool []*dialTask 118 119 // The dial history keeps recently dialed nodes. Members of history are not dialed. 120 history expHeap 121 historyTimer mclock.Timer 122 historyTimerTime mclock.AbsTime 123 124 // for logStats 125 lastStatsLog mclock.AbsTime 126 doneSinceLastLog int 127 } 128 129 type dialSetupFunc func(net.Conn, connFlag, *enode.Node) error 130 131 type dialConfig struct { 132 self enode.ID // our own ID 133 maxDialPeers int // maximum number of dialed peers 134 maxActiveDials int // maximum number of active dials 135 netRestrict *netutil.Netlist // IP whitelist, disabled if nil 136 resolver nodeResolver 137 dialer NodeDialer 138 log log.Logger 139 clock mclock.Clock 140 rand *mrand.Rand 141 } 142 143 func (cfg dialConfig) withDefaults() dialConfig { 144 if cfg.maxActiveDials == 0 { 145 cfg.maxActiveDials = defaultMaxPendingPeers 146 } 147 if cfg.log == nil { 148 cfg.log = log.Root() 149 } 150 if cfg.clock == nil { 151 cfg.clock = mclock.System{} 152 } 153 if cfg.rand == nil { 154 seedb := make([]byte, 8) 155 crand.Read(seedb) 156 seed := int64(binary.BigEndian.Uint64(seedb)) 157 cfg.rand = mrand.New(mrand.NewSource(seed)) 158 } 159 return cfg 160 } 161 162 func newDialScheduler(config dialConfig, it enode.Iterator, setupFunc dialSetupFunc) *dialScheduler { 163 d := &dialScheduler{ 164 dialConfig: config.withDefaults(), 165 setupFunc: setupFunc, 166 dialing: make(map[enode.ID]*dialTask), 167 static: make(map[enode.ID]*dialTask), 168 peers: make(map[enode.ID]connFlag), 169 doneCh: make(chan *dialTask), 170 nodesIn: make(chan *enode.Node), 171 addStaticCh: make(chan *enode.Node), 172 remStaticCh: make(chan *enode.Node), 173 addPeerCh: make(chan *conn), 174 remPeerCh: make(chan *conn), 175 } 176 d.lastStatsLog = d.clock.Now() 177 d.ctx, d.cancel = context.WithCancel(context.Background()) 178 d.wg.Add(2) 179 go d.readNodes(it) 180 go d.loop(it) 181 return d 182 } 183 184 // stop shuts down the dialer, canceling all current dial tasks. 185 func (d *dialScheduler) stop() { 186 d.cancel() 187 d.wg.Wait() 188 } 189 190 // addStatic adds a static dial candidate. 191 func (d *dialScheduler) addStatic(n *enode.Node) { 192 select { 193 case d.addStaticCh <- n: 194 case <-d.ctx.Done(): 195 } 196 } 197 198 // removeStatic removes a static dial candidate. 199 func (d *dialScheduler) removeStatic(n *enode.Node) { 200 select { 201 case d.remStaticCh <- n: 202 case <-d.ctx.Done(): 203 } 204 } 205 206 // peerAdded updates the peer set. 207 func (d *dialScheduler) peerAdded(c *conn) { 208 select { 209 case d.addPeerCh <- c: 210 case <-d.ctx.Done(): 211 } 212 } 213 214 // peerRemoved updates the peer set. 215 func (d *dialScheduler) peerRemoved(c *conn) { 216 select { 217 case d.remPeerCh <- c: 218 case <-d.ctx.Done(): 219 } 220 } 221 222 // loop is the main loop of the dialer. 223 func (d *dialScheduler) loop(it enode.Iterator) { 224 var ( 225 nodesCh chan *enode.Node 226 historyExp = make(chan struct{}, 1) 227 ) 228 229 loop: 230 for { 231 // Launch new dials if slots are available. 232 slots := d.freeDialSlots() 233 slots -= d.startStaticDials(slots) 234 if slots > 0 { 235 nodesCh = d.nodesIn 236 } else { 237 nodesCh = nil 238 } 239 d.rearmHistoryTimer(historyExp) 240 d.logStats() 241 242 select { 243 case node := <-nodesCh: 244 if err := d.checkDial(node); err != nil { 245 d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err) 246 } else { 247 d.startDial(newDialTask(node, dynDialedConn)) 248 } 249 250 case task := <-d.doneCh: 251 id := task.dest.ID() 252 delete(d.dialing, id) 253 d.updateStaticPool(id) 254 d.doneSinceLastLog++ 255 256 case c := <-d.addPeerCh: 257 if c.is(dynDialedConn) || c.is(staticDialedConn) { 258 d.dialPeers++ 259 } 260 id := c.node.ID() 261 d.peers[id] = c.flags 262 // Remove from static pool because the node is now connected. 263 task := d.static[id] 264 if task != nil && task.staticPoolIndex >= 0 { 265 d.removeFromStaticPool(task.staticPoolIndex) 266 } 267 // TODO: cancel dials to connected peers 268 269 case c := <-d.remPeerCh: 270 if c.is(dynDialedConn) || c.is(staticDialedConn) { 271 d.dialPeers-- 272 } 273 delete(d.peers, c.node.ID()) 274 d.updateStaticPool(c.node.ID()) 275 276 case node := <-d.addStaticCh: 277 id := node.ID() 278 _, exists := d.static[id] 279 d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists) 280 if exists { 281 continue loop 282 } 283 task := newDialTask(node, staticDialedConn) 284 d.static[id] = task 285 if d.checkDial(node) == nil { 286 d.addToStaticPool(task) 287 } 288 289 case node := <-d.remStaticCh: 290 id := node.ID() 291 task := d.static[id] 292 d.log.Trace("Removing static node", "id", id, "ok", task != nil) 293 if task != nil { 294 delete(d.static, id) 295 if task.staticPoolIndex >= 0 { 296 d.removeFromStaticPool(task.staticPoolIndex) 297 } 298 } 299 300 case <-historyExp: 301 d.expireHistory() 302 303 case <-d.ctx.Done(): 304 it.Close() 305 break loop 306 } 307 } 308 309 d.stopHistoryTimer(historyExp) 310 for range d.dialing { 311 <-d.doneCh 312 } 313 d.wg.Done() 314 } 315 316 // readNodes runs in its own goroutine and delivers nodes from 317 // the input iterator to the nodesIn channel. 318 func (d *dialScheduler) readNodes(it enode.Iterator) { 319 defer d.wg.Done() 320 321 for it.Next() { 322 select { 323 case d.nodesIn <- it.Node(): 324 case <-d.ctx.Done(): 325 } 326 } 327 } 328 329 // logStats prints dialer statistics to the log. The message is suppressed when enough 330 // peers are connected because users should only see it while their client is starting up 331 // or comes back online. 332 func (d *dialScheduler) logStats() { 333 now := d.clock.Now() 334 if d.lastStatsLog.Add(dialStatsLogInterval) > now { 335 return 336 } 337 if d.dialPeers < dialStatsPeerLimit && d.dialPeers < d.maxDialPeers { 338 d.log.Info("Looking for peers", "peercount", len(d.peers), "tried", d.doneSinceLastLog, "static", len(d.static)) 339 } 340 d.doneSinceLastLog = 0 341 d.lastStatsLog = now 342 } 343 344 // rearmHistoryTimer configures d.historyTimer to fire when the 345 // next item in d.history expires. 346 func (d *dialScheduler) rearmHistoryTimer(ch chan struct{}) { 347 if len(d.history) == 0 || d.historyTimerTime == d.history.nextExpiry() { 348 return 349 } 350 d.stopHistoryTimer(ch) 351 d.historyTimerTime = d.history.nextExpiry() 352 timeout := time.Duration(d.historyTimerTime - d.clock.Now()) 353 d.historyTimer = d.clock.AfterFunc(timeout, func() { ch <- struct{}{} }) 354 } 355 356 // stopHistoryTimer stops the timer and drains the channel it sends on. 357 func (d *dialScheduler) stopHistoryTimer(ch chan struct{}) { 358 if d.historyTimer != nil && !d.historyTimer.Stop() { 359 <-ch 360 } 361 } 362 363 // expireHistory removes expired items from d.history. 364 func (d *dialScheduler) expireHistory() { 365 d.historyTimer.Stop() 366 d.historyTimer = nil 367 d.historyTimerTime = 0 368 d.history.expire(d.clock.Now(), func(hkey string) { 369 var id enode.ID 370 copy(id[:], hkey) 371 d.updateStaticPool(id) 372 }) 373 } 374 375 // freeDialSlots returns the number of free dial slots. The result can be negative 376 // when peers are connected while their task is still running. 377 func (d *dialScheduler) freeDialSlots() int { 378 slots := (d.maxDialPeers - d.dialPeers) * 2 379 if slots > d.maxActiveDials { 380 slots = d.maxActiveDials 381 } 382 free := slots - len(d.dialing) 383 return free 384 } 385 386 // checkDial returns an error if node n should not be dialed. 387 func (d *dialScheduler) checkDial(n *enode.Node) error { 388 if n.ID() == d.self { 389 return errSelf 390 } 391 if _, ok := d.dialing[n.ID()]; ok { 392 return errAlreadyDialing 393 } 394 if _, ok := d.peers[n.ID()]; ok { 395 return errAlreadyConnected 396 } 397 if d.netRestrict != nil && !d.netRestrict.Contains(n.IP()) { 398 return errNotWhitelisted 399 } 400 if d.history.contains(string(n.ID().Bytes())) { 401 return errRecentlyDialed 402 } 403 return nil 404 } 405 406 // startStaticDials starts n static dial tasks. 407 func (d *dialScheduler) startStaticDials(n int) (started int) { 408 for started = 0; started < n && len(d.staticPool) > 0; started++ { 409 idx := d.rand.Intn(len(d.staticPool)) 410 task := d.staticPool[idx] 411 d.startDial(task) 412 d.removeFromStaticPool(idx) 413 } 414 return started 415 } 416 417 // updateStaticPool attempts to move the given static dial back into staticPool. 418 func (d *dialScheduler) updateStaticPool(id enode.ID) { 419 task, ok := d.static[id] 420 if ok && task.staticPoolIndex < 0 && d.checkDial(task.dest) == nil { 421 d.addToStaticPool(task) 422 } 423 } 424 425 func (d *dialScheduler) addToStaticPool(task *dialTask) { 426 if task.staticPoolIndex >= 0 { 427 panic("attempt to add task to staticPool twice") 428 } 429 d.staticPool = append(d.staticPool, task) 430 task.staticPoolIndex = len(d.staticPool) - 1 431 } 432 433 // removeFromStaticPool removes the task at idx from staticPool. It does that by moving the 434 // current last element of the pool to idx and then shortening the pool by one. 435 func (d *dialScheduler) removeFromStaticPool(idx int) { 436 task := d.staticPool[idx] 437 end := len(d.staticPool) - 1 438 d.staticPool[idx] = d.staticPool[end] 439 d.staticPool[idx].staticPoolIndex = idx 440 d.staticPool[end] = nil 441 d.staticPool = d.staticPool[:end] 442 task.staticPoolIndex = -1 443 } 444 445 // startDial runs the given dial task in a separate goroutine. 446 func (d *dialScheduler) startDial(task *dialTask) { 447 d.log.Trace("Starting p2p dial", "id", task.dest.ID(), "ip", task.dest.IP(), "flag", task.flags) 448 hkey := string(task.dest.ID().Bytes()) 449 d.history.add(hkey, d.clock.Now().Add(dialHistoryExpiration)) 450 d.dialing[task.dest.ID()] = task 451 go func() { 452 task.run(d) 453 d.doneCh <- task 454 }() 455 } 456 457 // A dialTask generated for each node that is dialed. 458 type dialTask struct { 459 staticPoolIndex int 460 flags connFlag 461 // These fields are private to the task and should not be 462 // accessed by dialScheduler while the task is running. 463 dest *enode.Node 464 lastResolved mclock.AbsTime 465 resolveDelay time.Duration 466 } 467 468 func newDialTask(dest *enode.Node, flags connFlag) *dialTask { 469 return &dialTask{dest: dest, flags: flags, staticPoolIndex: -1} 470 } 471 472 type dialError struct { 473 error 474 } 475 476 func (t *dialTask) run(d *dialScheduler) { 477 if t.dest.Incomplete() { 478 if !t.resolve(d) { 479 return 480 } 481 } 482 483 err := t.dial(d, t.dest) 484 if err != nil { 485 // Try resolving the ID of static nodes if dialing failed. 486 if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 { 487 if t.resolve(d) { 488 t.dial(d, t.dest) 489 } 490 } 491 } 492 } 493 494 // resolve attempts to find the current endpoint for the destination 495 // using discovery. 496 // 497 // Resolve operations are throttled with backoff to avoid flooding the 498 // discovery network with useless queries for nodes that don't exist. 499 // The backoff delay resets when the node is found. 500 func (t *dialTask) resolve(d *dialScheduler) bool { 501 if d.resolver == nil { 502 return false 503 } 504 if t.resolveDelay == 0 { 505 t.resolveDelay = initialResolveDelay 506 } 507 if t.lastResolved > 0 && time.Duration(d.clock.Now()-t.lastResolved) < t.resolveDelay { 508 return false 509 } 510 resolved := d.resolver.Resolve(t.dest) 511 t.lastResolved = d.clock.Now() 512 if resolved == nil { 513 t.resolveDelay *= 2 514 if t.resolveDelay > maxResolveDelay { 515 t.resolveDelay = maxResolveDelay 516 } 517 d.log.Debug("Resolving node failed", "id", t.dest.ID(), "newdelay", t.resolveDelay) 518 return false 519 } 520 // The node was found. 521 t.resolveDelay = initialResolveDelay 522 t.dest = resolved 523 d.log.Debug("Resolved node", "id", t.dest.ID(), "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}) 524 return true 525 } 526 527 // dial performs the actual connection attempt. 528 func (t *dialTask) dial(d *dialScheduler, dest *enode.Node) error { 529 fd, err := d.dialer.Dial(d.ctx, t.dest) 530 if err != nil { 531 d.log.Trace("Dial error", "id", t.dest.ID(), "addr", nodeAddr(t.dest), "conn", t.flags, "err", cleanupDialErr(err)) 532 return &dialError{err} 533 } 534 mfd := newMeteredConn(fd, false, &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}) 535 return d.setupFunc(mfd, t.flags, dest) 536 } 537 538 func (t *dialTask) String() string { 539 id := t.dest.ID() 540 return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], t.dest.IP(), t.dest.TCP()) 541 } 542 543 func cleanupDialErr(err error) error { 544 if netErr, ok := err.(*net.OpError); ok && netErr.Op == "dial" { 545 return netErr.Err 546 } 547 return err 548 }