github.com/Elemental-core/elementalcore@v0.0.0-20191206075037-63891242267a/p2p/dial.go (about) 1 // Copyright 2015 The elementalcore Authors 2 // This file is part of the elementalcore library. 3 // 4 // The elementalcore library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The elementalcore library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the elementalcore library. If not, see <http://www.gnu.org/licenses/>. 16 17 package p2p 18 19 import ( 20 "container/heap" 21 "crypto/rand" 22 "errors" 23 "fmt" 24 "net" 25 "time" 26 27 "github.com/Elemental-core/elementalcore/log" 28 "github.com/Elemental-core/elementalcore/p2p/discover" 29 "github.com/Elemental-core/elementalcore/p2p/netutil" 30 ) 31 32 const ( 33 // This is the amount of time spent waiting in between 34 // redialing a certain node. 35 dialHistoryExpiration = 30 * time.Second 36 37 // Discovery lookups are throttled and can only run 38 // once every few seconds. 39 lookupInterval = 4 * time.Second 40 41 // If no peers are found for this amount of time, the initial bootnodes are 42 // attempted to be connected. 43 fallbackInterval = 20 * time.Second 44 45 // Endpoint resolution is throttled with bounded backoff. 46 initialResolveDelay = 60 * time.Second 47 maxResolveDelay = time.Hour 48 ) 49 50 // NodeDialer is used to connect to nodes in the network, typically by using 51 // an underlying net.Dialer but also using net.Pipe in tests 52 type NodeDialer interface { 53 Dial(*discover.Node) (net.Conn, error) 54 } 55 56 // TCPDialer implements the NodeDialer interface by using a net.Dialer to 57 // create TCP connections to nodes in the network 58 type TCPDialer struct { 59 *net.Dialer 60 } 61 62 // Dial creates a TCP connection to the node 63 func (t TCPDialer) Dial(dest *discover.Node) (net.Conn, error) { 64 addr := &net.TCPAddr{IP: dest.IP, Port: int(dest.TCP)} 65 return t.Dialer.Dial("tcp", addr.String()) 66 } 67 68 // dialstate schedules dials and discovery lookups. 69 // it get's a chance to compute new tasks on every iteration 70 // of the main loop in Server.run. 71 type dialstate struct { 72 maxDynDials int 73 ntab discoverTable 74 netrestrict *netutil.Netlist 75 76 lookupRunning bool 77 dialing map[discover.NodeID]connFlag 78 lookupBuf []*discover.Node // current discovery lookup results 79 randomNodes []*discover.Node // filled from Table 80 static map[discover.NodeID]*dialTask 81 hist *dialHistory 82 83 start time.Time // time when the dialer was first used 84 bootnodes []*discover.Node // default dials when there are no peers 85 } 86 87 type discoverTable interface { 88 Self() *discover.Node 89 Close() 90 Resolve(target discover.NodeID) *discover.Node 91 Lookup(target discover.NodeID) []*discover.Node 92 ReadRandomNodes([]*discover.Node) int 93 } 94 95 // the dial history remembers recent dials. 96 type dialHistory []pastDial 97 98 // pastDial is an entry in the dial history. 99 type pastDial struct { 100 id discover.NodeID 101 exp time.Time 102 } 103 104 type task interface { 105 Do(*Server) 106 } 107 108 // A dialTask is generated for each node that is dialed. Its 109 // fields cannot be accessed while the task is running. 110 type dialTask struct { 111 flags connFlag 112 dest *discover.Node 113 lastResolved time.Time 114 resolveDelay time.Duration 115 } 116 117 // discoverTask runs discovery table operations. 118 // Only one discoverTask is active at any time. 119 // discoverTask.Do performs a random lookup. 120 type discoverTask struct { 121 results []*discover.Node 122 } 123 124 // A waitExpireTask is generated if there are no other tasks 125 // to keep the loop in Server.run ticking. 126 type waitExpireTask struct { 127 time.Duration 128 } 129 130 func newDialState(static []*discover.Node, bootnodes []*discover.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate { 131 s := &dialstate{ 132 maxDynDials: maxdyn, 133 ntab: ntab, 134 netrestrict: netrestrict, 135 static: make(map[discover.NodeID]*dialTask), 136 dialing: make(map[discover.NodeID]connFlag), 137 bootnodes: make([]*discover.Node, len(bootnodes)), 138 randomNodes: make([]*discover.Node, maxdyn/2), 139 hist: new(dialHistory), 140 } 141 copy(s.bootnodes, bootnodes) 142 for _, n := range static { 143 s.addStatic(n) 144 } 145 return s 146 } 147 148 func (s *dialstate) addStatic(n *discover.Node) { 149 // This overwites the task instead of updating an existing 150 // entry, giving users the opportunity to force a resolve operation. 151 s.static[n.ID] = &dialTask{flags: staticDialedConn, dest: n} 152 } 153 154 func (s *dialstate) removeStatic(n *discover.Node) { 155 // This removes a task so future attempts to connect will not be made. 156 delete(s.static, n.ID) 157 } 158 159 func (s *dialstate) newTasks(nRunning int, peers map[discover.NodeID]*Peer, now time.Time) []task { 160 if s.start == (time.Time{}) { 161 s.start = now 162 } 163 164 var newtasks []task 165 addDial := func(flag connFlag, n *discover.Node) bool { 166 if err := s.checkDial(n, peers); err != nil { 167 log.Trace("Skipping dial candidate", "id", n.ID, "addr", &net.TCPAddr{IP: n.IP, Port: int(n.TCP)}, "err", err) 168 return false 169 } 170 s.dialing[n.ID] = flag 171 newtasks = append(newtasks, &dialTask{flags: flag, dest: n}) 172 return true 173 } 174 175 // Compute number of dynamic dials necessary at this point. 176 needDynDials := s.maxDynDials 177 for _, p := range peers { 178 if p.rw.is(dynDialedConn) { 179 needDynDials-- 180 } 181 } 182 for _, flag := range s.dialing { 183 if flag&dynDialedConn != 0 { 184 needDynDials-- 185 } 186 } 187 188 // Expire the dial history on every invocation. 189 s.hist.expire(now) 190 191 // Create dials for static nodes if they are not connected. 192 for id, t := range s.static { 193 err := s.checkDial(t.dest, peers) 194 switch err { 195 case errNotWhitelisted, errSelf: 196 log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)}, "err", err) 197 delete(s.static, t.dest.ID) 198 case nil: 199 s.dialing[id] = t.flags 200 newtasks = append(newtasks, t) 201 } 202 } 203 // If we don't have any peers whatsoever, try to dial a random bootnode. This 204 // scenario is useful for the testnet (and private networks) where the discovery 205 // table might be full of mostly bad peers, making it hard to find good ones. 206 if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval { 207 bootnode := s.bootnodes[0] 208 s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...) 209 s.bootnodes = append(s.bootnodes, bootnode) 210 211 if addDial(dynDialedConn, bootnode) { 212 needDynDials-- 213 } 214 } 215 // Use random nodes from the table for half of the necessary 216 // dynamic dials. 217 randomCandidates := needDynDials / 2 218 if randomCandidates > 0 { 219 n := s.ntab.ReadRandomNodes(s.randomNodes) 220 for i := 0; i < randomCandidates && i < n; i++ { 221 if addDial(dynDialedConn, s.randomNodes[i]) { 222 needDynDials-- 223 } 224 } 225 } 226 // Create dynamic dials from random lookup results, removing tried 227 // items from the result buffer. 228 i := 0 229 for ; i < len(s.lookupBuf) && needDynDials > 0; i++ { 230 if addDial(dynDialedConn, s.lookupBuf[i]) { 231 needDynDials-- 232 } 233 } 234 s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])] 235 // Launch a discovery lookup if more candidates are needed. 236 if len(s.lookupBuf) < needDynDials && !s.lookupRunning { 237 s.lookupRunning = true 238 newtasks = append(newtasks, &discoverTask{}) 239 } 240 241 // Launch a timer to wait for the next node to expire if all 242 // candidates have been tried and no task is currently active. 243 // This should prevent cases where the dialer logic is not ticked 244 // because there are no pending events. 245 if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 { 246 t := &waitExpireTask{s.hist.min().exp.Sub(now)} 247 newtasks = append(newtasks, t) 248 } 249 return newtasks 250 } 251 252 var ( 253 errSelf = errors.New("is self") 254 errAlreadyDialing = errors.New("already dialing") 255 errAlreadyConnected = errors.New("already connected") 256 errRecentlyDialed = errors.New("recently dialed") 257 errNotWhitelisted = errors.New("not contained in netrestrict whitelist") 258 ) 259 260 func (s *dialstate) checkDial(n *discover.Node, peers map[discover.NodeID]*Peer) error { 261 _, dialing := s.dialing[n.ID] 262 switch { 263 case dialing: 264 return errAlreadyDialing 265 case peers[n.ID] != nil: 266 return errAlreadyConnected 267 case s.ntab != nil && n.ID == s.ntab.Self().ID: 268 return errSelf 269 case s.netrestrict != nil && !s.netrestrict.Contains(n.IP): 270 return errNotWhitelisted 271 case s.hist.contains(n.ID): 272 return errRecentlyDialed 273 } 274 return nil 275 } 276 277 func (s *dialstate) taskDone(t task, now time.Time) { 278 switch t := t.(type) { 279 case *dialTask: 280 s.hist.add(t.dest.ID, now.Add(dialHistoryExpiration)) 281 delete(s.dialing, t.dest.ID) 282 case *discoverTask: 283 s.lookupRunning = false 284 s.lookupBuf = append(s.lookupBuf, t.results...) 285 } 286 } 287 288 func (t *dialTask) Do(srv *Server) { 289 if t.dest.Incomplete() { 290 if !t.resolve(srv) { 291 return 292 } 293 } 294 success := t.dial(srv, t.dest) 295 // Try resolving the ID of static nodes if dialing failed. 296 if !success && t.flags&staticDialedConn != 0 { 297 if t.resolve(srv) { 298 t.dial(srv, t.dest) 299 } 300 } 301 } 302 303 // resolve attempts to find the current endpoint for the destination 304 // using discovery. 305 // 306 // Resolve operations are throttled with backoff to avoid flooding the 307 // discovery network with useless queries for nodes that don't exist. 308 // The backoff delay resets when the node is found. 309 func (t *dialTask) resolve(srv *Server) bool { 310 if srv.ntab == nil { 311 log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled") 312 return false 313 } 314 if t.resolveDelay == 0 { 315 t.resolveDelay = initialResolveDelay 316 } 317 if time.Since(t.lastResolved) < t.resolveDelay { 318 return false 319 } 320 resolved := srv.ntab.Resolve(t.dest.ID) 321 t.lastResolved = time.Now() 322 if resolved == nil { 323 t.resolveDelay *= 2 324 if t.resolveDelay > maxResolveDelay { 325 t.resolveDelay = maxResolveDelay 326 } 327 log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay) 328 return false 329 } 330 // The node was found. 331 t.resolveDelay = initialResolveDelay 332 t.dest = resolved 333 log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)}) 334 return true 335 } 336 337 // dial performs the actual connection attempt. 338 func (t *dialTask) dial(srv *Server, dest *discover.Node) bool { 339 fd, err := srv.Dialer.Dial(dest) 340 if err != nil { 341 log.Trace("Dial error", "task", t, "err", err) 342 return false 343 } 344 mfd := newMeteredConn(fd, false) 345 srv.SetupConn(mfd, t.flags, dest) 346 return true 347 } 348 349 func (t *dialTask) String() string { 350 return fmt.Sprintf("%v %x %v:%d", t.flags, t.dest.ID[:8], t.dest.IP, t.dest.TCP) 351 } 352 353 func (t *discoverTask) Do(srv *Server) { 354 // newTasks generates a lookup task whenever dynamic dials are 355 // necessary. Lookups need to take some time, otherwise the 356 // event loop spins too fast. 357 next := srv.lastLookup.Add(lookupInterval) 358 if now := time.Now(); now.Before(next) { 359 time.Sleep(next.Sub(now)) 360 } 361 srv.lastLookup = time.Now() 362 var target discover.NodeID 363 rand.Read(target[:]) 364 t.results = srv.ntab.Lookup(target) 365 } 366 367 func (t *discoverTask) String() string { 368 s := "discovery lookup" 369 if len(t.results) > 0 { 370 s += fmt.Sprintf(" (%d results)", len(t.results)) 371 } 372 return s 373 } 374 375 func (t waitExpireTask) Do(*Server) { 376 time.Sleep(t.Duration) 377 } 378 func (t waitExpireTask) String() string { 379 return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration) 380 } 381 382 // Use only these methods to access or modify dialHistory. 383 func (h dialHistory) min() pastDial { 384 return h[0] 385 } 386 func (h *dialHistory) add(id discover.NodeID, exp time.Time) { 387 heap.Push(h, pastDial{id, exp}) 388 } 389 func (h dialHistory) contains(id discover.NodeID) bool { 390 for _, v := range h { 391 if v.id == id { 392 return true 393 } 394 } 395 return false 396 } 397 func (h *dialHistory) expire(now time.Time) { 398 for h.Len() > 0 && h.min().exp.Before(now) { 399 heap.Pop(h) 400 } 401 } 402 403 // heap.Interface boilerplate 404 func (h dialHistory) Len() int { return len(h) } 405 func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) } 406 func (h dialHistory) Swap(i, j int) { h[i], h[j] = h[j], h[i] } 407 func (h *dialHistory) Push(x interface{}) { 408 *h = append(*h, x.(pastDial)) 409 } 410 func (h *dialHistory) Pop() interface{} { 411 old := *h 412 n := len(old) 413 x := old[n-1] 414 *h = old[0 : n-1] 415 return x 416 }