github.com/humaniq/go-ethereum@v1.6.8-0.20171225131628-061223a13848/p2p/discover/table.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // Package discover implements the Node Discovery Protocol. 18 // 19 // The Node Discovery protocol provides a way to find RLPx nodes that 20 // can be connected to. It uses a Kademlia-like protocol to maintain a 21 // distributed database of the IDs and endpoints of all listening 22 // nodes. 
package discover

import (
	"crypto/rand"
	"encoding/binary"
	"errors"
	"fmt"
	"net"
	"sort"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/log"
)

const (
	alpha      = 3                      // Kademlia concurrency factor
	bucketSize = 16                     // Kademlia bucket size
	hashBits   = len(common.Hash{}) * 8 // number of bits in a common.Hash
	nBuckets   = hashBits + 1           // Number of buckets

	maxBondingPingPongs = 16 // capacity of Table.bondslots (concurrent ping/pong exchanges)
	maxFindnodeFailures = 5  // findnode failure count at which lookup deletes a node

	autoRefreshInterval = 1 * time.Hour      // period of the refreshLoop ticker
	seedCount           = 30                 // passed to nodeDB.querySeeds when the table is empty
	seedMaxAge          = 5 * 24 * time.Hour // passed to nodeDB.querySeeds when the table is empty
)

// Table holds the local node's view of known nodes, indexed into
// buckets by distance, together with the state used for refreshing
// the table and for bonding with remote nodes.
type Table struct {
	mutex   sync.Mutex        // protects buckets, their content, and nursery
	buckets [nBuckets]*bucket // index of known nodes by distance
	nursery []*Node           // bootstrap nodes
	db      *nodeDB           // database of known nodes

	refreshReq chan chan struct{} // refresh() hands its completion channel to refreshLoop here
	closeReq   chan struct{}      // Close() signals refreshLoop to shut down
	closed     chan struct{}      // closed by refreshLoop once shutdown has finished

	bondmu    sync.Mutex           // held by bond while reading or writing the bonding map
	bonding   map[NodeID]*bondproc // bonding processes currently in flight, keyed by remote ID
	bondslots chan struct{}        // limits total number of active bonding processes

	nodeAddedHook func(*Node) // for testing

	net  transport
	self *Node // metadata of the local node
}

// bondproc records one bonding attempt. pingpong fills in err or n
// and then closes done, which releases goroutines in bond that are
// waiting for the same remote node.
type bondproc struct {
	err  error
	n    *Node
	done chan struct{}
}

// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
	ping(NodeID, *net.UDPAddr) error
	waitping(NodeID) error
	findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
	close()
}

// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
92 type bucket struct{ entries []*Node } 93 94 func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) (*Table, error) { 95 // If no node database was given, use an in-memory one 96 db, err := newNodeDB(nodeDBPath, Version, ourID) 97 if err != nil { 98 return nil, err 99 } 100 tab := &Table{ 101 net: t, 102 db: db, 103 self: NewNode(ourID, ourAddr.IP, uint16(ourAddr.Port), uint16(ourAddr.Port)), 104 bonding: make(map[NodeID]*bondproc), 105 bondslots: make(chan struct{}, maxBondingPingPongs), 106 refreshReq: make(chan chan struct{}), 107 closeReq: make(chan struct{}), 108 closed: make(chan struct{}), 109 } 110 for i := 0; i < cap(tab.bondslots); i++ { 111 tab.bondslots <- struct{}{} 112 } 113 for i := range tab.buckets { 114 tab.buckets[i] = new(bucket) 115 } 116 go tab.refreshLoop() 117 return tab, nil 118 } 119 120 // Self returns the local node. 121 // The returned node should not be modified by the caller. 122 func (tab *Table) Self() *Node { 123 return tab.self 124 } 125 126 // ReadRandomNodes fills the given slice with random nodes from the 127 // table. It will not write the same node more than once. The nodes in 128 // the slice are copies and can be modified by the caller. 129 func (tab *Table) ReadRandomNodes(buf []*Node) (n int) { 130 tab.mutex.Lock() 131 defer tab.mutex.Unlock() 132 // TODO: tree-based buckets would help here 133 // Find all non-empty buckets and get a fresh slice of their entries. 134 var buckets [][]*Node 135 for _, b := range tab.buckets { 136 if len(b.entries) > 0 { 137 buckets = append(buckets, b.entries[:]) 138 } 139 } 140 if len(buckets) == 0 { 141 return 0 142 } 143 // Shuffle the buckets. 144 for i := uint32(len(buckets)) - 1; i > 0; i-- { 145 j := randUint(i) 146 buckets[i], buckets[j] = buckets[j], buckets[i] 147 } 148 // Move head of each bucket into buf, removing buckets that become empty. 
149 var i, j int 150 for ; i < len(buf); i, j = i+1, (j+1)%len(buckets) { 151 b := buckets[j] 152 buf[i] = &(*b[0]) 153 buckets[j] = b[1:] 154 if len(b) == 1 { 155 buckets = append(buckets[:j], buckets[j+1:]...) 156 } 157 if len(buckets) == 0 { 158 break 159 } 160 } 161 return i + 1 162 } 163 164 func randUint(max uint32) uint32 { 165 if max == 0 { 166 return 0 167 } 168 var b [4]byte 169 rand.Read(b[:]) 170 return binary.BigEndian.Uint32(b[:]) % max 171 } 172 173 // Close terminates the network listener and flushes the node database. 174 func (tab *Table) Close() { 175 select { 176 case <-tab.closed: 177 // already closed. 178 case tab.closeReq <- struct{}{}: 179 <-tab.closed // wait for refreshLoop to end. 180 } 181 } 182 183 // SetFallbackNodes sets the initial points of contact. These nodes 184 // are used to connect to the network if the table is empty and there 185 // are no known nodes in the database. 186 func (tab *Table) SetFallbackNodes(nodes []*Node) error { 187 for _, n := range nodes { 188 if err := n.validateComplete(); err != nil { 189 return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err) 190 } 191 } 192 tab.mutex.Lock() 193 tab.nursery = make([]*Node, 0, len(nodes)) 194 for _, n := range nodes { 195 cpy := *n 196 // Recompute cpy.sha because the node might not have been 197 // created by NewNode or ParseNode. 198 cpy.sha = crypto.Keccak256Hash(n.ID[:]) 199 tab.nursery = append(tab.nursery, &cpy) 200 } 201 tab.mutex.Unlock() 202 tab.refresh() 203 return nil 204 } 205 206 // Resolve searches for a specific node with the given ID. 207 // It returns nil if the node could not be found. 208 func (tab *Table) Resolve(targetID NodeID) *Node { 209 // If the node is present in the local table, no 210 // network interaction is required. 
211 hash := crypto.Keccak256Hash(targetID[:]) 212 tab.mutex.Lock() 213 cl := tab.closest(hash, 1) 214 tab.mutex.Unlock() 215 if len(cl.entries) > 0 && cl.entries[0].ID == targetID { 216 return cl.entries[0] 217 } 218 // Otherwise, do a network lookup. 219 result := tab.Lookup(targetID) 220 for _, n := range result { 221 if n.ID == targetID { 222 return n 223 } 224 } 225 return nil 226 } 227 228 // Lookup performs a network search for nodes close 229 // to the given target. It approaches the target by querying 230 // nodes that are closer to it on each iteration. 231 // The given target does not need to be an actual node 232 // identifier. 233 func (tab *Table) Lookup(targetID NodeID) []*Node { 234 return tab.lookup(targetID, true) 235 } 236 237 func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool) []*Node { 238 var ( 239 target = crypto.Keccak256Hash(targetID[:]) 240 asked = make(map[NodeID]bool) 241 seen = make(map[NodeID]bool) 242 reply = make(chan []*Node, alpha) 243 pendingQueries = 0 244 result *nodesByDistance 245 ) 246 // don't query further if we hit ourself. 247 // unlikely to happen often in practice. 248 asked[tab.self.ID] = true 249 250 for { 251 tab.mutex.Lock() 252 // generate initial result set 253 result = tab.closest(target, bucketSize) 254 tab.mutex.Unlock() 255 if len(result.entries) > 0 || !refreshIfEmpty { 256 break 257 } 258 // The result set is empty, all nodes were dropped, refresh. 259 // We actually wait for the refresh to complete here. The very 260 // first query will hit this case and run the bootstrapping 261 // logic. 
262 <-tab.refresh() 263 refreshIfEmpty = false 264 } 265 266 for { 267 // ask the alpha closest nodes that we haven't asked yet 268 for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ { 269 n := result.entries[i] 270 if !asked[n.ID] { 271 asked[n.ID] = true 272 pendingQueries++ 273 go func() { 274 // Find potential neighbors to bond with 275 r, err := tab.net.findnode(n.ID, n.addr(), targetID) 276 if err != nil { 277 // Bump the failure counter to detect and evacuate non-bonded entries 278 fails := tab.db.findFails(n.ID) + 1 279 tab.db.updateFindFails(n.ID, fails) 280 log.Trace("Bumping findnode failure counter", "id", n.ID, "failcount", fails) 281 282 if fails >= maxFindnodeFailures { 283 log.Trace("Too many findnode failures, dropping", "id", n.ID, "failcount", fails) 284 tab.delete(n) 285 } 286 } 287 reply <- tab.bondall(r) 288 }() 289 } 290 } 291 if pendingQueries == 0 { 292 // we have asked all closest nodes, stop the search 293 break 294 } 295 // wait for the next reply 296 for _, n := range <-reply { 297 if n != nil && !seen[n.ID] { 298 seen[n.ID] = true 299 result.push(n, bucketSize) 300 } 301 } 302 pendingQueries-- 303 } 304 return result.entries 305 } 306 307 func (tab *Table) refresh() <-chan struct{} { 308 done := make(chan struct{}) 309 select { 310 case tab.refreshReq <- done: 311 case <-tab.closed: 312 close(done) 313 } 314 return done 315 } 316 317 // refreshLoop schedules doRefresh runs and coordinates shutdown. 
318 func (tab *Table) refreshLoop() { 319 var ( 320 timer = time.NewTicker(autoRefreshInterval) 321 waiting []chan struct{} // accumulates waiting callers while doRefresh runs 322 done chan struct{} // where doRefresh reports completion 323 ) 324 loop: 325 for { 326 select { 327 case <-timer.C: 328 if done == nil { 329 done = make(chan struct{}) 330 go tab.doRefresh(done) 331 } 332 case req := <-tab.refreshReq: 333 waiting = append(waiting, req) 334 if done == nil { 335 done = make(chan struct{}) 336 go tab.doRefresh(done) 337 } 338 case <-done: 339 for _, ch := range waiting { 340 close(ch) 341 } 342 waiting = nil 343 done = nil 344 case <-tab.closeReq: 345 break loop 346 } 347 } 348 349 if tab.net != nil { 350 tab.net.close() 351 } 352 if done != nil { 353 <-done 354 } 355 for _, ch := range waiting { 356 close(ch) 357 } 358 tab.db.close() 359 close(tab.closed) 360 } 361 362 // doRefresh performs a lookup for a random target to keep buckets 363 // full. seed nodes are inserted if the table is empty (initial 364 // bootstrap or discarded faulty peers). 365 func (tab *Table) doRefresh(done chan struct{}) { 366 defer close(done) 367 368 // The Kademlia paper specifies that the bucket refresh should 369 // perform a lookup in the least recently used bucket. We cannot 370 // adhere to this because the findnode target is a 512bit value 371 // (not hash-sized) and it is not easily possible to generate a 372 // sha3 preimage that falls into a chosen bucket. 373 // We perform a lookup with a random target instead. 374 var target NodeID 375 rand.Read(target[:]) 376 result := tab.lookup(target, false) 377 if len(result) > 0 { 378 return 379 } 380 381 // The table is empty. Load nodes from the database and insert 382 // them. This should yield a few previously seen nodes that are 383 // (hopefully) still alive. 
384 seeds := tab.db.querySeeds(seedCount, seedMaxAge) 385 seeds = tab.bondall(append(seeds, tab.nursery...)) 386 387 if len(seeds) == 0 { 388 log.Debug("No discv4 seed nodes found") 389 } 390 for _, n := range seeds { 391 age := log.Lazy{Fn: func() time.Duration { return time.Since(tab.db.lastPong(n.ID)) }} 392 log.Trace("Found seed node in database", "id", n.ID, "addr", n.addr(), "age", age) 393 } 394 tab.mutex.Lock() 395 tab.stuff(seeds) 396 tab.mutex.Unlock() 397 398 // Finally, do a self lookup to fill up the buckets. 399 tab.lookup(tab.self.ID, false) 400 } 401 402 // closest returns the n nodes in the table that are closest to the 403 // given id. The caller must hold tab.mutex. 404 func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance { 405 // This is a very wasteful way to find the closest nodes but 406 // obviously correct. I believe that tree-based buckets would make 407 // this easier to implement efficiently. 408 close := &nodesByDistance{target: target} 409 for _, b := range tab.buckets { 410 for _, n := range b.entries { 411 close.push(n, nresults) 412 } 413 } 414 return close 415 } 416 417 func (tab *Table) len() (n int) { 418 for _, b := range tab.buckets { 419 n += len(b.entries) 420 } 421 return n 422 } 423 424 // bondall bonds with all given nodes concurrently and returns 425 // those nodes for which bonding has probably succeeded. 426 func (tab *Table) bondall(nodes []*Node) (result []*Node) { 427 rc := make(chan *Node, len(nodes)) 428 for i := range nodes { 429 go func(n *Node) { 430 nn, _ := tab.bond(false, n.ID, n.addr(), n.TCP) 431 rc <- nn 432 }(nodes[i]) 433 } 434 for range nodes { 435 if n := <-rc; n != nil { 436 result = append(result, n) 437 } 438 } 439 return result 440 } 441 442 // bond ensures the local node has a bond with the given remote node. 443 // It also attempts to insert the node into the table if bonding succeeds. 444 // The caller must not hold tab.mutex. 
445 // 446 // A bond is must be established before sending findnode requests. 447 // Both sides must have completed a ping/pong exchange for a bond to 448 // exist. The total number of active bonding processes is limited in 449 // order to restrain network use. 450 // 451 // bond is meant to operate idempotently in that bonding with a remote 452 // node which still remembers a previously established bond will work. 453 // The remote node will simply not send a ping back, causing waitping 454 // to time out. 455 // 456 // If pinged is true, the remote node has just pinged us and one half 457 // of the process can be skipped. 458 func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) { 459 if id == tab.self.ID { 460 return nil, errors.New("is self") 461 } 462 // Retrieve a previously known node and any recent findnode failures 463 node, fails := tab.db.node(id), 0 464 if node != nil { 465 fails = tab.db.findFails(id) 466 } 467 // If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch 468 var result error 469 age := time.Since(tab.db.lastPong(id)) 470 if node == nil || fails > 0 || age > nodeDBNodeExpiration { 471 log.Trace("Starting bonding ping/pong", "id", id, "known", node != nil, "failcount", fails, "age", age) 472 473 tab.bondmu.Lock() 474 w := tab.bonding[id] 475 if w != nil { 476 // Wait for an existing bonding process to complete. 477 tab.bondmu.Unlock() 478 <-w.done 479 } else { 480 // Register a new bonding process. 481 w = &bondproc{done: make(chan struct{})} 482 tab.bonding[id] = w 483 tab.bondmu.Unlock() 484 // Do the ping/pong. The result goes into w. 485 tab.pingpong(w, pinged, id, addr, tcpPort) 486 // Unregister the process after it's done. 
487 tab.bondmu.Lock() 488 delete(tab.bonding, id) 489 tab.bondmu.Unlock() 490 } 491 // Retrieve the bonding results 492 result = w.err 493 if result == nil { 494 node = w.n 495 } 496 } 497 if node != nil { 498 // Add the node to the table even if the bonding ping/pong 499 // fails. It will be relaced quickly if it continues to be 500 // unresponsive. 501 tab.add(node) 502 tab.db.updateFindFails(id, 0) 503 } 504 return node, result 505 } 506 507 func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) { 508 // Request a bonding slot to limit network usage 509 <-tab.bondslots 510 defer func() { tab.bondslots <- struct{}{} }() 511 512 // Ping the remote side and wait for a pong. 513 if w.err = tab.ping(id, addr); w.err != nil { 514 close(w.done) 515 return 516 } 517 if !pinged { 518 // Give the remote node a chance to ping us before we start 519 // sending findnode requests. If they still remember us, 520 // waitping will simply time out. 521 tab.net.waitping(id) 522 } 523 // Bonding succeeded, update the node database. 524 w.n = NewNode(id, addr.IP, uint16(addr.Port), tcpPort) 525 tab.db.updateNode(w.n) 526 close(w.done) 527 } 528 529 // ping a remote endpoint and wait for a reply, also updating the node 530 // database accordingly. 531 func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error { 532 tab.db.updateLastPing(id, time.Now()) 533 if err := tab.net.ping(id, addr); err != nil { 534 return err 535 } 536 tab.db.updateLastPong(id, time.Now()) 537 538 // Start the background expiration goroutine after the first 539 // successful communication. Subsequent calls have no effect if it 540 // is already running. We do this here instead of somewhere else 541 // so that the search for seed nodes also considers older nodes 542 // that would otherwise be removed by the expiration. 543 tab.db.ensureExpirer() 544 return nil 545 } 546 547 // add attempts to add the given node its corresponding bucket. 
If the 548 // bucket has space available, adding the node succeeds immediately. 549 // Otherwise, the node is added if the least recently active node in 550 // the bucket does not respond to a ping packet. 551 // 552 // The caller must not hold tab.mutex. 553 func (tab *Table) add(new *Node) { 554 b := tab.buckets[logdist(tab.self.sha, new.sha)] 555 tab.mutex.Lock() 556 defer tab.mutex.Unlock() 557 if b.bump(new) { 558 return 559 } 560 var oldest *Node 561 if len(b.entries) == bucketSize { 562 oldest = b.entries[bucketSize-1] 563 if oldest.contested { 564 // The node is already being replaced, don't attempt 565 // to replace it. 566 return 567 } 568 oldest.contested = true 569 // Let go of the mutex so other goroutines can access 570 // the table while we ping the least recently active node. 571 tab.mutex.Unlock() 572 err := tab.ping(oldest.ID, oldest.addr()) 573 tab.mutex.Lock() 574 oldest.contested = false 575 if err == nil { 576 // The node responded, don't replace it. 577 return 578 } 579 } 580 added := b.replace(new, oldest) 581 if added && tab.nodeAddedHook != nil { 582 tab.nodeAddedHook(new) 583 } 584 } 585 586 // stuff adds nodes the table to the end of their corresponding bucket 587 // if the bucket is not full. The caller must hold tab.mutex. 588 func (tab *Table) stuff(nodes []*Node) { 589 outer: 590 for _, n := range nodes { 591 if n.ID == tab.self.ID { 592 continue // don't add self 593 } 594 bucket := tab.buckets[logdist(tab.self.sha, n.sha)] 595 for i := range bucket.entries { 596 if bucket.entries[i].ID == n.ID { 597 continue outer // already in bucket 598 } 599 } 600 if len(bucket.entries) < bucketSize { 601 bucket.entries = append(bucket.entries, n) 602 if tab.nodeAddedHook != nil { 603 tab.nodeAddedHook(n) 604 } 605 } 606 } 607 } 608 609 // delete removes an entry from the node table (used to evacuate 610 // failed/non-bonded discovery peers). 
611 func (tab *Table) delete(node *Node) { 612 tab.mutex.Lock() 613 defer tab.mutex.Unlock() 614 bucket := tab.buckets[logdist(tab.self.sha, node.sha)] 615 for i := range bucket.entries { 616 if bucket.entries[i].ID == node.ID { 617 bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...) 618 return 619 } 620 } 621 } 622 623 func (b *bucket) replace(n *Node, last *Node) bool { 624 // Don't add if b already contains n. 625 for i := range b.entries { 626 if b.entries[i].ID == n.ID { 627 return false 628 } 629 } 630 // Replace last if it is still the last entry or just add n if b 631 // isn't full. If is no longer the last entry, it has either been 632 // replaced with someone else or became active. 633 if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) { 634 return false 635 } 636 if len(b.entries) < bucketSize { 637 b.entries = append(b.entries, nil) 638 } 639 copy(b.entries[1:], b.entries) 640 b.entries[0] = n 641 return true 642 } 643 644 func (b *bucket) bump(n *Node) bool { 645 for i := range b.entries { 646 if b.entries[i].ID == n.ID { 647 // move it to the front 648 copy(b.entries[1:], b.entries[:i]) 649 b.entries[0] = n 650 return true 651 } 652 } 653 return false 654 } 655 656 // nodesByDistance is a list of nodes, ordered by 657 // distance to target. 658 type nodesByDistance struct { 659 entries []*Node 660 target common.Hash 661 } 662 663 // push adds the given node to the list, keeping the total size below maxElems. 664 func (h *nodesByDistance) push(n *Node, maxElems int) { 665 ix := sort.Search(len(h.entries), func(i int) bool { 666 return distcmp(h.target, h.entries[i].sha, n.sha) > 0 667 }) 668 if len(h.entries) < maxElems { 669 h.entries = append(h.entries, n) 670 } 671 if ix == len(h.entries) { 672 // farther away than all nodes we already have. 673 // if there was room for it, the node is now the last element. 
674 } else { 675 // slide existing entries down to make room 676 // this will overwrite the entry we just appended. 677 copy(h.entries[ix+1:], h.entries[ix:]) 678 h.entries[ix] = n 679 } 680 }