github.com/murrekatt/go-ethereum@v1.5.8-0.20170123175102-fc52f2c007fb/p2p/discover/table.go

// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package discover implements the Node Discovery Protocol.
//
// The Node Discovery protocol provides a way to find RLPx nodes that
// can be connected to. It uses a Kademlia-like protocol to maintain a
// distributed database of the IDs and endpoints of all listening
// nodes.
package discover

import (
	"crypto/rand"
	"encoding/binary"
	"errors"
	"fmt"
	"net"
	"sort"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/logger"
	"github.com/ethereum/go-ethereum/logger/glog"
)

const (
	alpha      = 3  // Kademlia concurrency factor
	bucketSize = 16 // Kademlia bucket size
	hashBits   = len(common.Hash{}) * 8
	nBuckets   = hashBits + 1 // Number of buckets

	maxBondingPingPongs = 16
	maxFindnodeFailures = 5

	autoRefreshInterval = 1 * time.Hour
	seedCount           = 30
	seedMaxAge          = 5 * 24 * time.Hour
)

type Table struct {
	mutex   sync.Mutex        // protects buckets, their content, and nursery
	buckets [nBuckets]*bucket // index of known nodes by distance
	nursery []*Node           // bootstrap nodes
	db      *nodeDB           // database of known nodes

	refreshReq chan chan struct{}
	closeReq   chan struct{}
	closed     chan struct{}

	bondmu    sync.Mutex
	bonding   map[NodeID]*bondproc
	bondslots chan struct{} // limits total number of active bonding processes

	nodeAddedHook func(*Node) // for testing

	net  transport
	self *Node // metadata of the local node
}

type bondproc struct {
	err  error
	n    *Node
	done chan struct{}
}

// transport is implemented by the UDP transport.
// It is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
	ping(NodeID, *net.UDPAddr) error
	waitping(NodeID) error
	findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
	close()
}
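
// Editorial sketch, not part of the original source: transport is kept
// deliberately small so the table can be exercised without sockets or keys.
// A stub as small as this satisfies the interface; every method succeeds
// immediately and findnode returns no results.
type noopTransport struct{}

func (noopTransport) ping(NodeID, *net.UDPAddr) error { return nil }
func (noopTransport) waitping(NodeID) error           { return nil }
func (noopTransport) findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error) {
	return nil, nil
}
func (noopTransport) close() {}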

// bucket contains nodes, ordered by their last activity. The entry
// that was most recently active is the first element in entries.
type bucket struct{ entries []*Node }

func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) (*Table, error) {
	// If no node database was given, use an in-memory one.
	db, err := newNodeDB(nodeDBPath, Version, ourID)
	if err != nil {
		return nil, err
	}
	tab := &Table{
		net:        t,
		db:         db,
		self:       NewNode(ourID, ourAddr.IP, uint16(ourAddr.Port), uint16(ourAddr.Port)),
		bonding:    make(map[NodeID]*bondproc),
		bondslots:  make(chan struct{}, maxBondingPingPongs),
		refreshReq: make(chan chan struct{}),
		closeReq:   make(chan struct{}),
		closed:     make(chan struct{}),
	}
	for i := 0; i < cap(tab.bondslots); i++ {
		tab.bondslots <- struct{}{}
	}
	for i := range tab.buckets {
		tab.buckets[i] = new(bucket)
	}
	go tab.refreshLoop()
	return tab, nil
}

// Self returns the local node.
// The returned node should not be modified by the caller.
func (tab *Table) Self() *Node {
	return tab.self
}

// ReadRandomNodes fills the given slice with random nodes from the
// table. It will not write the same node more than once. The nodes in
// the slice are copies and can be modified by the caller.
func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	// TODO: tree-based buckets would help here
	// Find all non-empty buckets and get a fresh slice of their entries.
	var buckets [][]*Node
	for _, b := range tab.buckets {
		if len(b.entries) > 0 {
			buckets = append(buckets, b.entries[:])
		}
	}
	if len(buckets) == 0 {
		return 0
	}
	// Shuffle the buckets (Fisher-Yates; j is uniform in [0, i]).
	for i := uint32(len(buckets)) - 1; i > 0; i-- {
		j := randUint(i + 1)
		buckets[i], buckets[j] = buckets[j], buckets[i]
	}
	// Move head of each bucket into buf, removing buckets that become empty.
	var i, j int
	for ; i < len(buf); i, j = i+1, (j+1)%len(buckets) {
		b := buckets[j]
		cpy := *b[0]
		buf[i] = &cpy // store a copy so the caller may modify it
		buckets[j] = b[1:]
		if len(b) == 1 {
			buckets = append(buckets[:j], buckets[j+1:]...)
		}
		if len(buckets) == 0 {
			i++ // account for the entry written in this iteration
			break
		}
	}
	return i
}

func randUint(max uint32) uint32 {
	if max == 0 {
		return 0
	}
	var b [4]byte
	rand.Read(b[:])
	return binary.BigEndian.Uint32(b[:]) % max
}

// Close terminates the network listener and flushes the node database.
func (tab *Table) Close() {
	select {
	case <-tab.closed:
		// already closed.
	case tab.closeReq <- struct{}{}:
		<-tab.closed // wait for refreshLoop to end.
	}
}

// SetFallbackNodes sets the initial points of contact. These nodes
// are used to connect to the network if the table is empty and there
// are no known nodes in the database.
func (tab *Table) SetFallbackNodes(nodes []*Node) error {
	for _, n := range nodes {
		if err := n.validateComplete(); err != nil {
			return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err)
		}
	}
	tab.mutex.Lock()
	tab.nursery = make([]*Node, 0, len(nodes))
	for _, n := range nodes {
		cpy := *n
		// Recompute cpy.sha because the node might not have been
		// created by NewNode or ParseNode.
		cpy.sha = crypto.Keccak256Hash(n.ID[:])
		tab.nursery = append(tab.nursery, &cpy)
	}
	tab.mutex.Unlock()
	tab.refresh()
	return nil
}
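
// Editorial sketch, not part of the original source: typical wiring of the
// constructors above. The enode URL is a hypothetical placeholder, and
// passing an empty nodeDBPath selects the in-memory node database.
func exampleSetup(t transport, id NodeID, addr *net.UDPAddr) (*Table, error) {
	tab, err := newTable(t, id, addr, "")
	if err != nil {
		return nil, err
	}
	boot, err := ParseNode("enode://...") // hypothetical bootstrap node URL
	if err != nil {
		return nil, err
	}
	if err := tab.SetFallbackNodes([]*Node{boot}); err != nil {
		return nil, err
	}
	buf := make([]*Node, 8)
	n := tab.ReadRandomNodes(buf) // the first n entries are safe-to-modify copies
	_ = buf[:n]
	return tab, nil
}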

// Resolve searches for a specific node with the given ID.
// It returns nil if the node could not be found.
func (tab *Table) Resolve(targetID NodeID) *Node {
	// If the node is present in the local table, no
	// network interaction is required.
	hash := crypto.Keccak256Hash(targetID[:])
	tab.mutex.Lock()
	cl := tab.closest(hash, 1)
	tab.mutex.Unlock()
	if len(cl.entries) > 0 && cl.entries[0].ID == targetID {
		return cl.entries[0]
	}
	// Otherwise, do a network lookup.
	result := tab.Lookup(targetID)
	for _, n := range result {
		if n.ID == targetID {
			return n
		}
	}
	return nil
}

// Lookup performs a network search for nodes close
// to the given target. It approaches the target by querying
// nodes that are closer to it on each iteration.
// The given target does not need to be an actual node
// identifier.
func (tab *Table) Lookup(targetID NodeID) []*Node {
	return tab.lookup(targetID, true)
}

func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool) []*Node {
	var (
		target         = crypto.Keccak256Hash(targetID[:])
		asked          = make(map[NodeID]bool)
		seen           = make(map[NodeID]bool)
		reply          = make(chan []*Node, alpha)
		pendingQueries = 0
		result         *nodesByDistance
	)
	// don't query further if we hit ourselves.
	// unlikely to happen often in practice.
	asked[tab.self.ID] = true

	for {
		tab.mutex.Lock()
		// generate initial result set
		result = tab.closest(target, bucketSize)
		tab.mutex.Unlock()
		if len(result.entries) > 0 || !refreshIfEmpty {
			break
		}
		// The result set is empty, all nodes were dropped, refresh.
		// We actually wait for the refresh to complete here. The very
		// first query will hit this case and run the bootstrapping
		// logic.
		<-tab.refresh()
		refreshIfEmpty = false
	}

	for {
		// ask the alpha closest nodes that we haven't asked yet
		for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ {
			n := result.entries[i]
			if !asked[n.ID] {
				asked[n.ID] = true
				pendingQueries++
				go func() {
					// Find potential neighbors to bond with.
					r, err := tab.net.findnode(n.ID, n.addr(), targetID)
					if err != nil {
						// Bump the failure counter to detect and evacuate non-bonded entries.
						fails := tab.db.findFails(n.ID) + 1
						tab.db.updateFindFails(n.ID, fails)
						glog.V(logger.Detail).Infof("Bumping failures for %x: %d", n.ID[:8], fails)

						if fails >= maxFindnodeFailures {
							glog.V(logger.Detail).Infof("Evacuating node %x: %d findnode failures", n.ID[:8], fails)
							tab.delete(n)
						}
					}
					reply <- tab.bondall(r)
				}()
			}
		}
		if pendingQueries == 0 {
			// we have asked all closest nodes, stop the search
			break
		}
		// wait for the next reply
		for _, n := range <-reply {
			if n != nil && !seen[n.ID] {
				seen[n.ID] = true
				result.push(n, bucketSize)
			}
		}
		pendingQueries--
	}
	return result.entries
}

func (tab *Table) refresh() <-chan struct{} {
	done := make(chan struct{})
	select {
	case tab.refreshReq <- done:
	case <-tab.closed:
		close(done)
	}
	return done
}
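
// Editorial sketch, not part of the original source: how Resolve and Lookup
// differ for a caller. Resolve targets one exact ID and may be answered from
// the local table without network traffic; Lookup always walks the network
// toward the target and returns the closest nodes found, whether or not the
// exact ID was among them.
func exampleFindPeer(tab *Table, id NodeID) *Node {
	if n := tab.Resolve(id); n != nil {
		return n // exact match, possibly served locally
	}
	// The exact node is unreachable; fall back to its neighborhood.
	if closest := tab.Lookup(id); len(closest) > 0 {
		return closest[0]
	}
	return nil
}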

// refreshLoop schedules doRefresh runs and coordinates shutdown.
func (tab *Table) refreshLoop() {
	var (
		timer   = time.NewTicker(autoRefreshInterval)
		waiting []chan struct{} // accumulates waiting callers while doRefresh runs
		done    chan struct{}   // where doRefresh reports completion
	)
	defer timer.Stop()
loop:
	for {
		select {
		case <-timer.C:
			if done == nil {
				done = make(chan struct{})
				go tab.doRefresh(done)
			}
		case req := <-tab.refreshReq:
			waiting = append(waiting, req)
			if done == nil {
				done = make(chan struct{})
				go tab.doRefresh(done)
			}
		case <-done:
			for _, ch := range waiting {
				close(ch)
			}
			waiting = nil
			done = nil
		case <-tab.closeReq:
			break loop
		}
	}

	if tab.net != nil {
		tab.net.close()
	}
	if done != nil {
		<-done
	}
	for _, ch := range waiting {
		close(ch)
	}
	tab.db.close()
	close(tab.closed)
}

// doRefresh performs a lookup for a random target to keep buckets
// full. Seed nodes are inserted if the table is empty (initial
// bootstrap or discarded faulty peers).
func (tab *Table) doRefresh(done chan struct{}) {
	defer close(done)

	// The Kademlia paper specifies that the bucket refresh should
	// perform a lookup in the least recently used bucket. We cannot
	// adhere to this because the findnode target is a 512-bit value
	// (not hash-sized) and it is not easily possible to generate a
	// sha3 preimage that falls into a chosen bucket.
	// We perform a lookup with a random target instead.
	var target NodeID
	rand.Read(target[:])
	result := tab.lookup(target, false)
	if len(result) > 0 {
		return
	}

	// The table is empty. Load nodes from the database and insert
	// them. This should yield a few previously seen nodes that are
	// (hopefully) still alive.
	seeds := tab.db.querySeeds(seedCount, seedMaxAge)
	seeds = tab.bondall(append(seeds, tab.nursery...))
	if glog.V(logger.Debug) {
		if len(seeds) == 0 {
			glog.Infof("no seed nodes found")
		}
		for _, n := range seeds {
			age := time.Since(tab.db.lastPong(n.ID))
			glog.Infof("seed node (age %v): %v", age, n)
		}
	}
	tab.mutex.Lock()
	tab.stuff(seeds)
	tab.mutex.Unlock()

	// Finally, do a self lookup to fill up the buckets.
	tab.lookup(tab.self.ID, false)
}

// closest returns the n nodes in the table that are closest to the
// given id. The caller must hold tab.mutex.
func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
	// This is a very wasteful way to find the closest nodes but
	// obviously correct. I believe that tree-based buckets would make
	// this easier to implement efficiently.
	close := &nodesByDistance{target: target}
	for _, b := range tab.buckets {
		for _, n := range b.entries {
			close.push(n, nresults)
		}
	}
	return close
}

func (tab *Table) len() (n int) {
	for _, b := range tab.buckets {
		n += len(b.entries)
	}
	return n
}
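
// Editorial sketch, not part of the original source: closest is a
// k-nearest-neighbour query built from nodesByDistance. The same helper
// ranks any candidate set against a target hash, which is also how lookup
// merges findnode replies into its result set.
func exampleRank(target common.Hash, candidates []*Node, k int) []*Node {
	ranked := &nodesByDistance{target: target}
	for _, n := range candidates {
		ranked.push(n, k) // keeps at most k entries, nearest first
	}
	return ranked.entries
}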

// bondall bonds with all given nodes concurrently and returns
// those nodes for which bonding has probably succeeded.
func (tab *Table) bondall(nodes []*Node) (result []*Node) {
	rc := make(chan *Node, len(nodes))
	for i := range nodes {
		go func(n *Node) {
			nn, _ := tab.bond(false, n.ID, n.addr(), uint16(n.TCP))
			rc <- nn
		}(nodes[i])
	}
	for range nodes {
		if n := <-rc; n != nil {
			result = append(result, n)
		}
	}
	return result
}

// bond ensures the local node has a bond with the given remote node.
// It also attempts to insert the node into the table if bonding succeeds.
// The caller must not hold tab.mutex.
//
// A bond must be established before sending findnode requests.
// Both sides must have completed a ping/pong exchange for a bond to
// exist. The total number of active bonding processes is limited in
// order to restrain network use.
//
// bond is meant to operate idempotently in that bonding with a remote
// node which still remembers a previously established bond will work.
// The remote node will simply not send a ping back, causing waitping
// to time out.
//
// If pinged is true, the remote node has just pinged us and one half
// of the process can be skipped.
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
	if id == tab.self.ID {
		return nil, errors.New("is self")
	}
	// Retrieve a previously known node and any recent findnode failures.
	node, fails := tab.db.node(id), 0
	if node != nil {
		fails = tab.db.findFails(id)
	}
	// If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch.
	var result error
	age := time.Since(tab.db.lastPong(id))
	if node == nil || fails > 0 || age > nodeDBNodeExpiration {
		glog.V(logger.Detail).Infof("Bonding %x: known=%t, fails=%d age=%v", id[:8], node != nil, fails, age)

		tab.bondmu.Lock()
		w := tab.bonding[id]
		if w != nil {
			// Wait for an existing bonding process to complete.
			tab.bondmu.Unlock()
			<-w.done
		} else {
			// Register a new bonding process.
			w = &bondproc{done: make(chan struct{})}
			tab.bonding[id] = w
			tab.bondmu.Unlock()
			// Do the ping/pong. The result goes into w.
			tab.pingpong(w, pinged, id, addr, tcpPort)
			// Unregister the process after it's done.
			tab.bondmu.Lock()
			delete(tab.bonding, id)
			tab.bondmu.Unlock()
		}
		// Retrieve the bonding results.
		result = w.err
		if result == nil {
			node = w.n
		}
	}
	if node != nil {
		// Add the node to the table even if the bonding ping/pong
		// fails. It will be replaced quickly if it continues to be
		// unresponsive.
		tab.add(node)
		tab.db.updateFindFails(id, 0)
	}
	return node, result
}
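
// Editorial sketch, not part of the original source: the bonding map above
// is an instance of the "single-flight" pattern. Stated on its own, for any
// keyed operation: the first caller for a key runs the work, while later
// callers for the same key block on its done channel instead of starting a
// duplicate.
type singleflight struct {
	mu     sync.Mutex
	active map[NodeID]chan struct{}
}

func (sf *singleflight) do(id NodeID, fn func()) {
	sf.mu.Lock()
	if ch, ok := sf.active[id]; ok {
		sf.mu.Unlock()
		<-ch // work for id is already in flight; wait for it
		return
	}
	ch := make(chan struct{})
	sf.active[id] = ch
	sf.mu.Unlock()

	fn()

	sf.mu.Lock()
	delete(sf.active, id)
	sf.mu.Unlock()
	close(ch) // release all waiters
}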

func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
	// Request a bonding slot to limit network usage.
	<-tab.bondslots
	defer func() { tab.bondslots <- struct{}{} }()

	// Ping the remote side and wait for a pong.
	if w.err = tab.ping(id, addr); w.err != nil {
		close(w.done)
		return
	}
	if !pinged {
		// Give the remote node a chance to ping us before we start
		// sending findnode requests. If they still remember us,
		// waitping will simply time out.
		tab.net.waitping(id)
	}
	// Bonding succeeded, update the node database.
	w.n = NewNode(id, addr.IP, uint16(addr.Port), tcpPort)
	tab.db.updateNode(w.n)
	close(w.done)
}

// ping a remote endpoint and wait for a reply, also updating the node
// database accordingly.
func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
	tab.db.updateLastPing(id, time.Now())
	if err := tab.net.ping(id, addr); err != nil {
		return err
	}
	tab.db.updateLastPong(id, time.Now())

	// Start the background expiration goroutine after the first
	// successful communication. Subsequent calls have no effect if it
	// is already running. We do this here instead of somewhere else
	// so that the search for seed nodes also considers older nodes
	// that would otherwise be removed by the expiration.
	tab.db.ensureExpirer()
	return nil
}

// add attempts to add the given node to its corresponding bucket. If the
// bucket has space available, adding the node succeeds immediately.
// Otherwise, the node is added if the least recently active node in
// the bucket does not respond to a ping packet.
//
// The caller must not hold tab.mutex.
func (tab *Table) add(new *Node) {
	b := tab.buckets[logdist(tab.self.sha, new.sha)]
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	if b.bump(new) {
		return
	}
	var oldest *Node
	if len(b.entries) == bucketSize {
		oldest = b.entries[bucketSize-1]
		if oldest.contested {
			// The node is already being replaced, don't attempt
			// to replace it.
			return
		}
		oldest.contested = true
		// Let go of the mutex so other goroutines can access
		// the table while we ping the least recently active node.
		tab.mutex.Unlock()
		err := tab.ping(oldest.ID, oldest.addr())
		tab.mutex.Lock()
		oldest.contested = false
		if err == nil {
			// The node responded, don't replace it.
			return
		}
	}
	added := b.replace(new, oldest)
	if added && tab.nodeAddedHook != nil {
		tab.nodeAddedHook(new)
	}
}

// stuff adds the given nodes to the end of their corresponding bucket
// if the bucket is not full. The caller must hold tab.mutex.
func (tab *Table) stuff(nodes []*Node) {
outer:
	for _, n := range nodes {
		if n.ID == tab.self.ID {
			continue // don't add self
		}
		bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
		for i := range bucket.entries {
			if bucket.entries[i].ID == n.ID {
				continue outer // already in bucket
			}
		}
		if len(bucket.entries) < bucketSize {
			bucket.entries = append(bucket.entries, n)
			if tab.nodeAddedHook != nil {
				tab.nodeAddedHook(n)
			}
		}
	}
}

// delete removes an entry from the node table (used to evacuate
// failed/non-bonded discovery peers).
func (tab *Table) delete(node *Node) {
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	bucket := tab.buckets[logdist(tab.self.sha, node.sha)]
	for i := range bucket.entries {
		if bucket.entries[i].ID == node.ID {
			bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...)
			return
		}
	}
}
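
// Editorial sketch, not part of the original source: the eviction rule in
// add is the classic Kademlia one, restated here for a single full bucket.
// pingAlive is an assumed liveness probe standing in for tab.ping; the
// candidate is admitted only when there is room or when the least recently
// active entry fails the probe.
func evictionDecision(b *bucket, pingAlive func(*Node) bool) bool {
	if len(b.entries) < bucketSize {
		return true // space available, no eviction needed
	}
	oldest := b.entries[len(b.entries)-1] // least recently active entry
	return !pingAlive(oldest)             // evict only an unresponsive node
}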

func (b *bucket) replace(n *Node, last *Node) bool {
	// Don't add if b already contains n.
	for i := range b.entries {
		if b.entries[i].ID == n.ID {
			return false
		}
	}
	// Replace last if it is still the last entry or just add n if b
	// isn't full. If it is no longer the last entry, it has either been
	// replaced with someone else or became active.
	if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) {
		return false
	}
	if len(b.entries) < bucketSize {
		b.entries = append(b.entries, nil)
	}
	copy(b.entries[1:], b.entries)
	b.entries[0] = n
	return true
}

func (b *bucket) bump(n *Node) bool {
	for i := range b.entries {
		if b.entries[i].ID == n.ID {
			// move it to the front
			copy(b.entries[1:], b.entries[:i])
			b.entries[0] = n
			return true
		}
	}
	return false
}

// nodesByDistance is a list of nodes, ordered by
// distance to target.
type nodesByDistance struct {
	entries []*Node
	target  common.Hash
}

// push adds the given node to the list, keeping the total size below maxElems.
func (h *nodesByDistance) push(n *Node, maxElems int) {
	ix := sort.Search(len(h.entries), func(i int) bool {
		return distcmp(h.target, h.entries[i].sha, n.sha) > 0
	})
	if len(h.entries) < maxElems {
		h.entries = append(h.entries, n)
	}
	if ix == len(h.entries) {
		// farther away than all nodes we already have.
		// if there was room for it, the node is now the last element.
	} else {
		// slide existing entries down to make room.
		// this will overwrite the entry we just appended.
		copy(h.entries[ix+1:], h.entries[ix:])
		h.entries[ix] = n
	}
}
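
// Editorial sketch, not part of the original source: the ordering invariant
// that push maintains, written as a check a test might assert. Entries must
// be in nondecreasing XOR distance from the target; distcmp (defined
// elsewhere in this package) compares two hashes' distances to a target
// like bytes.Compare.
func sortedByDistance(h *nodesByDistance) bool {
	for i := 1; i < len(h.entries); i++ {
		if distcmp(h.target, h.entries[i-1].sha, h.entries[i].sha) > 0 {
			return false
		}
	}
	return true
}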