github.com/halybang/go-ethereum@v1.0.5-0.20180325041310-3b262bc1367c/p2p/discover/table.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // Package discover implements the Node Discovery Protocol. 18 // 19 // The Node Discovery protocol provides a way to find RLPx nodes that 20 // can be connected to. It uses a Kademlia-like protocol to maintain a 21 // distributed database of the IDs and endpoints of all listening 22 // nodes. 23 package discover 24 25 import ( 26 "crypto/rand" 27 "encoding/binary" 28 "errors" 29 "fmt" 30 "net" 31 "sort" 32 "sync" 33 "time" 34 35 "github.com/wanchain/go-wanchain/common" 36 "github.com/wanchain/go-wanchain/crypto" 37 "github.com/wanchain/go-wanchain/log" 38 ) 39 40 const ( 41 alpha = 3 // Kademlia concurrency factor 42 bucketSize = 16 // Kademlia bucket size 43 hashBits = len(common.Hash{}) * 8 44 nBuckets = hashBits + 1 // Number of buckets 45 46 maxBondingPingPongs = 16 47 maxFindnodeFailures = 5 48 49 autoRefreshInterval = 1 * time.Hour 50 seedCount = 30 51 seedMaxAge = 5 * 24 * time.Hour 52 ) 53 54 type Table struct { 55 mutex sync.Mutex // protects buckets, their content, and nursery 56 buckets [nBuckets]*bucket // index of known nodes by distance 57 nursery []*Node // bootstrap nodes 58 db *nodeDB // database of known nodes 59 60 refreshReq chan chan struct{} 61 closeReq chan struct{} 62 closed chan struct{} 63 64 bondmu sync.Mutex 65 bonding map[NodeID]*bondproc 66 bondslots chan struct{} // limits total number of active bonding processes 67 68 nodeAddedHook func(*Node) // for testing 69 70 net transport 71 self *Node // metadata of the local node 72 } 73 74 type bondproc struct { 75 err error 76 n *Node 77 done chan struct{} 78 } 79 80 // transport is implemented by the UDP transport. 81 // it is an interface so we can test without opening lots of UDP 82 // sockets and without generating a private key. 83 type transport interface { 84 ping(NodeID, *net.UDPAddr) error 85 waitping(NodeID) error 86 findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error) 87 close() 88 } 89 90 // bucket contains nodes, ordered by their last activity. the entry 91 // that was most recently active is the first element in entries. 92 type bucket struct{ entries []*Node } 93 94 func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) (*Table, error) { 95 // If no node database was given, use an in-memory one 96 db, err := newNodeDB(nodeDBPath, Version, ourID) 97 if err != nil { 98 return nil, err 99 } 100 tab := &Table{ 101 net: t, 102 db: db, 103 self: NewNode(ourID, ourAddr.IP, uint16(ourAddr.Port), uint16(ourAddr.Port)), 104 bonding: make(map[NodeID]*bondproc), 105 bondslots: make(chan struct{}, maxBondingPingPongs), 106 refreshReq: make(chan chan struct{}), 107 closeReq: make(chan struct{}), 108 closed: make(chan struct{}), 109 } 110 for i := 0; i < cap(tab.bondslots); i++ { 111 tab.bondslots <- struct{}{} 112 } 113 for i := range tab.buckets { 114 tab.buckets[i] = new(bucket) 115 } 116 go tab.refreshLoop() 117 return tab, nil 118 } 119 120 // Self returns the local node. 121 // The returned node should not be modified by the caller. 122 func (tab *Table) Self() *Node { 123 return tab.self 124 } 125 126 // ReadRandomNodes fills the given slice with random nodes from the 127 // table. It will not write the same node more than once. The nodes in 128 // the slice are copies and can be modified by the caller. 129 func (tab *Table) ReadRandomNodes(buf []*Node) (n int) { 130 tab.mutex.Lock() 131 defer tab.mutex.Unlock() 132 // TODO: tree-based buckets would help here 133 // Find all non-empty buckets and get a fresh slice of their entries. 134 var buckets [][]*Node 135 for _, b := range tab.buckets { 136 if len(b.entries) > 0 { 137 buckets = append(buckets, b.entries[:]) 138 } 139 } 140 if len(buckets) == 0 { 141 return 0 142 } 143 // Shuffle the buckets. 144 for i := uint32(len(buckets)) - 1; i > 0; i-- { 145 j := randUint(i) 146 buckets[i], buckets[j] = buckets[j], buckets[i] 147 } 148 // Move head of each bucket into buf, removing buckets that become empty. 149 var i, j int 150 for ; i < len(buf); i, j = i+1, (j+1)%len(buckets) { 151 b := buckets[j] 152 buf[i] = &(*b[0]) 153 buckets[j] = b[1:] 154 if len(b) == 1 { 155 buckets = append(buckets[:j], buckets[j+1:]...) 156 } 157 if len(buckets) == 0 { 158 break 159 } 160 } 161 return i + 1 162 } 163 164 func randUint(max uint32) uint32 { 165 if max == 0 { 166 return 0 167 } 168 var b [4]byte 169 rand.Read(b[:]) 170 return binary.BigEndian.Uint32(b[:]) % max 171 } 172 173 // Close terminates the network listener and flushes the node database. 174 func (tab *Table) Close() { 175 select { 176 case <-tab.closed: 177 // already closed. 178 case tab.closeReq <- struct{}{}: 179 <-tab.closed // wait for refreshLoop to end. 180 } 181 } 182 183 // SetFallbackNodes sets the initial points of contact. These nodes 184 // are used to connect to the network if the table is empty and there 185 // are no known nodes in the database. 186 func (tab *Table) SetFallbackNodes(nodes []*Node) error { 187 for _, n := range nodes { 188 if err := n.validateComplete(); err != nil { 189 return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err) 190 } 191 } 192 tab.mutex.Lock() 193 tab.nursery = make([]*Node, 0, len(nodes)) 194 for _, n := range nodes { 195 cpy := *n 196 // Recompute cpy.sha because the node might not have been 197 // created by NewNode or ParseNode. 198 cpy.sha = crypto.Keccak256Hash(n.ID[:]) 199 tab.nursery = append(tab.nursery, &cpy) 200 } 201 tab.mutex.Unlock() 202 tab.refresh() 203 return nil 204 } 205 206 // Resolve searches for a specific node with the given ID. 207 // It returns nil if the node could not be found. 208 func (tab *Table) Resolve(targetID NodeID) *Node { 209 // If the node is present in the local table, no 210 // network interaction is required. 211 hash := crypto.Keccak256Hash(targetID[:]) 212 tab.mutex.Lock() 213 cl := tab.closest(hash, 1) 214 tab.mutex.Unlock() 215 if len(cl.entries) > 0 && cl.entries[0].ID == targetID { 216 return cl.entries[0] 217 } 218 // Otherwise, do a network lookup. 219 result := tab.Lookup(targetID) 220 for _, n := range result { 221 if n.ID == targetID { 222 return n 223 } 224 } 225 return nil 226 } 227 228 // Lookup performs a network search for nodes close 229 // to the given target. It approaches the target by querying 230 // nodes that are closer to it on each iteration. 231 // The given target does not need to be an actual node 232 // identifier. 233 func (tab *Table) Lookup(targetID NodeID) []*Node { 234 return tab.lookup(targetID, true) 235 } 236 237 func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool) []*Node { 238 var ( 239 target = crypto.Keccak256Hash(targetID[:]) 240 asked = make(map[NodeID]bool) 241 seen = make(map[NodeID]bool) 242 reply = make(chan []*Node, alpha) 243 pendingQueries = 0 244 result *nodesByDistance 245 ) 246 // don't query further if we hit ourself. 247 // unlikely to happen often in practice. 248 asked[tab.self.ID] = true 249 250 for { 251 tab.mutex.Lock() 252 // generate initial result set 253 result = tab.closest(target, bucketSize) 254 tab.mutex.Unlock() 255 if len(result.entries) > 0 || !refreshIfEmpty { 256 break 257 } 258 // The result set is empty, all nodes were dropped, refresh. 259 // We actually wait for the refresh to complete here. The very 260 // first query will hit this case and run the bootstrapping 261 // logic. 262 <-tab.refresh() 263 refreshIfEmpty = false 264 } 265 266 for { 267 // ask the alpha closest nodes that we haven't asked yet 268 for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ { 269 270 n := result.entries[i] 271 272 if !asked[n.ID] { 273 asked[n.ID] = true 274 pendingQueries++ 275 go func() { 276 // Find potential neighbors to bond with 277 r, err := tab.net.findnode(n.ID, n.addr(), targetID) 278 if err != nil { 279 // Bump the failure counter to detect and evacuate non-bonded entries 280 fails := tab.db.findFails(n.ID) + 1 281 tab.db.updateFindFails(n.ID, fails) 282 log.Trace("Bumping findnode failure counter", "id", n.ID, "failcount", fails) 283 284 if fails >= maxFindnodeFailures { 285 log.Trace("Too many findnode failures, dropping", "id", n.ID, "failcount", fails) 286 tab.delete(n) 287 } 288 } 289 reply <- tab.bondall(r) 290 }() 291 } 292 } 293 if pendingQueries == 0 { 294 // we have asked all closest nodes, stop the search 295 break 296 } 297 // wait for the next reply 298 for _, n := range <-reply { 299 if n != nil && !seen[n.ID] { 300 seen[n.ID] = true 301 result.push(n, bucketSize) 302 } 303 } 304 pendingQueries-- 305 } 306 return result.entries 307 } 308 309 func (tab *Table) refresh() <-chan struct{} { 310 done := make(chan struct{}) 311 select { 312 case tab.refreshReq <- done: 313 case <-tab.closed: 314 close(done) 315 } 316 return done 317 } 318 319 // refreshLoop schedules doRefresh runs and coordinates shutdown. 320 func (tab *Table) refreshLoop() { 321 var ( 322 timer = time.NewTicker(autoRefreshInterval) 323 waiting []chan struct{} // accumulates waiting callers while doRefresh runs 324 done chan struct{} // where doRefresh reports completion 325 ) 326 loop: 327 for { 328 select { 329 case <-timer.C: 330 if done == nil { 331 done = make(chan struct{}) 332 go tab.doRefresh(done) 333 } 334 case req := <-tab.refreshReq: 335 waiting = append(waiting, req) 336 if done == nil { 337 done = make(chan struct{}) 338 go tab.doRefresh(done) 339 } 340 case <-done: 341 for _, ch := range waiting { 342 close(ch) 343 } 344 waiting = nil 345 done = nil 346 case <-tab.closeReq: 347 break loop 348 } 349 } 350 351 if tab.net != nil { 352 tab.net.close() 353 } 354 if done != nil { 355 <-done 356 } 357 for _, ch := range waiting { 358 close(ch) 359 } 360 tab.db.close() 361 close(tab.closed) 362 } 363 364 // doRefresh performs a lookup for a random target to keep buckets 365 // full. seed nodes are inserted if the table is empty (initial 366 // bootstrap or discarded faulty peers). 367 func (tab *Table) doRefresh(done chan struct{}) { 368 defer close(done) 369 370 // The Kademlia paper specifies that the bucket refresh should 371 // perform a lookup in the least recently used bucket. We cannot 372 // adhere to this because the findnode target is a 512bit value 373 // (not hash-sized) and it is not easily possible to generate a 374 // sha3 preimage that falls into a chosen bucket. 375 // We perform a lookup with a random target instead. 376 var target NodeID 377 rand.Read(target[:]) 378 result := tab.lookup(target, false) 379 if len(result) > 0 { 380 return 381 } 382 383 // The table is empty. Load nodes from the database and insert 384 // them. This should yield a few previously seen nodes that are 385 // (hopefully) still alive. 386 seeds := tab.db.querySeeds(seedCount, seedMaxAge) 387 388 389 390 seeds = tab.bondall(append(seeds, tab.nursery...)) 391 392 if len(seeds) == 0 { 393 log.Debug("No discv4 seed nodes found") 394 } 395 for _, n := range seeds { 396 age := log.Lazy{Fn: func() time.Duration { return time.Since(tab.db.lastPong(n.ID)) }} 397 log.Trace("Found seed node in database", "id", n.ID, "addr", n.addr(), "age", age) 398 } 399 tab.mutex.Lock() 400 tab.stuff(seeds) 401 tab.mutex.Unlock() 402 403 // Finally, do a self lookup to fill up the buckets. 404 tab.lookup(tab.self.ID, false) 405 } 406 407 // closest returns the n nodes in the table that are closest to the 408 // given id. The caller must hold tab.mutex. 409 func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance { 410 // This is a very wasteful way to find the closest nodes but 411 // obviously correct. I believe that tree-based buckets would make 412 // this easier to implement efficiently. 413 close := &nodesByDistance{target: target} 414 for _, b := range tab.buckets { 415 for _, n := range b.entries { 416 close.push(n, nresults) 417 } 418 } 419 return close 420 } 421 422 func (tab *Table) len() (n int) { 423 for _, b := range tab.buckets { 424 n += len(b.entries) 425 } 426 return n 427 } 428 429 // bondall bonds with all given nodes concurrently and returns 430 // those nodes for which bonding has probably succeeded. 431 func (tab *Table) bondall(nodes []*Node) (result []*Node) { 432 rc := make(chan *Node, len(nodes)) 433 for i := range nodes { 434 go func(n *Node) { 435 nn, _ := tab.bond(false, n.ID, n.addr(), uint16(n.TCP)) 436 rc <- nn 437 }(nodes[i]) 438 } 439 for range nodes { 440 if n := <-rc; n != nil{ 441 result = append(result, n) 442 } 443 } 444 return result 445 } 446 447 // bond ensures the local node has a bond with the given remote node. 448 // It also attempts to insert the node into the table if bonding succeeds. 449 // The caller must not hold tab.mutex. 450 // 451 // A bond is must be established before sending findnode requests. 452 // Both sides must have completed a ping/pong exchange for a bond to 453 // exist. The total number of active bonding processes is limited in 454 // order to restrain network use. 455 // 456 // bond is meant to operate idempotently in that bonding with a remote 457 // node which still remembers a previously established bond will work. 458 // The remote node will simply not send a ping back, causing waitping 459 // to time out. 460 // 461 // If pinged is true, the remote node has just pinged us and one half 462 // of the process can be skipped. 463 func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) { 464 if id == tab.self.ID { 465 return nil, errors.New("is self") 466 } 467 // Retrieve a previously known node and any recent findnode failures 468 node, fails := tab.db.node(id), 0 469 if node != nil { 470 fails = tab.db.findFails(id) 471 } 472 // If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch 473 var result error 474 age := time.Since(tab.db.lastPong(id)) 475 if node == nil || fails > 0 || age > nodeDBNodeExpiration { 476 log.Trace("Starting bonding ping/pong", "id", id, "known", node != nil, "failcount", fails, "age", age) 477 478 tab.bondmu.Lock() 479 w := tab.bonding[id] 480 if w != nil { 481 // Wait for an existing bonding process to complete. 482 tab.bondmu.Unlock() 483 <-w.done 484 } else { 485 // Register a new bonding process. 486 w = &bondproc{done: make(chan struct{})} 487 tab.bonding[id] = w 488 tab.bondmu.Unlock() 489 // Do the ping/pong. The result goes into w. 490 tab.pingpong(w, pinged, id, addr, tcpPort) 491 // Unregister the process after it's done. 492 tab.bondmu.Lock() 493 delete(tab.bonding, id) 494 tab.bondmu.Unlock() 495 } 496 // Retrieve the bonding results 497 result = w.err 498 if result == nil { 499 node = w.n 500 } 501 } 502 if node != nil { 503 // Add the node to the table even if the bonding ping/pong 504 // fails. It will be relaced quickly if it continues to be 505 // unresponsive. 506 tab.add(node) 507 tab.db.updateFindFails(id, 0) 508 } 509 return node, result 510 } 511 512 func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) { 513 // Request a bonding slot to limit network usage 514 <-tab.bondslots 515 defer func() { tab.bondslots <- struct{}{} }() 516 517 // Ping the remote side and wait for a pong. 518 if w.err = tab.ping(id, addr); w.err != nil { 519 close(w.done) 520 return 521 } 522 if !pinged { 523 // Give the remote node a chance to ping us before we start 524 // sending findnode requests. If they still remember us, 525 // waitping will simply time out. 526 tab.net.waitping(id) 527 } 528 // Bonding succeeded, update the node database. 529 w.n = NewNode(id, addr.IP, uint16(addr.Port), tcpPort) 530 tab.db.updateNode(w.n) 531 close(w.done) 532 } 533 534 // ping a remote endpoint and wait for a reply, also updating the node 535 // database accordingly. 536 func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error { 537 tab.db.updateLastPing(id, time.Now()) 538 if err := tab.net.ping(id, addr); err != nil { 539 return err 540 } 541 tab.db.updateLastPong(id, time.Now()) 542 543 // Start the background expiration goroutine after the first 544 // successful communication. Subsequent calls have no effect if it 545 // is already running. We do this here instead of somewhere else 546 // so that the search for seed nodes also considers older nodes 547 // that would otherwise be removed by the expiration. 548 tab.db.ensureExpirer() 549 return nil 550 } 551 552 // add attempts to add the given node its corresponding bucket. If the 553 // bucket has space available, adding the node succeeds immediately. 554 // Otherwise, the node is added if the least recently active node in 555 // the bucket does not respond to a ping packet. 556 // 557 // The caller must not hold tab.mutex. 558 func (tab *Table) add(new *Node) { 559 b := tab.buckets[logdist(tab.self.sha, new.sha)] 560 tab.mutex.Lock() 561 defer tab.mutex.Unlock() 562 if b.bump(new) { 563 return 564 } 565 var oldest *Node 566 if len(b.entries) == bucketSize { 567 oldest = b.entries[bucketSize-1] 568 if oldest.contested { 569 // The node is already being replaced, don't attempt 570 // to replace it. 571 return 572 } 573 oldest.contested = true 574 // Let go of the mutex so other goroutines can access 575 // the table while we ping the least recently active node. 576 tab.mutex.Unlock() 577 err := tab.ping(oldest.ID, oldest.addr()) 578 tab.mutex.Lock() 579 oldest.contested = false 580 if err == nil { 581 // The node responded, don't replace it. 582 return 583 } 584 } 585 added := b.replace(new, oldest) 586 if added && tab.nodeAddedHook != nil { 587 tab.nodeAddedHook(new) 588 } 589 } 590 591 // stuff adds nodes the table to the end of their corresponding bucket 592 // if the bucket is not full. The caller must hold tab.mutex. 593 func (tab *Table) stuff(nodes []*Node) { 594 outer: 595 for _, n := range nodes { 596 if n.ID == tab.self.ID { 597 continue // don't add self 598 } 599 bucket := tab.buckets[logdist(tab.self.sha, n.sha)] 600 for i := range bucket.entries { 601 if bucket.entries[i].ID == n.ID { 602 continue outer // already in bucket 603 } 604 } 605 if len(bucket.entries) < bucketSize { 606 bucket.entries = append(bucket.entries, n) 607 if tab.nodeAddedHook != nil { 608 tab.nodeAddedHook(n) 609 } 610 } 611 } 612 } 613 614 // delete removes an entry from the node table (used to evacuate 615 // failed/non-bonded discovery peers). 616 func (tab *Table) delete(node *Node) { 617 tab.mutex.Lock() 618 defer tab.mutex.Unlock() 619 bucket := tab.buckets[logdist(tab.self.sha, node.sha)] 620 for i := range bucket.entries { 621 if bucket.entries[i].ID == node.ID { 622 bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...) 623 return 624 } 625 } 626 } 627 628 func (b *bucket) replace(n *Node, last *Node) bool { 629 // Don't add if b already contains n. 630 for i := range b.entries { 631 if b.entries[i].ID == n.ID { 632 return false 633 } 634 } 635 // Replace last if it is still the last entry or just add n if b 636 // isn't full. If is no longer the last entry, it has either been 637 // replaced with someone else or became active. 638 if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) { 639 return false 640 } 641 if len(b.entries) < bucketSize { 642 b.entries = append(b.entries, nil) 643 } 644 copy(b.entries[1:], b.entries) 645 b.entries[0] = n 646 return true 647 } 648 649 func (b *bucket) bump(n *Node) bool { 650 for i := range b.entries { 651 if b.entries[i].ID == n.ID { 652 // move it to the front 653 copy(b.entries[1:], b.entries[:i]) 654 b.entries[0] = n 655 return true 656 } 657 } 658 return false 659 } 660 661 // nodesByDistance is a list of nodes, ordered by 662 // distance to target. 663 type nodesByDistance struct { 664 entries []*Node 665 target common.Hash 666 } 667 668 // push adds the given node to the list, keeping the total size below maxElems. 669 func (h *nodesByDistance) push(n *Node, maxElems int) { 670 ix := sort.Search(len(h.entries), func(i int) bool { 671 return distcmp(h.target, h.entries[i].sha, n.sha) > 0 672 }) 673 if len(h.entries) < maxElems { 674 h.entries = append(h.entries, n) 675 } 676 if ix == len(h.entries) { 677 // farther away than all nodes we already have. 678 // if there was room for it, the node is now the last element. 679 } else { 680 // slide existing entries down to make room 681 // this will overwrite the entry we just appended. 682 copy(h.entries[ix+1:], h.entries[ix:]) 683 h.entries[ix] = n 684 } 685 }