github.com/jeffallen/go-ethereum@v1.1.4-0.20150910155051-571d3236c49c/p2p/discover/table.go

// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package discover implements the Node Discovery Protocol.
//
// The Node Discovery protocol provides a way to find RLPx nodes that
// can be connected to. It uses a Kademlia-like protocol to maintain a
// distributed database of the IDs and endpoints of all listening
// nodes.
package discover

import (
	"crypto/rand"
	"encoding/binary"
	"net"
	"sort"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/logger"
	"github.com/ethereum/go-ethereum/logger/glog"
)

const (
	alpha      = 3  // Kademlia concurrency factor
	bucketSize = 16 // Kademlia bucket size
	hashBits   = len(common.Hash{}) * 8
	nBuckets   = hashBits + 1 // Number of buckets

	maxBondingPingPongs = 16
	maxFindnodeFailures = 5
)

type Table struct {
	mutex   sync.Mutex        // protects buckets, their content, and nursery
	buckets [nBuckets]*bucket // index of known nodes by distance
	nursery []*Node           // bootstrap nodes
	db      *nodeDB           // database of known nodes

	bondmu    sync.Mutex
	bonding   map[NodeID]*bondproc
	bondslots chan struct{} // limits total number of active bonding processes

	nodeAddedHook func(*Node) // for testing

	net  transport
	self *Node // metadata of the local node
}

type bondproc struct {
	err  error
	n    *Node
	done chan struct{}
}

// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
	ping(NodeID, *net.UDPAddr) error
	waitping(NodeID) error
	findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
	close()
}
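// A minimal sketch of a transport implementation, included purely for
// illustration: it shows the shape an implementation of the interface above
// must have. The type name is arbitrary and nothing in this package refers to
// it; the real implementation is the UDP transport, and tests use a similar
// stand-in so that no sockets are opened and no private key is needed.
type illustrativeTransport struct{}

var _ transport = illustrativeTransport{} // compile-time interface check

func (illustrativeTransport) ping(NodeID, *net.UDPAddr) error { return nil }
func (illustrativeTransport) waitping(NodeID) error           { return nil }
func (illustrativeTransport) findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error) {
	return nil, nil // a real transport would return the remote node's closest neighbours
}
func (illustrativeTransport) close() {}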
// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
type bucket struct {
	lastLookup time.Time
	entries    []*Node
}

func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) *Table {
	// If no node database was given, use an in-memory one
	db, err := newNodeDB(nodeDBPath, Version, ourID)
	if err != nil {
		glog.V(logger.Warn).Infoln("Failed to open node database:", err)
		db, _ = newNodeDB("", Version, ourID)
	}
	tab := &Table{
		net:       t,
		db:        db,
		self:      newNode(ourID, ourAddr.IP, uint16(ourAddr.Port), uint16(ourAddr.Port)),
		bonding:   make(map[NodeID]*bondproc),
		bondslots: make(chan struct{}, maxBondingPingPongs),
	}
	for i := 0; i < cap(tab.bondslots); i++ {
		tab.bondslots <- struct{}{}
	}
	for i := range tab.buckets {
		tab.buckets[i] = new(bucket)
	}
	return tab
}

// Self returns the local node.
// The returned node should not be modified by the caller.
func (tab *Table) Self() *Node {
	return tab.self
}

// ReadRandomNodes fills the given slice with random nodes from the
// table. It will not write the same node more than once. The nodes in
// the slice are copies and can be modified by the caller.
func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	// TODO: tree-based buckets would help here
	// Find all non-empty buckets and get a fresh slice of their entries.
	var buckets [][]*Node
	for _, b := range tab.buckets {
		if len(b.entries) > 0 {
			buckets = append(buckets, b.entries[:])
		}
	}
	if len(buckets) == 0 {
		return 0
	}
	// Shuffle the buckets.
	for i := uint32(len(buckets)) - 1; i > 0; i-- {
		j := randUint(i)
		buckets[i], buckets[j] = buckets[j], buckets[i]
	}
	// Move head of each bucket into buf, removing buckets that become empty.
	var i, j int
	for ; i < len(buf); i, j = i+1, (j+1)%len(buckets) {
		b := buckets[j]
		cpy := *b[0]
		buf[i] = &cpy // hand out a copy, as promised by the doc comment
		buckets[j] = b[1:]
		if len(b) == 1 {
			buckets = append(buckets[:j], buckets[j+1:]...)
		}
		if len(buckets) == 0 {
			i++ // count the entry written in this final iteration
			break
		}
	}
	return i
}

func randUint(max uint32) uint32 {
	if max == 0 {
		return 0
	}
	var b [4]byte
	rand.Read(b[:])
	return binary.BigEndian.Uint32(b[:]) % max
}

// Close terminates the network listener and flushes the node database.
func (tab *Table) Close() {
	if tab.net != nil {
		tab.net.close()
	}
	tab.db.close()
}

// Bootstrap sets the bootstrap nodes. These nodes are used to connect
// to the network if the table is empty. Bootstrap will also attempt to
// fill the table by performing random lookup operations on the
// network.
func (tab *Table) Bootstrap(nodes []*Node) {
	tab.mutex.Lock()
	// TODO: maybe filter nodes with bad fields (nil, etc.) to avoid strange crashes
	tab.nursery = make([]*Node, 0, len(nodes))
	for _, n := range nodes {
		cpy := *n
		cpy.sha = crypto.Sha3Hash(n.ID[:])
		tab.nursery = append(tab.nursery, &cpy)
	}
	tab.mutex.Unlock()
	tab.refresh()
}
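// Illustrative sketch of how a caller might wire the table together: build it
// with a transport and the local identity, seed it with bootstrap nodes, then
// pull random peers back out. The function name, address and port are
// placeholders; an empty database path selects the in-memory node database.
func exampleTableUsage(t transport, selfID NodeID, bootnodes []*Node) []*Node {
	addr := &net.UDPAddr{IP: net.ParseIP("127.0.0.1"), Port: 30303}
	tab := newTable(t, selfID, addr, "") // "" -> in-memory node database
	defer tab.Close()

	tab.Bootstrap(bootnodes) // stores the nursery and triggers a refresh

	// Read up to bucketSize random, already-known nodes back out of the table.
	buf := make([]*Node, bucketSize)
	n := tab.ReadRandomNodes(buf)
	return buf[:n]
}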
// Lookup performs a network search for nodes close
// to the given target. It approaches the target by querying
// nodes that are closer to it on each iteration.
// The given target does not need to be an actual node
// identifier.
func (tab *Table) Lookup(targetID NodeID) []*Node {
	var (
		target         = crypto.Sha3Hash(targetID[:])
		asked          = make(map[NodeID]bool)
		seen           = make(map[NodeID]bool)
		reply          = make(chan []*Node, alpha)
		pendingQueries = 0
	)
	// don't query further if we hit ourselves.
	// unlikely to happen often in practice.
	asked[tab.self.ID] = true

	tab.mutex.Lock()
	// update last lookup stamp (for refresh logic)
	tab.buckets[logdist(tab.self.sha, target)].lastLookup = time.Now()
	// generate initial result set
	result := tab.closest(target, bucketSize)
	tab.mutex.Unlock()

	// If the result set is empty, all nodes were dropped, refresh
	if len(result.entries) == 0 {
		tab.refresh()
		return nil
	}

	for {
		// ask the alpha closest nodes that we haven't asked yet
		for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ {
			n := result.entries[i]
			if !asked[n.ID] {
				asked[n.ID] = true
				pendingQueries++
				go func() {
					// Find potential neighbors to bond with
					r, err := tab.net.findnode(n.ID, n.addr(), targetID)
					if err != nil {
						// Bump the failure counter to detect and evacuate non-bonded entries
						fails := tab.db.findFails(n.ID) + 1
						tab.db.updateFindFails(n.ID, fails)
						glog.V(logger.Detail).Infof("Bumping failures for %x: %d", n.ID[:8], fails)

						if fails >= maxFindnodeFailures {
							glog.V(logger.Detail).Infof("Evacuating node %x: %d findnode failures", n.ID[:8], fails)
							tab.delete(n)
						}
					}
					reply <- tab.bondall(r)
				}()
			}
		}
		if pendingQueries == 0 {
			// we have asked all closest nodes, stop the search
			break
		}
		// wait for the next reply
		for _, n := range <-reply {
			if n != nil && !seen[n.ID] {
				seen[n.ID] = true
				result.push(n, bucketSize)
			}
		}
		pendingQueries--
	}
	return result.entries
}
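// Illustrative sketch of the distance metric used above: a node's bucket is
// chosen by logdist over the sha3 hashes of the two IDs, roughly the position
// of the highest differing bit of their XOR, which is why nBuckets is
// hashBits + 1. The helper name is arbitrary and only demonstrates the
// mapping that Lookup and add perform inline; inspecting a bucket's entries
// would additionally require holding tab.mutex.
func exampleBucketFor(tab *Table, id NodeID) *bucket {
	sha := crypto.Sha3Hash(id[:])
	return tab.buckets[logdist(tab.self.sha, sha)]
}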
// refresh performs a lookup for a random target to keep buckets full, or seeds
// the table if it is empty (initial bootstrap or discarded faulty peers).
func (tab *Table) refresh() {
	seed := true

	// If the discovery table is empty, seed with previously known nodes
	tab.mutex.Lock()
	for _, bucket := range tab.buckets {
		if len(bucket.entries) > 0 {
			seed = false
			break
		}
	}
	tab.mutex.Unlock()

	// If the table is not empty, try to refresh using the live entries
	if !seed {
		// The Kademlia paper specifies that the bucket refresh should
		// perform a refresh in the least recently used bucket. We cannot
		// adhere to this because the findnode target is a 512-bit value
		// (not hash-sized) and it is not easily possible to generate a
		// sha3 preimage that falls into a chosen bucket.
		//
		// We perform a lookup with a random target instead.
		var target NodeID
		rand.Read(target[:])

		result := tab.Lookup(target)
		if len(result) == 0 {
			// Lookup failed, seed after all
			seed = true
		}
	}

	if seed {
		// Pick a batch of previously known seeds to look up with
		seeds := tab.db.querySeeds(10)
		for _, seed := range seeds {
			glog.V(logger.Debug).Infoln("Seeding network with", seed)
		}
		nodes := append(tab.nursery, seeds...)

		// Bond with all the seed nodes (the ping/pong is skipped for nodes
		// that are already known and have not failed recently)
		bonded := tab.bondall(nodes)
		if len(bonded) > 0 {
			tab.Lookup(tab.self.ID)
		}
		// TODO: the Kademlia paper says that we're supposed to perform
		// random lookups in all buckets further away than our closest neighbor.
	}
}

// closest returns the n nodes in the table that are closest to the
// given id. The caller must hold tab.mutex.
func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
	// This is a very wasteful way to find the closest nodes but
	// obviously correct. I believe that tree-based buckets would make
	// this easier to implement efficiently.
	close := &nodesByDistance{target: target}
	for _, b := range tab.buckets {
		for _, n := range b.entries {
			close.push(n, nresults)
		}
	}
	return close
}

func (tab *Table) len() (n int) {
	for _, b := range tab.buckets {
		n += len(b.entries)
	}
	return n
}

// bondall bonds with all given nodes concurrently and returns
// those nodes for which bonding has probably succeeded.
func (tab *Table) bondall(nodes []*Node) (result []*Node) {
	rc := make(chan *Node, len(nodes))
	for i := range nodes {
		go func(n *Node) {
			nn, _ := tab.bond(false, n.ID, n.addr(), uint16(n.TCP))
			rc <- nn
		}(nodes[i])
	}
	for range nodes {
		if n := <-rc; n != nil {
			result = append(result, n)
		}
	}
	return result
}
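// Illustrative sketch of how closest is meant to be called: take the sha3
// hash of an ID, hold tab.mutex, and receive up to bucketSize entries ordered
// by XOR distance to that hash (see nodesByDistance at the end of this file).
// This mirrors what Lookup does to build its initial result set; the function
// name is arbitrary.
func exampleClosest(tab *Table, targetID NodeID) []*Node {
	target := crypto.Sha3Hash(targetID[:])
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	return tab.closest(target, bucketSize).entries
}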
// bond ensures the local node has a bond with the given remote node.
// It also attempts to insert the node into the table if bonding succeeds.
// The caller must not hold tab.mutex.
//
// A bond must be established before sending findnode requests.
// Both sides must have completed a ping/pong exchange for a bond to
// exist. The total number of active bonding processes is limited in
// order to restrain network use.
//
// bond is meant to operate idempotently in that bonding with a remote
// node which still remembers a previously established bond will work.
// The remote node will simply not send a ping back, causing waitping
// to time out.
//
// If pinged is true, the remote node has just pinged us and one half
// of the process can be skipped.
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
	// Retrieve a previously known node and any recent findnode failures
	node, fails := tab.db.node(id), 0
	if node != nil {
		fails = tab.db.findFails(id)
	}
	// If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch
	var result error
	if node == nil || fails > 0 {
		glog.V(logger.Detail).Infof("Bonding %x: known=%v, fails=%v", id[:8], node != nil, fails)

		tab.bondmu.Lock()
		w := tab.bonding[id]
		if w != nil {
			// Wait for an existing bonding process to complete.
			tab.bondmu.Unlock()
			<-w.done
		} else {
			// Register a new bonding process.
			w = &bondproc{done: make(chan struct{})}
			tab.bonding[id] = w
			tab.bondmu.Unlock()
			// Do the ping/pong. The result goes into w.
			tab.pingpong(w, pinged, id, addr, tcpPort)
			// Unregister the process after it's done.
			tab.bondmu.Lock()
			delete(tab.bonding, id)
			tab.bondmu.Unlock()
		}
		// Retrieve the bonding results
		result = w.err
		if result == nil {
			node = w.n
		}
	}
	if node != nil {
		// Add the node to the table even if the bonding ping/pong
		// fails. It will be replaced quickly if it continues to be
		// unresponsive.
		tab.add(node)
		tab.db.updateFindFails(id, 0)
	}
	return node, result
}

func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
	// Request a bonding slot to limit network usage
	<-tab.bondslots
	defer func() { tab.bondslots <- struct{}{} }()

	// Ping the remote side and wait for a pong.
	if w.err = tab.ping(id, addr); w.err != nil {
		close(w.done)
		return
	}
	if !pinged {
		// Give the remote node a chance to ping us before we start
		// sending findnode requests. If they still remember us,
		// waitping will simply time out.
		tab.net.waitping(id)
	}
	// Bonding succeeded, update the node database.
	w.n = newNode(id, addr.IP, uint16(addr.Port), tcpPort)
	tab.db.updateNode(w.n)
	close(w.done)
}

// ping a remote endpoint and wait for a reply, also updating the node
// database accordingly.
func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
	// Update the last ping and send the message
	tab.db.updateLastPing(id, time.Now())
	if err := tab.net.ping(id, addr); err != nil {
		return err
	}
	// Pong received, update the database and return
	tab.db.updateLastPong(id, time.Now())
	tab.db.ensureExpirer()
	return nil
}

// add attempts to add the given node to its corresponding bucket. If the
// bucket has space available, adding the node succeeds immediately.
// Otherwise, the node is added if the least recently active node in
// the bucket does not respond to a ping packet.
//
// The caller must not hold tab.mutex.
func (tab *Table) add(new *Node) {
	b := tab.buckets[logdist(tab.self.sha, new.sha)]
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	if b.bump(new) {
		return
	}
	var oldest *Node
	if len(b.entries) == bucketSize {
		oldest = b.entries[bucketSize-1]
		if oldest.contested {
			// The node is already being replaced, don't attempt
			// to replace it.
			return
		}
		oldest.contested = true
		// Let go of the mutex so other goroutines can access
		// the table while we ping the least recently active node.
		tab.mutex.Unlock()
		err := tab.ping(oldest.ID, oldest.addr())
		tab.mutex.Lock()
		oldest.contested = false
		if err == nil {
			// The node responded, don't replace it.
			return
		}
	}
	added := b.replace(new, oldest)
	if added && tab.nodeAddedHook != nil {
		tab.nodeAddedHook(new)
	}
}
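// Illustrative sketch of the bondslots pattern used by pingpong above: the
// channel is filled with maxBondingPingPongs tokens in newTable, so receiving
// acquires a slot (blocking when all slots are in use) and sending the token
// back releases it. The helper below is not used anywhere; its name is
// arbitrary and it only restates the acquire/release shape around a callback.
func (tab *Table) withBondSlot(fn func()) {
	<-tab.bondslots                                // acquire a bonding slot
	defer func() { tab.bondslots <- struct{}{} }() // release it when done
	fn()
}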
// stuff adds nodes to the table, appending them to the end of their
// corresponding bucket if the bucket is not full. The caller must hold
// tab.mutex.
func (tab *Table) stuff(nodes []*Node) {
outer:
	for _, n := range nodes {
		if n.ID == tab.self.ID {
			continue // don't add self
		}
		bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
		for i := range bucket.entries {
			if bucket.entries[i].ID == n.ID {
				continue outer // already in bucket
			}
		}
		if len(bucket.entries) < bucketSize {
			bucket.entries = append(bucket.entries, n)
			if tab.nodeAddedHook != nil {
				tab.nodeAddedHook(n)
			}
		}
	}
}

// delete removes an entry from the node table (used to evacuate
// failed/non-bonded discovery peers).
func (tab *Table) delete(node *Node) {
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	bucket := tab.buckets[logdist(tab.self.sha, node.sha)]
	for i := range bucket.entries {
		if bucket.entries[i].ID == node.ID {
			bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...)
			return
		}
	}
}

func (b *bucket) replace(n *Node, last *Node) bool {
	// Don't add if b already contains n.
	for i := range b.entries {
		if b.entries[i].ID == n.ID {
			return false
		}
	}
	// Replace last if it is still the last entry or just add n if b
	// isn't full. If it is no longer the last entry, it has either been
	// replaced with someone else or has become active.
	if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) {
		return false
	}
	if len(b.entries) < bucketSize {
		b.entries = append(b.entries, nil)
	}
	copy(b.entries[1:], b.entries)
	b.entries[0] = n
	return true
}

func (b *bucket) bump(n *Node) bool {
	for i := range b.entries {
		if b.entries[i].ID == n.ID {
			// move it to the front
			copy(b.entries[1:], b.entries[:i])
			b.entries[0] = n
			return true
		}
	}
	return false
}

// nodesByDistance is a list of nodes, ordered by
// distance to target.
type nodesByDistance struct {
	entries []*Node
	target  common.Hash
}

// push adds the given node to the list, keeping the total size below maxElems.
func (h *nodesByDistance) push(n *Node, maxElems int) {
	ix := sort.Search(len(h.entries), func(i int) bool {
		return distcmp(h.target, h.entries[i].sha, n.sha) > 0
	})
	if len(h.entries) < maxElems {
		h.entries = append(h.entries, n)
	}
	if ix == len(h.entries) {
		// farther away than all nodes we already have.
		// if there was room for it, the node is now the last element.
	} else {
		// slide existing entries down to make room
		// this will overwrite the entry we just appended.
		copy(h.entries[ix+1:], h.entries[ix:])
		h.entries[ix] = n
	}
}
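// Illustrative sketch of how push keeps a bounded, distance-ordered result
// set: nodes are inserted at their sorted position and the list never grows
// past maxElems, so once it is full the farthest entry is dropped. This is
// the same loop closest runs over the buckets; the function name is
// arbitrary.
func exampleNodesByDistance(target common.Hash, candidates []*Node) []*Node {
	result := &nodesByDistance{target: target}
	for _, n := range candidates {
		result.push(n, bucketSize) // keep only the bucketSize closest to target
	}
	return result.entries // ordered closest-first
}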