// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package discv5

import (
	"bytes"
	"crypto/ecdsa"
	"errors"
	"fmt"
	"net"
	"time"

	"golang.org/x/crypto/sha3"

	"github.com/bcskill/bcschain/v3/common"
	"github.com/bcskill/bcschain/v3/common/mclock"
	"github.com/bcskill/bcschain/v3/crypto"
	"github.com/bcskill/bcschain/v3/log"
	"github.com/bcskill/bcschain/v3/p2p/netutil"
	"github.com/bcskill/bcschain/v3/rlp"
)

// Sentinel errors of the discovery network.
var (
	// errInvalidEvent is returned by the node state handlers when an event
	// is not accepted in the node's current state.
	errInvalidEvent = errors.New("invalid in current state")
	errNoQuery      = errors.New("no pending query")
	errWrongAddress = errors.New("unknown sender address")
)

const (
	// autoRefreshInterval is the period of the ticker that triggers a
	// table refresh in the main loop.
	autoRefreshInterval = 1 * time.Hour
	// bucketRefreshInterval is the delay of the timer that triggers a
	// lookup towards a bucket refresh target chosen by the table.
	bucketRefreshInterval = 1 * time.Minute
	// seedCount and seedMaxAge are the arguments passed to the node
	// database when querying seed nodes for a refresh.
	seedCount  = 30
	seedMaxAge = 5 * 24 * time.Hour
	// lowPort is the bound used to reject neighbour records: records whose
	// UDP port is <= lowPort are refused.
	lowPort = 1024
)

// testTopic is not referenced by any live code in the visible part of this file.
const testTopic = "foo"

const (
	// printTestImgLogs gates the fmt.Printf statistics lines emitted by the
	// periodic stats dump of the main loop.
	printTestImgLogs = false
)

// Network manages the table and all protocol interaction.
type Network struct {
	db          *nodeDB          // database of known nodes
	conn        transport        // used to send all outgoing packets
	netrestrict *netutil.Netlist // if non-nil, newly learned neighbours must have an IP contained in this list

	closed           chan struct{}          // closed when loop is done
	closeReq         chan struct{}          // 'request to close'
	refreshReq       chan []*Node           // lookups ask for refresh on this channel
	refreshResp      chan (<-chan struct{}) // ...and get the channel to block on from this one
	read             chan ingressPacket     // ingress packets arrive here
	timeout          chan timeoutEvent      // timers armed by timedEvent deliver their events here
	queryReq         chan *findnodeQuery    // lookups submit findnode queries on this channel
	tableOpReq       chan func()            // functions to be run on the loop goroutine (see reqTableOp)
	tableOpResp      chan struct{}          // signalled by the loop after a tableOpReq function has run
	topicRegisterReq chan topicRegisterReq  // topic registration add/remove requests (see RegisterTopic)
	topicSearchReq   chan topicSearchReq    // topic search requests (see SearchTopic)

	// State of the main loop.
	tab           *Table
	topictab      *topicTable
	ticketStore   *ticketStore
	nursery       []*Node                      // fallback seed nodes, used by refresh when the database yields none
	nodes         map[NodeID]*Node             // tracks active nodes with state != known
	timeoutTimers map[timeoutEvent]*time.Timer // pending timers, keyed by the event they will deliver

	// Revalidation queues.
	// Nodes put on these queues will be pinged eventually.
	slowRevalidateQueue []*Node
	fastRevalidateQueue []*Node

	// Buffers for state transition.
	sendBuf []*ingressPacket
}

// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
95 type transport interface { 96 sendPing(remote *Node, remoteAddr *net.UDPAddr, topics []Topic) (hash []byte) 97 sendNeighbours(remote *Node, nodes []*Node) 98 sendFindnodeHash(remote *Node, target common.Hash) 99 sendTopicRegister(remote *Node, topics []Topic, topicIdx int, pong []byte) 100 sendTopicNodes(remote *Node, queryHash common.Hash, nodes []*Node) 101 102 send(remote *Node, ptype nodeEvent, p interface{}) (hash []byte) 103 104 localAddr() *net.UDPAddr 105 Close() 106 } 107 108 type findnodeQuery struct { 109 remote *Node 110 target common.Hash 111 reply chan<- []*Node 112 nresults int // counter for received nodes 113 } 114 115 type topicRegisterReq struct { 116 add bool 117 topic Topic 118 } 119 120 type topicSearchReq struct { 121 topic Topic 122 found chan<- *Node 123 lookup chan<- bool 124 delay time.Duration 125 } 126 127 type topicSearchResult struct { 128 target lookupInfo 129 nodes []*Node 130 } 131 132 type timeoutEvent struct { 133 ev nodeEvent 134 node *Node 135 } 136 137 func newNetwork(conn transport, ourPubkey ecdsa.PublicKey, dbPath string, netrestrict *netutil.Netlist) (*Network, error) { 138 ourID := PubkeyID(&ourPubkey) 139 140 var db *nodeDB 141 if dbPath != "<no database>" { 142 var err error 143 if db, err = newNodeDB(dbPath, Version, ourID); err != nil { 144 return nil, err 145 } 146 } 147 148 tab := newTable(ourID, conn.localAddr()) 149 net := &Network{ 150 db: db, 151 conn: conn, 152 netrestrict: netrestrict, 153 tab: tab, 154 topictab: newTopicTable(db, tab.self), 155 ticketStore: newTicketStore(), 156 refreshReq: make(chan []*Node), 157 refreshResp: make(chan (<-chan struct{})), 158 closed: make(chan struct{}), 159 closeReq: make(chan struct{}), 160 read: make(chan ingressPacket, 100), 161 timeout: make(chan timeoutEvent), 162 timeoutTimers: make(map[timeoutEvent]*time.Timer), 163 tableOpReq: make(chan func()), 164 tableOpResp: make(chan struct{}), 165 queryReq: make(chan *findnodeQuery), 166 topicRegisterReq: make(chan 
topicRegisterReq), 167 topicSearchReq: make(chan topicSearchReq), 168 nodes: make(map[NodeID]*Node), 169 } 170 go net.loop() 171 return net, nil 172 } 173 174 // Close terminates the network listener and flushes the node database. 175 func (net *Network) Close() { 176 net.conn.Close() 177 select { 178 case <-net.closed: 179 case net.closeReq <- struct{}{}: 180 <-net.closed 181 } 182 } 183 184 // Self returns the local node. 185 // The returned node should not be modified by the caller. 186 func (net *Network) Self() *Node { 187 return net.tab.self 188 } 189 190 // ReadRandomNodes fills the given slice with random nodes from the 191 // table. It will not write the same node more than once. The nodes in 192 // the slice are copies and can be modified by the caller. 193 func (net *Network) ReadRandomNodes(buf []*Node) (n int) { 194 net.reqTableOp(func() { n = net.tab.readRandomNodes(buf) }) 195 return n 196 } 197 198 // SetFallbackNodes sets the initial points of contact. These nodes 199 // are used to connect to the network if the table is empty and there 200 // are no known nodes in the database. 201 func (net *Network) SetFallbackNodes(nodes []*Node) error { 202 nursery := make([]*Node, 0, len(nodes)) 203 for _, n := range nodes { 204 if err := n.validateComplete(); err != nil { 205 return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err) 206 } 207 // Recompute cpy.sha because the node might not have been 208 // created by NewNode or ParseNode. 209 cpy := *n 210 cpy.sha = crypto.Keccak256Hash(n.ID[:]) 211 nursery = append(nursery, &cpy) 212 } 213 net.reqRefresh(nursery) 214 return nil 215 } 216 217 // Resolve searches for a specific node with the given ID. 218 // It returns nil if the node could not be found. 
219 func (net *Network) Resolve(targetID NodeID) *Node { 220 result := net.lookup(crypto.Keccak256Hash(targetID[:]), true) 221 for _, n := range result { 222 if n.ID == targetID { 223 return n 224 } 225 } 226 return nil 227 } 228 229 // Lookup performs a network search for nodes close 230 // to the given target. It approaches the target by querying 231 // nodes that are closer to it on each iteration. 232 // The given target does not need to be an actual node 233 // identifier. 234 // 235 // The local node may be included in the result. 236 func (net *Network) Lookup(targetID NodeID) []*Node { 237 return net.lookup(crypto.Keccak256Hash(targetID[:]), false) 238 } 239 240 func (net *Network) lookup(target common.Hash, stopOnMatch bool) []*Node { 241 var ( 242 asked = make(map[NodeID]bool) 243 seen = make(map[NodeID]bool) 244 reply = make(chan []*Node, alpha) 245 result = nodesByDistance{target: target} 246 pendingQueries = 0 247 ) 248 // Get initial answers from the local node. 249 result.push(net.tab.self, bucketSize) 250 for { 251 // Ask the α closest nodes that we haven't asked yet. 252 for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ { 253 n := result.entries[i] 254 if !asked[n.ID] { 255 asked[n.ID] = true 256 pendingQueries++ 257 net.reqQueryFindnode(n, target, reply) 258 } 259 } 260 if pendingQueries == 0 { 261 // We have asked all closest nodes, stop the search. 262 break 263 } 264 // Wait for the next reply. 
265 select { 266 case nodes := <-reply: 267 for _, n := range nodes { 268 if n != nil && !seen[n.ID] { 269 seen[n.ID] = true 270 result.push(n, bucketSize) 271 if stopOnMatch && n.sha == target { 272 return result.entries 273 } 274 } 275 } 276 pendingQueries-- 277 case <-time.After(respTimeout): 278 // forget all pending requests, start new ones 279 pendingQueries = 0 280 reply = make(chan []*Node, alpha) 281 } 282 } 283 return result.entries 284 } 285 286 func (net *Network) RegisterTopic(topic Topic, stop <-chan struct{}) { 287 select { 288 case net.topicRegisterReq <- topicRegisterReq{true, topic}: 289 case <-net.closed: 290 return 291 } 292 select { 293 case <-net.closed: 294 case <-stop: 295 select { 296 case net.topicRegisterReq <- topicRegisterReq{false, topic}: 297 case <-net.closed: 298 } 299 } 300 } 301 302 func (net *Network) SearchTopic(topic Topic, setPeriod <-chan time.Duration, found chan<- *Node, lookup chan<- bool) { 303 for { 304 select { 305 case <-net.closed: 306 return 307 case delay, ok := <-setPeriod: 308 select { 309 case net.topicSearchReq <- topicSearchReq{topic: topic, found: found, lookup: lookup, delay: delay}: 310 case <-net.closed: 311 return 312 } 313 if !ok { 314 return 315 } 316 } 317 } 318 } 319 320 func (net *Network) reqRefresh(nursery []*Node) <-chan struct{} { 321 select { 322 case net.refreshReq <- nursery: 323 return <-net.refreshResp 324 case <-net.closed: 325 return net.closed 326 } 327 } 328 329 func (net *Network) reqQueryFindnode(n *Node, target common.Hash, reply chan []*Node) bool { 330 q := &findnodeQuery{remote: n, target: target, reply: reply} 331 select { 332 case net.queryReq <- q: 333 return true 334 case <-net.closed: 335 return false 336 } 337 } 338 339 func (net *Network) reqReadPacket(pkt ingressPacket) { 340 select { 341 case net.read <- pkt: 342 case <-net.closed: 343 } 344 } 345 346 func (net *Network) reqTableOp(f func()) (called bool) { 347 select { 348 case net.tableOpReq <- f: 349 <-net.tableOpResp 
350 return true 351 case <-net.closed: 352 return false 353 } 354 } 355 356 // TODO: external address handling. 357 358 type topicSearchInfo struct { 359 lookupChn chan<- bool 360 period time.Duration 361 } 362 363 const maxSearchCount = 5 364 365 func (net *Network) loop() { 366 var ( 367 refreshTimer = time.NewTicker(autoRefreshInterval) 368 bucketRefreshTimer = time.NewTimer(bucketRefreshInterval) 369 refreshDone chan struct{} // closed when the 'refresh' lookup has ended 370 ) 371 372 // Tracking the next ticket to register. 373 var ( 374 nextTicket *ticketRef 375 nextRegisterTimer *time.Timer 376 nextRegisterTime <-chan time.Time 377 ) 378 defer func() { 379 if nextRegisterTimer != nil { 380 nextRegisterTimer.Stop() 381 } 382 }() 383 resetNextTicket := func() { 384 ticket, timeout := net.ticketStore.nextFilteredTicket() 385 if nextTicket != ticket { 386 nextTicket = ticket 387 if nextRegisterTimer != nil { 388 nextRegisterTimer.Stop() 389 nextRegisterTime = nil 390 } 391 if ticket != nil { 392 nextRegisterTimer = time.NewTimer(timeout) 393 nextRegisterTime = nextRegisterTimer.C 394 } 395 } 396 } 397 398 // Tracking registration and search lookups. 399 var ( 400 topicRegisterLookupTarget lookupInfo 401 topicRegisterLookupDone chan []*Node 402 topicRegisterLookupTick = time.NewTimer(0) 403 searchReqWhenRefreshDone []topicSearchReq 404 searchInfo = make(map[Topic]topicSearchInfo) 405 activeSearchCount int 406 ) 407 topicSearchLookupDone := make(chan topicSearchResult, 100) 408 topicSearch := make(chan Topic, 100) 409 <-topicRegisterLookupTick.C 410 411 statsDump := time.NewTicker(10 * time.Second) 412 413 loop: 414 for { 415 resetNextTicket() 416 417 select { 418 case <-net.closeReq: 419 log.Trace("<-net.closeReq") 420 break loop 421 422 // Ingress packet handling. 
423 case pkt := <-net.read: 424 //fmt.Println("read", pkt.ev) 425 log.Trace("<-net.read") 426 n := net.internNode(&pkt) 427 prestate := n.state 428 status := "ok" 429 if err := net.handle(n, pkt.ev, &pkt); err != nil { 430 status = err.Error() 431 } 432 if log.Tracing() { 433 log.Trace("", "msg", log.Lazy{Fn: func() string { 434 return fmt.Sprintf("<<< (%d) %v from %x@%v: %v -> %v (%v)", 435 net.tab.count, pkt.ev, pkt.remoteID[:8], pkt.remoteAddr, prestate, n.state, status) 436 }}) 437 } 438 // TODO: persist state if n.state goes >= known, delete if it goes <= known 439 440 // State transition timeouts. 441 case timeout := <-net.timeout: 442 log.Trace("<-net.timeout") 443 if net.timeoutTimers[timeout] == nil { 444 // Stale timer (was aborted). 445 continue 446 } 447 delete(net.timeoutTimers, timeout) 448 prestate := timeout.node.state 449 status := "ok" 450 if err := net.handle(timeout.node, timeout.ev, nil); err != nil { 451 status = err.Error() 452 } 453 log.Trace("", "msg", log.Lazy{Fn: func() string { 454 return fmt.Sprintf("--- (%d) %v for %x@%v: %v -> %v (%v)", 455 net.tab.count, timeout.ev, timeout.node.ID[:8], timeout.node.addr(), prestate, timeout.node.state, status) 456 }}) 457 458 // Querying. 459 case q := <-net.queryReq: 460 log.Trace("<-net.queryReq") 461 if !q.start(net) { 462 q.remote.deferQuery(q) 463 } 464 465 // Interacting with the table. 466 case f := <-net.tableOpReq: 467 log.Trace("<-net.tableOpReq") 468 f() 469 net.tableOpResp <- struct{}{} 470 471 // Topic registration stuff. 472 case req := <-net.topicRegisterReq: 473 log.Trace("<-net.topicRegisterReq") 474 if !req.add { 475 net.ticketStore.removeRegisterTopic(req.topic) 476 continue 477 } 478 net.ticketStore.addTopic(req.topic, true) 479 // If we're currently waiting idle (nothing to look up), give the ticket store a 480 // chance to start it sooner. This should speed up convergence of the radius 481 // determination for new topics. 
482 // if topicRegisterLookupDone == nil { 483 if topicRegisterLookupTarget.target == (common.Hash{}) { 484 log.Trace("topicRegisterLookupTarget == null") 485 if topicRegisterLookupTick.Stop() { 486 <-topicRegisterLookupTick.C 487 } 488 target, delay := net.ticketStore.nextRegisterLookup() 489 topicRegisterLookupTarget = target 490 topicRegisterLookupTick.Reset(delay) 491 } 492 493 case nodes := <-topicRegisterLookupDone: 494 log.Trace("<-topicRegisterLookupDone") 495 net.ticketStore.registerLookupDone(topicRegisterLookupTarget, nodes, func(n *Node) []byte { 496 net.ping(n, n.addr()) 497 return n.pingEcho 498 }) 499 target, delay := net.ticketStore.nextRegisterLookup() 500 topicRegisterLookupTarget = target 501 topicRegisterLookupTick.Reset(delay) 502 topicRegisterLookupDone = nil 503 504 case <-topicRegisterLookupTick.C: 505 log.Trace("<-topicRegisterLookupTick") 506 if (topicRegisterLookupTarget.target == common.Hash{}) { 507 target, delay := net.ticketStore.nextRegisterLookup() 508 topicRegisterLookupTarget = target 509 topicRegisterLookupTick.Reset(delay) 510 topicRegisterLookupDone = nil 511 } else { 512 topicRegisterLookupDone = make(chan []*Node) 513 target := topicRegisterLookupTarget.target 514 go func() { topicRegisterLookupDone <- net.lookup(target, false) }() 515 } 516 517 case <-nextRegisterTime: 518 log.Trace("<-nextRegisterTime") 519 net.ticketStore.ticketRegistered(*nextTicket) 520 //fmt.Println("sendTopicRegister", nextTicket.t.node.addr().String(), nextTicket.t.topics, nextTicket.idx, nextTicket.t.pong) 521 net.conn.sendTopicRegister(nextTicket.t.node, nextTicket.t.topics, nextTicket.idx, nextTicket.t.pong) 522 523 case req := <-net.topicSearchReq: 524 if refreshDone == nil { 525 log.Trace("<-net.topicSearchReq") 526 info, ok := searchInfo[req.topic] 527 if ok { 528 if req.delay == time.Duration(0) { 529 delete(searchInfo, req.topic) 530 net.ticketStore.removeSearchTopic(req.topic) 531 } else { 532 info.period = req.delay 533 searchInfo[req.topic] 
= info 534 } 535 continue 536 } 537 if req.delay != time.Duration(0) { 538 var info topicSearchInfo 539 info.period = req.delay 540 info.lookupChn = req.lookup 541 searchInfo[req.topic] = info 542 net.ticketStore.addSearchTopic(req.topic, req.found) 543 topicSearch <- req.topic 544 } 545 } else { 546 searchReqWhenRefreshDone = append(searchReqWhenRefreshDone, req) 547 } 548 549 case topic := <-topicSearch: 550 if activeSearchCount < maxSearchCount { 551 activeSearchCount++ 552 target := net.ticketStore.nextSearchLookup(topic) 553 go func() { 554 nodes := net.lookup(target.target, false) 555 topicSearchLookupDone <- topicSearchResult{target: target, nodes: nodes} 556 }() 557 } 558 period := searchInfo[topic].period 559 if period != time.Duration(0) { 560 go func() { 561 time.Sleep(period) 562 topicSearch <- topic 563 }() 564 } 565 566 case res := <-topicSearchLookupDone: 567 activeSearchCount-- 568 if lookupChn := searchInfo[res.target.topic].lookupChn; lookupChn != nil { 569 lookupChn <- net.ticketStore.radius[res.target.topic].converged 570 } 571 net.ticketStore.searchLookupDone(res.target, res.nodes, func(n *Node, topic Topic) []byte { 572 if n.state != nil && n.state.canQuery { 573 return net.conn.send(n, topicQueryPacket, topicQuery{Topic: topic}) // TODO: set expiration 574 } else { 575 if n.state == unknown { 576 net.ping(n, n.addr()) 577 } 578 return nil 579 } 580 }) 581 582 case <-statsDump.C: 583 log.Trace("<-statsDump.C") 584 /*r, ok := net.ticketStore.radius[testTopic] 585 if !ok { 586 fmt.Printf("(%x) no radius @ %v\n", net.tab.self.ID[:8], time.Now()) 587 } else { 588 topics := len(net.ticketStore.tickets) 589 tickets := len(net.ticketStore.nodes) 590 rad := r.radius / (maxRadius/10000+1) 591 fmt.Printf("(%x) topics:%d radius:%d tickets:%d @ %v\n", net.tab.self.ID[:8], topics, rad, tickets, time.Now()) 592 }*/ 593 594 tm := mclock.Now() 595 for topic, r := range net.ticketStore.radius { 596 if printTestImgLogs { 597 rad := r.radius / (maxRadius/1000000 
+ 1) 598 minrad := r.minRadius / (maxRadius/1000000 + 1) 599 fmt.Printf("*R %d %v %016x %v\n", tm/1000000, topic, net.tab.self.sha[:8], rad) 600 fmt.Printf("*MR %d %v %016x %v\n", tm/1000000, topic, net.tab.self.sha[:8], minrad) 601 } 602 } 603 for topic, t := range net.topictab.topics { 604 wp := t.wcl.nextWaitPeriod(tm) 605 if printTestImgLogs { 606 fmt.Printf("*W %d %v %016x %d\n", tm/1000000, topic, net.tab.self.sha[:8], wp/1000000) 607 } 608 } 609 610 // Periodic / lookup-initiated bucket refresh. 611 case <-refreshTimer.C: 612 log.Trace("<-refreshTimer.C") 613 // TODO: ideally we would start the refresh timer after 614 // fallback nodes have been set for the first time. 615 if refreshDone == nil { 616 refreshDone = make(chan struct{}) 617 net.refresh(refreshDone) 618 } 619 case <-bucketRefreshTimer.C: 620 target := net.tab.chooseBucketRefreshTarget() 621 go func() { 622 net.lookup(target, false) 623 bucketRefreshTimer.Reset(bucketRefreshInterval) 624 }() 625 case newNursery := <-net.refreshReq: 626 log.Trace("<-net.refreshReq") 627 if newNursery != nil { 628 net.nursery = newNursery 629 } 630 if refreshDone == nil { 631 refreshDone = make(chan struct{}) 632 net.refresh(refreshDone) 633 } 634 net.refreshResp <- refreshDone 635 case <-refreshDone: 636 log.Trace("<-net.refreshDone", "table size", net.tab.count) 637 if net.tab.count != 0 { 638 refreshDone = nil 639 list := searchReqWhenRefreshDone 640 searchReqWhenRefreshDone = nil 641 go func() { 642 for _, req := range list { 643 net.topicSearchReq <- req 644 } 645 }() 646 } else { 647 refreshDone = make(chan struct{}) 648 net.refresh(refreshDone) 649 } 650 } 651 } 652 log.Trace("loop stopped") 653 654 log.Debug(fmt.Sprintf("shutting down")) 655 if net.conn != nil { 656 net.conn.Close() 657 } 658 if refreshDone != nil { 659 // TODO: wait for pending refresh. 660 //<-refreshResults 661 } 662 // Cancel all pending timeouts. 
663 for _, timer := range net.timeoutTimers { 664 timer.Stop() 665 } 666 if net.db != nil { 667 net.db.close() 668 } 669 close(net.closed) 670 } 671 672 // Everything below runs on the Network.loop goroutine 673 // and can modify Node, Table and Network at any time without locking. 674 675 func (net *Network) refresh(done chan<- struct{}) { 676 var seeds []*Node 677 if net.db != nil { 678 seeds = net.db.querySeeds(seedCount, seedMaxAge) 679 } 680 if len(seeds) == 0 { 681 seeds = net.nursery 682 } 683 if len(seeds) == 0 { 684 log.Trace("no seed nodes found") 685 close(done) 686 return 687 } 688 for _, n := range seeds { 689 log.Debug("", "msg", log.Lazy{Fn: func() string { 690 var age string 691 if net.db != nil { 692 age = time.Since(net.db.lastPong(n.ID)).String() 693 } else { 694 age = "unknown" 695 } 696 return fmt.Sprintf("seed node (age %s): %v", age, n) 697 }}) 698 n = net.internNodeFromDB(n) 699 if n.state == unknown { 700 net.transition(n, verifyinit) 701 } 702 // Force-add the seed node so Lookup does something. 703 // It will be deleted again if verification fails. 704 net.tab.add(n) 705 } 706 // Start self lookup to fill up the buckets. 707 go func() { 708 net.Lookup(net.tab.self.ID) 709 close(done) 710 }() 711 } 712 713 // Node Interning. 
714 715 func (net *Network) internNode(pkt *ingressPacket) *Node { 716 if n := net.nodes[pkt.remoteID]; n != nil { 717 n.IP = pkt.remoteAddr.IP 718 n.UDP = uint16(pkt.remoteAddr.Port) 719 n.TCP = uint16(pkt.remoteAddr.Port) 720 return n 721 } 722 n := NewNode(pkt.remoteID, pkt.remoteAddr.IP, uint16(pkt.remoteAddr.Port), uint16(pkt.remoteAddr.Port)) 723 n.state = unknown 724 net.nodes[pkt.remoteID] = n 725 return n 726 } 727 728 func (net *Network) internNodeFromDB(dbn *Node) *Node { 729 if n := net.nodes[dbn.ID]; n != nil { 730 return n 731 } 732 n := NewNode(dbn.ID, dbn.IP, dbn.UDP, dbn.TCP) 733 n.state = unknown 734 net.nodes[n.ID] = n 735 return n 736 } 737 738 func (net *Network) internNodeFromNeighbours(sender *net.UDPAddr, rn rpcNode) (n *Node, err error) { 739 if rn.ID == net.tab.self.ID { 740 return nil, errors.New("is self") 741 } 742 if rn.UDP <= lowPort { 743 return nil, errors.New("low port") 744 } 745 n = net.nodes[rn.ID] 746 if n == nil { 747 // We haven't seen this node before. 748 n, err = nodeFromRPC(sender, rn) 749 if net.netrestrict != nil && !net.netrestrict.Contains(n.IP) { 750 return n, errors.New("not contained in netrestrict whitelist") 751 } 752 if err == nil { 753 n.state = unknown 754 net.nodes[n.ID] = n 755 } 756 return n, err 757 } 758 if !n.IP.Equal(rn.IP) || n.UDP != rn.UDP || n.TCP != rn.TCP { 759 if n.state == known { 760 // reject address change if node is known by us 761 err = fmt.Errorf("metadata mismatch: got %v, want %v", rn, n) 762 } else { 763 // accept otherwise; this will be handled nicer with signed ENRs 764 n.IP = rn.IP 765 n.UDP = rn.UDP 766 n.TCP = rn.TCP 767 } 768 } 769 return n, err 770 } 771 772 // nodeNetGuts is embedded in Node and contains fields. 773 type nodeNetGuts struct { 774 // This is a cached copy of sha3(ID) which is used for node 775 // distance calculations. This is part of Node in order to make it 776 // possible to write tests that need a node at a certain distance. 
777 // In those tests, the content of sha will not actually correspond 778 // with ID. 779 sha common.Hash 780 781 // State machine fields. Access to these fields 782 // is restricted to the Network.loop goroutine. 783 state *nodeState 784 pingEcho []byte // hash of last ping sent by us 785 pingTopics []Topic // topic set sent by us in last ping 786 deferredQueries []*findnodeQuery // queries that can't be sent yet 787 pendingNeighbours *findnodeQuery // current query, waiting for reply 788 queryTimeouts int 789 } 790 791 func (n *nodeNetGuts) deferQuery(q *findnodeQuery) { 792 n.deferredQueries = append(n.deferredQueries, q) 793 } 794 795 func (n *nodeNetGuts) startNextQuery(net *Network) { 796 if len(n.deferredQueries) == 0 { 797 return 798 } 799 nextq := n.deferredQueries[0] 800 if nextq.start(net) { 801 n.deferredQueries = append(n.deferredQueries[:0], n.deferredQueries[1:]...) 802 } 803 } 804 805 func (q *findnodeQuery) start(net *Network) bool { 806 // Satisfy queries against the local node directly. 807 if q.remote == net.tab.self { 808 closest := net.tab.closest(crypto.Keccak256Hash(q.target[:]), bucketSize) 809 q.reply <- closest.entries 810 return true 811 } 812 if q.remote.state.canQuery && q.remote.pendingNeighbours == nil { 813 net.conn.sendFindnodeHash(q.remote, q.target) 814 net.timedEvent(respTimeout, q.remote, neighboursTimeout) 815 q.remote.pendingNeighbours = q 816 return true 817 } 818 // If the node is not known yet, it won't accept queries. 819 // Initiate the transition to known. 820 // The request will be sent later when the node reaches known state. 821 if q.remote.state == unknown { 822 net.transition(q.remote, verifyinit) 823 } 824 return false 825 } 826 827 // Node Events (the input to the state machine). 828 829 type nodeEvent uint 830 831 //go:generate stringer -type=nodeEvent 832 833 const ( 834 invalidEvent nodeEvent = iota // zero is reserved 835 836 // Packet type events. 837 // These correspond to packet types in the UDP protocol. 
838 pingPacket 839 pongPacket 840 findnodePacket 841 neighborsPacket 842 findnodeHashPacket 843 topicRegisterPacket 844 topicQueryPacket 845 topicNodesPacket 846 847 // Non-packet events. 848 // Event values in this category are allocated outside 849 // the packet type range (packet types are encoded as a single byte). 850 pongTimeout nodeEvent = iota + 256 851 pingTimeout 852 neighboursTimeout 853 ) 854 855 // Node State Machine. 856 857 type nodeState struct { 858 name string 859 handle func(*Network, *Node, nodeEvent, *ingressPacket) (next *nodeState, err error) 860 enter func(*Network, *Node) 861 canQuery bool 862 } 863 864 func (s *nodeState) String() string { 865 return s.name 866 } 867 868 var ( 869 unknown *nodeState 870 verifyinit *nodeState 871 verifywait *nodeState 872 remoteverifywait *nodeState 873 known *nodeState 874 contested *nodeState 875 unresponsive *nodeState 876 ) 877 878 func init() { 879 unknown = &nodeState{ 880 name: "unknown", 881 enter: func(net *Network, n *Node) { 882 net.tab.delete(n) 883 n.pingEcho = nil 884 // Abort active queries. 
885 for _, q := range n.deferredQueries { 886 q.reply <- nil 887 } 888 n.deferredQueries = nil 889 if n.pendingNeighbours != nil { 890 n.pendingNeighbours.reply <- nil 891 n.pendingNeighbours = nil 892 } 893 n.queryTimeouts = 0 894 }, 895 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 896 switch ev { 897 case pingPacket: 898 net.handlePing(n, pkt) 899 net.ping(n, pkt.remoteAddr) 900 return verifywait, nil 901 default: 902 return unknown, errInvalidEvent 903 } 904 }, 905 } 906 907 verifyinit = &nodeState{ 908 name: "verifyinit", 909 enter: func(net *Network, n *Node) { 910 net.ping(n, n.addr()) 911 }, 912 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 913 switch ev { 914 case pingPacket: 915 net.handlePing(n, pkt) 916 return verifywait, nil 917 case pongPacket: 918 err := net.handleKnownPong(n, pkt) 919 return remoteverifywait, err 920 case pongTimeout: 921 return unknown, nil 922 default: 923 return verifyinit, errInvalidEvent 924 } 925 }, 926 } 927 928 verifywait = &nodeState{ 929 name: "verifywait", 930 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 931 switch ev { 932 case pingPacket: 933 net.handlePing(n, pkt) 934 return verifywait, nil 935 case pongPacket: 936 err := net.handleKnownPong(n, pkt) 937 return known, err 938 case pongTimeout: 939 return unknown, nil 940 default: 941 return verifywait, errInvalidEvent 942 } 943 }, 944 } 945 946 remoteverifywait = &nodeState{ 947 name: "remoteverifywait", 948 enter: func(net *Network, n *Node) { 949 net.timedEvent(respTimeout, n, pingTimeout) 950 }, 951 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 952 switch ev { 953 case pingPacket: 954 net.handlePing(n, pkt) 955 return remoteverifywait, nil 956 case pingTimeout: 957 return known, nil 958 default: 959 return remoteverifywait, errInvalidEvent 960 } 961 }, 962 } 963 964 known = &nodeState{ 
965 name: "known", 966 canQuery: true, 967 enter: func(net *Network, n *Node) { 968 n.queryTimeouts = 0 969 n.startNextQuery(net) 970 // Insert into the table and start revalidation of the last node 971 // in the bucket if it is full. 972 last := net.tab.add(n) 973 if last != nil && last.state == known { 974 // TODO: do this asynchronously 975 net.transition(last, contested) 976 } 977 }, 978 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 979 switch ev { 980 case pingPacket: 981 net.handlePing(n, pkt) 982 return known, nil 983 case pongPacket: 984 err := net.handleKnownPong(n, pkt) 985 return known, err 986 default: 987 return net.handleQueryEvent(n, ev, pkt) 988 } 989 }, 990 } 991 992 contested = &nodeState{ 993 name: "contested", 994 canQuery: true, 995 enter: func(net *Network, n *Node) { 996 net.ping(n, n.addr()) 997 }, 998 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 999 switch ev { 1000 case pongPacket: 1001 // Node is still alive. 1002 err := net.handleKnownPong(n, pkt) 1003 return known, err 1004 case pongTimeout: 1005 net.tab.deleteReplace(n) 1006 return unresponsive, nil 1007 case pingPacket: 1008 net.handlePing(n, pkt) 1009 return contested, nil 1010 default: 1011 return net.handleQueryEvent(n, ev, pkt) 1012 } 1013 }, 1014 } 1015 1016 unresponsive = &nodeState{ 1017 name: "unresponsive", 1018 canQuery: true, 1019 handle: func(net *Network, n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) { 1020 switch ev { 1021 case pingPacket: 1022 net.handlePing(n, pkt) 1023 return known, nil 1024 case pongPacket: 1025 err := net.handleKnownPong(n, pkt) 1026 return known, err 1027 default: 1028 return net.handleQueryEvent(n, ev, pkt) 1029 } 1030 }, 1031 } 1032 } 1033 1034 // handle processes packets sent by n and events related to n. 
func (net *Network) handle(n *Node, ev nodeEvent, pkt *ingressPacket) error {
	// pkt is nil for timeout events; only real packets are checked.
	if pkt != nil {
		if err := net.checkPacket(n, ev, pkt); err != nil {
			return err
		}
		// Start the background expiration goroutine after the first
		// successful communication. Subsequent calls have no effect if it
		// is already running. We do this here instead of somewhere else
		// so that the search for seed nodes also considers older nodes
		// that would otherwise be removed by the expirer.
		if net.db != nil {
			net.db.ensureExpirer()
		}
	}
	if n.state == nil {
		n.state = unknown //???
	}
	// Let the current state process the event, then move to the state it
	// returned. The handler's error is reported after the transition.
	next, err := n.state.handle(net, n, ev, pkt)
	net.transition(n, next)
	return err
}

// checkPacket validates an incoming packet before it reaches the state
// machine. Currently only pongs are checked: their reply token must equal
// the hash of the last ping sent to n. A matching pong clears n.pingEcho.
func (net *Network) checkPacket(n *Node, ev nodeEvent, pkt *ingressPacket) error {
	// Replay prevention checks.
	switch ev {
	case pingPacket, findnodeHashPacket, neighborsPacket:
		// TODO: check date is > last date seen
		// TODO: check ping version
	case pongPacket:
		if !bytes.Equal(pkt.data.(*pong).ReplyTok, n.pingEcho) {
			return fmt.Errorf("pong reply token mismatch")
		}
		n.pingEcho = nil
	}
	// Address validation.
	// TODO: Ideally we would do the following:
	//  - reject all packets with wrong address except ping.
	//  - for ping with new address, transition to verifywait but keep the
	//    previous node (with old address) around. if the new one reaches known,
	//    swap it out.
	return nil
}

// transition moves n to the state next and runs that state's enter hook, if
// it has one. Nothing happens if n is already in that state.
func (net *Network) transition(n *Node, next *nodeState) {
	if n.state != next {
		n.state = next
		if next.enter != nil {
			next.enter(net, n)
		}
	}

	// TODO: persist/unpersist node
}

// timedEvent arms a timer that delivers event ev for node n to the main loop
// after d. The timer is recorded in net.timeoutTimers under the (ev, n) key,
// replacing any entry already stored there.
func (net *Network) timedEvent(d time.Duration, n *Node, ev nodeEvent) {
	timeout := timeoutEvent{ev, n}
	net.timeoutTimers[timeout] = time.AfterFunc(d, func() {
		// Give up on delivery if the loop has shut down.
		select {
		case net.timeout <- timeout:
		case <-net.closed:
		}
	})
}

// abortTimedEvent stops and forgets the pending timer for (ev, n), if any.
func (net *Network) abortTimedEvent(n *Node, ev nodeEvent) {
	timer := net.timeoutTimers[timeoutEvent{ev, n}]
	if timer != nil {
		timer.Stop()
		delete(net.timeoutTimers, timeoutEvent{ev, n})
	}
}

// ping sends a ping to n at addr and arms the pong timeout. It does nothing
// if a ping to n is still outstanding (pingEcho is set) or if n is the local
// node.
func (net *Network) ping(n *Node, addr *net.UDPAddr) {
	if n.pingEcho != nil || n.ID == net.tab.self.ID {
		return
	}
	log.Trace("Pinging remote node", "node", n.ID)
	// Remember the topic set and the ping hash so that the pong can be
	// matched against them later.
	n.pingTopics = net.ticketStore.regTopicSet()
	n.pingEcho = net.conn.sendPing(n, addr, n.pingTopics)
	net.timedEvent(respTimeout, n, pongTimeout)
}

// handlePing answers a ping from n with a pong that echoes the hash of the
// ping packet and carries the ticket obtained from the topic table. The TCP
// port of n is updated from the ping.
func (net *Network) handlePing(n *Node, pkt *ingressPacket) {
	log.Trace("Handling remote ping", "node", n.ID)
	ping := pkt.data.(*ping)
	n.TCP = ping.From.TCP
	t := net.topictab.getTicket(n, ping.Topics)

	pong := &pong{
		To:         makeEndpoint(n.addr(), n.TCP), // TODO: maybe use known TCP port from DB
		ReplyTok:   pkt.hash,
		Expiration: uint64(time.Now().Add(expiration).Unix()),
	}
	ticketToPong(t, pong)
	net.conn.send(n, pongPacket, pong)
}

// handleKnownPong processes a pong from n: it cancels the pending pong
// timeout, converts the pong into a ticket and, if that succeeds, adds the
// ticket to the ticket store. The recorded ping state of n is cleared in
// either case. The conversion error, if any, is returned.
func (net *Network) handleKnownPong(n *Node, pkt *ingressPacket) error {
	log.Trace("Handling known pong", "node", n.ID)
	net.abortTimedEvent(n, pongTimeout)
	now := mclock.Now()
	ticket, err := pongToTicket(now, n.pingTopics, n, pkt)
	if err == nil {
		net.ticketStore.addTicket(now, pkt.data.(*pong).ReplyTok, ticket)
	} else {
		log.Trace("Failed to convert pong to ticket", "err", err)
	}
	n.pingEcho = nil
	n.pingTopics = nil
	return err
}

// handleQueryEvent handles the query-related events of nodes in a queryable
// state (it is the fallback of the known, contested and unresponsive state
// handlers). It returns the state the node should move to, which is the
// current state except when too many timeouts move a known node to contested.
func (net *Network) handleQueryEvent(n *Node, ev nodeEvent, pkt *ingressPacket) (*nodeState, error) {
	switch ev {
	case findnodePacket:
		// The target of a plain findnode is hashed before the table lookup.
		target := crypto.Keccak256Hash(pkt.data.(*findnode).Target[:])
		results := net.tab.closest(target, bucketSize).entries
		net.conn.sendNeighbours(n, results)
		return n.state, nil
	case neighborsPacket:
		err := net.handleNeighboursPacket(n, pkt)
		return n.state, err
	case neighboursTimeout:
		// Fail the pending query, if any, by replying nil.
		if n.pendingNeighbours != nil {
			n.pendingNeighbours.reply <- nil
			n.pendingNeighbours = nil
		}
		n.queryTimeouts++
		if n.queryTimeouts > maxFindnodeFailures && n.state == known {
			return contested, errors.New("too many timeouts")
		}
		return n.state, nil

	// v5

	case findnodeHashPacket:
		// The target of a findnodeHash is used as-is.
		results := net.tab.closest(pkt.data.(*findnodeHash).Target, bucketSize).entries
		net.conn.sendNeighbours(n, results)
		return n.state, nil
	case topicRegisterPacket:
		regdata := pkt.data.(*topicRegister)
		pong, err := net.checkTopicRegister(regdata)
		if err != nil {
			return n.state, fmt.Errorf("bad waiting ticket: %v", err)
		}
		net.topictab.useTicket(n, pong.TicketSerial, regdata.Topics, int(regdata.Idx), pong.Expiration, pong.WaitPeriods)
		return n.state, nil
	case topicQueryPacket:
		// TODO: handle expiration
		topic := pkt.data.(*topicQuery).Topic
		results := net.topictab.getEntries(topic)
		if _, ok := net.ticketStore.tickets[topic]; ok {
			results = append(results, net.tab.self) // we're not registering in our own table but if we're advertising, return ourselves too
		}
		// At most 10 nodes are returned per query.
		if len(results) > 10 {
			results = results[:10]
		}
		var hash common.Hash
		copy(hash[:], pkt.hash)
		net.conn.sendTopicNodes(n, hash, results)
		return n.state, nil
	case topicNodesPacket:
		p := pkt.data.(*topicNodes)
		// A true result from gotTopicNodes is counted like a query timeout.
		if net.ticketStore.gotTopicNodes(n, p.Echo, p.Nodes) {
			n.queryTimeouts++
			if n.queryTimeouts > maxFindnodeFailures && n.state == known {
				return contested, errors.New("too many timeouts")
			}
		}
		return n.state, nil

	default:
		return n.state, errInvalidEvent
	}
}

func (net *Network) checkTopicRegister(data *topicRegister) (*pong, error) {
	var pongpkt ingressPacket
	if err := decodePacket(data.Pong, &pongpkt); err != nil {
		return nil, err
	}
	if pongpkt.ev != pongPacket {
		return nil, errors.New("is not pong packet")
	}
	if pongpkt.remoteID != net.tab.self.ID {
		return nil, errors.New("not signed by us")
	}
	// check that we previously authorised all topics
	// that the other side is trying to register.
1233 if rlpHash(data.Topics) != pongpkt.data.(*pong).TopicHash { 1234 return nil, errors.New("topic hash mismatch") 1235 } 1236 if data.Idx < 0 || int(data.Idx) >= len(data.Topics) { 1237 return nil, errors.New("topic index out of range") 1238 } 1239 return pongpkt.data.(*pong), nil 1240 } 1241 1242 func rlpHash(x interface{}) (h common.Hash) { 1243 hw := sha3.NewLegacyKeccak256() 1244 rlp.Encode(hw, x) 1245 hw.Sum(h[:0]) 1246 return h 1247 } 1248 1249 func (net *Network) handleNeighboursPacket(n *Node, pkt *ingressPacket) error { 1250 if n.pendingNeighbours == nil { 1251 return errNoQuery 1252 } 1253 net.abortTimedEvent(n, neighboursTimeout) 1254 1255 req := pkt.data.(*neighbors) 1256 nodes := make([]*Node, len(req.Nodes)) 1257 for i, rn := range req.Nodes { 1258 nn, err := net.internNodeFromNeighbours(pkt.remoteAddr, rn) 1259 if err != nil { 1260 log.Debug(fmt.Sprintf("invalid neighbour (%v) from %x@%v: %v", rn.IP, n.ID[:8], pkt.remoteAddr, err)) 1261 continue 1262 } 1263 nodes[i] = nn 1264 // Start validation of query results immediately. 1265 // This fills the table quickly. 1266 // TODO: generates way too many packets, maybe do it via queue. 1267 if nn.state == unknown { 1268 net.transition(nn, verifyinit) 1269 } 1270 } 1271 // TODO: don't ignore second packet 1272 n.pendingNeighbours.reply <- nodes 1273 n.pendingNeighbours = nil 1274 // Now that this query is done, start the next one. 1275 n.startNextQuery(net) 1276 return nil 1277 }