gitee.com/liuxuezhan/go-micro-v1.18.0@v1.0.0/network/default.go (about) 1 package network 2 3 import ( 4 "errors" 5 "fmt" 6 "hash/fnv" 7 "io" 8 "math" 9 "sync" 10 "time" 11 12 "github.com/golang/protobuf/proto" 13 "gitee.com/liuxuezhan/go-micro-v1.18.0/client" 14 rtr "gitee.com/liuxuezhan/go-micro-v1.18.0/client/selector/router" 15 pbNet "gitee.com/liuxuezhan/go-micro-v1.18.0/network/proto" 16 "gitee.com/liuxuezhan/go-micro-v1.18.0/network/resolver/dns" 17 "gitee.com/liuxuezhan/go-micro-v1.18.0/proxy" 18 "gitee.com/liuxuezhan/go-micro-v1.18.0/router" 19 pbRtr "gitee.com/liuxuezhan/go-micro-v1.18.0/router/proto" 20 "gitee.com/liuxuezhan/go-micro-v1.18.0/server" 21 "gitee.com/liuxuezhan/go-micro-v1.18.0/transport" 22 "gitee.com/liuxuezhan/go-micro-v1.18.0/tunnel" 23 bun "gitee.com/liuxuezhan/go-micro-v1.18.0/tunnel/broker" 24 tun "gitee.com/liuxuezhan/go-micro-v1.18.0/tunnel/transport" 25 "gitee.com/liuxuezhan/go-micro-v1.18.0/util/backoff" 26 "gitee.com/liuxuezhan/go-micro-v1.18.0/util/log" 27 ) 28 29 var ( 30 // NetworkChannel is the name of the tunnel channel for passing network messages 31 NetworkChannel = "network" 32 // ControlChannel is the name of the tunnel channel for passing control message 33 ControlChannel = "control" 34 // DefaultLink is default network link 35 DefaultLink = "network" 36 // MaxConnections is the max number of network client connections 37 MaxConnections = 3 38 ) 39 40 var ( 41 // ErrClientNotFound is returned when client for tunnel channel could not be found 42 ErrClientNotFound = errors.New("client not found") 43 // ErrPeerLinkNotFound is returned when peer link could not be found in tunnel Links 44 ErrPeerLinkNotFound = errors.New("peer link not found") 45 ) 46 47 // network implements Network interface 48 type network struct { 49 // node is network node 50 *node 51 // options configure the network 52 options Options 53 // rtr is network router 54 router router.Router 55 // prx is network proxy 56 proxy proxy.Proxy 57 // tun is network tunnel 58 tunnel tunnel.Tunnel 59 // server is network server 60 server server.Server 61 // client is network client 62 client client.Client 63 64 // tunClient is a map of tunnel clients keyed over tunnel channel names 65 tunClient map[string]transport.Client 66 // peerLinks is a map of links for each peer 67 peerLinks map[string]tunnel.Link 68 69 sync.RWMutex 70 // connected marks the network as connected 71 connected bool 72 // closed closes the network 73 closed chan bool 74 // whether we've discovered by the network 75 discovered chan bool 76 // solicted checks whether routes were solicited by one node 77 solicited chan string 78 } 79 80 // message is network message 81 type message struct { 82 // msg is transport message 83 msg *transport.Message 84 // session is tunnel session 85 session tunnel.Session 86 } 87 88 // newNetwork returns a new network node 89 func newNetwork(opts ...Option) Network { 90 options := DefaultOptions() 91 92 for _, o := range opts { 93 o(&options) 94 } 95 96 // set the address to a hashed address 97 hasher := fnv.New64() 98 hasher.Write([]byte(options.Address + options.Id)) 99 address := fmt.Sprintf("%d", hasher.Sum64()) 100 101 // set the address to advertise 102 var advertise string 103 var peerAddress string 104 105 if len(options.Advertise) > 0 { 106 advertise = options.Advertise 107 peerAddress = options.Advertise 108 } else { 109 advertise = options.Address 110 peerAddress = address 111 } 112 113 // init tunnel address to the network bind address 114 options.Tunnel.Init( 115 tunnel.Address(options.Address), 116 ) 117 118 // init router Id to the network id 119 options.Router.Init( 120 router.Id(options.Id), 121 router.Address(peerAddress), 122 ) 123 124 // create tunnel client with tunnel transport 125 tunTransport := tun.NewTransport( 126 tun.WithTunnel(options.Tunnel), 127 ) 128 129 // create the tunnel broker 130 tunBroker := bun.NewBroker( 131 bun.WithTunnel(options.Tunnel), 132 ) 133 134 // server is network server 135 server := server.NewServer( 136 server.Id(options.Id), 137 server.Address(peerAddress), 138 server.Advertise(advertise), 139 server.Name(options.Name), 140 server.Transport(tunTransport), 141 server.Broker(tunBroker), 142 ) 143 144 // client is network client 145 client := client.NewClient( 146 client.Broker(tunBroker), 147 client.Transport(tunTransport), 148 client.Selector( 149 rtr.NewSelector( 150 rtr.WithRouter(options.Router), 151 ), 152 ), 153 ) 154 155 network := &network{ 156 node: &node{ 157 id: options.Id, 158 address: peerAddress, 159 peers: make(map[string]*node), 160 }, 161 options: options, 162 router: options.Router, 163 proxy: options.Proxy, 164 tunnel: options.Tunnel, 165 server: server, 166 client: client, 167 tunClient: make(map[string]transport.Client), 168 peerLinks: make(map[string]tunnel.Link), 169 discovered: make(chan bool, 1), 170 solicited: make(chan string, 1), 171 } 172 173 network.node.network = network 174 175 return network 176 } 177 178 func (n *network) Init(opts ...Option) error { 179 n.Lock() 180 defer n.Unlock() 181 182 // TODO: maybe only allow reinit of certain opts 183 for _, o := range opts { 184 o(&n.options) 185 } 186 187 return nil 188 } 189 190 // Options returns network options 191 func (n *network) Options() Options { 192 n.RLock() 193 defer n.RUnlock() 194 195 options := n.options 196 197 return options 198 } 199 200 // Name returns network name 201 func (n *network) Name() string { 202 return n.options.Name 203 } 204 205 func (n *network) initNodes(startup bool) { 206 nodes, err := n.resolveNodes() 207 if err != nil && !startup { 208 log.Debugf("Network failed to resolve nodes: %v", err) 209 return 210 } 211 212 // initialize the tunnel 213 log.Tracef("Network initialising nodes %+v\n", nodes) 214 215 n.tunnel.Init( 216 tunnel.Nodes(nodes...), 217 ) 218 } 219 220 // resolveNodes resolves network nodes to addresses 221 func (n *network) resolveNodes() ([]string, error) { 222 // resolve the network address to network nodes 223 records, err := n.options.Resolver.Resolve(n.options.Name) 224 if err != nil { 225 log.Debugf("Network failed to resolve nodes: %v", err) 226 } 227 228 // keep processing 229 230 nodeMap := make(map[string]bool) 231 232 // collect network node addresses 233 //nolint:prealloc 234 var nodes []string 235 var i int 236 237 for _, record := range records { 238 if _, ok := nodeMap[record.Address]; ok { 239 continue 240 } 241 242 nodeMap[record.Address] = true 243 nodes = append(nodes, record.Address) 244 245 i++ 246 247 // break once MaxConnection nodes has been reached 248 if i == MaxConnections { 249 break 250 } 251 } 252 253 // use the dns resolver to expand peers 254 dns := &dns.Resolver{} 255 256 // append seed nodes if we have them 257 for _, node := range n.options.Nodes { 258 // resolve anything that looks like a host name 259 records, err := dns.Resolve(node) 260 if err != nil { 261 log.Debugf("Failed to resolve %v %v", node, err) 262 continue 263 } 264 265 // add to the node map 266 for _, record := range records { 267 if _, ok := nodeMap[record.Address]; !ok { 268 nodes = append(nodes, record.Address) 269 } 270 } 271 } 272 273 return nodes, nil 274 } 275 276 // handleNetConn handles network announcement messages 277 func (n *network) handleNetConn(s tunnel.Session, msg chan *message) { 278 for { 279 m := new(transport.Message) 280 if err := s.Recv(m); err != nil { 281 log.Debugf("Network tunnel [%s] receive error: %v", NetworkChannel, err) 282 switch err { 283 case io.EOF, tunnel.ErrReadTimeout: 284 s.Close() 285 return 286 } 287 continue 288 } 289 290 // check if peer is set 291 peer := m.Header["Micro-Peer"] 292 293 // check who the message is intended for 294 if len(peer) > 0 && peer != n.options.Id { 295 continue 296 } 297 298 select { 299 case msg <- &message{ 300 msg: m, 301 session: s, 302 }: 303 case <-n.closed: 304 return 305 } 306 } 307 } 308 309 // acceptNetConn accepts connections from NetworkChannel 310 func (n *network) acceptNetConn(l tunnel.Listener, recv chan *message) { 311 var i int 312 for { 313 // accept a connection 314 conn, err := l.Accept() 315 if err != nil { 316 sleep := backoff.Do(i) 317 log.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep) 318 time.Sleep(sleep) 319 if i > 5 { 320 i = 0 321 } 322 i++ 323 continue 324 } 325 326 select { 327 case <-n.closed: 328 if err := conn.Close(); err != nil { 329 log.Debugf("Network tunnel [%s] failed to close connection: %v", NetworkChannel, err) 330 } 331 return 332 default: 333 // go handle NetworkChannel connection 334 go n.handleNetConn(conn, recv) 335 } 336 } 337 } 338 339 // processNetChan processes messages received on NetworkChannel 340 func (n *network) processNetChan(listener tunnel.Listener) { 341 defer listener.Close() 342 343 // receive network message queue 344 recv := make(chan *message, 128) 345 346 // accept NetworkChannel connections 347 go n.acceptNetConn(listener, recv) 348 349 for { 350 select { 351 case m := <-recv: 352 // switch on type of message and take action 353 switch m.msg.Header["Micro-Method"] { 354 case "connect": 355 // mark the time the message has been received 356 now := time.Now() 357 pbNetConnect := &pbNet.Connect{} 358 359 if err := proto.Unmarshal(m.msg.Body, pbNetConnect); err != nil { 360 log.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err) 361 continue 362 } 363 364 // don't process your own messages 365 if pbNetConnect.Node.Id == n.options.Id { 366 continue 367 } 368 369 log.Debugf("Network received connect message from: %s", pbNetConnect.Node.Id) 370 371 peer := &node{ 372 id: pbNetConnect.Node.Id, 373 address: pbNetConnect.Node.Address, 374 peers: make(map[string]*node), 375 lastSeen: now, 376 } 377 378 // update peer links 379 380 if err := n.updatePeerLinks(pbNetConnect.Node.Address, m); err != nil { 381 log.Debugf("Network failed updating peer links: %s", err) 382 } 383 384 // add peer to the list of node peers 385 if err := n.node.AddPeer(peer); err == ErrPeerExists { 386 log.Debugf("Network peer exists, refreshing: %s", peer.id) 387 // update lastSeen time for the existing node 388 if err := n.RefreshPeer(peer.id, now); err != nil { 389 log.Debugf("Network failed refreshing peer %s: %v", peer.id, err) 390 } 391 } 392 393 // we send the peer message because someone has sent connect 394 // and wants to know what's on the network. The faster we 395 // respond the faster we start to converge 396 397 // get node peers down to MaxDepth encoded in protobuf 398 msg := PeersToProto(n.node, MaxDepth) 399 node := pbNetConnect.Node.Id 400 401 // advertise yourself to the network 402 if err := n.sendTo("peer", NetworkChannel, node, msg); err != nil { 403 log.Debugf("Network failed to advertise peers: %v", err) 404 } 405 406 // advertise all the routes when a new node has connected 407 if err := n.router.Solicit(); err != nil { 408 log.Debugf("Network failed to solicit routes: %s", err) 409 } 410 411 // specify that we're soliciting 412 select { 413 case n.solicited <- node: 414 default: 415 // don't block 416 } 417 case "peer": 418 // mark the time the message has been received 419 now := time.Now() 420 pbNetPeer := &pbNet.Peer{} 421 422 if err := proto.Unmarshal(m.msg.Body, pbNetPeer); err != nil { 423 log.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err) 424 continue 425 } 426 427 // don't process your own messages 428 if pbNetPeer.Node.Id == n.options.Id { 429 continue 430 } 431 432 log.Debugf("Network received peer message from: %s %s", pbNetPeer.Node.Id, pbNetPeer.Node.Address) 433 434 peer := &node{ 435 id: pbNetPeer.Node.Id, 436 address: pbNetPeer.Node.Address, 437 peers: make(map[string]*node), 438 lastSeen: now, 439 } 440 441 // update peer links 442 443 if err := n.updatePeerLinks(pbNetPeer.Node.Address, m); err != nil { 444 log.Debugf("Network failed updating peer links: %s", err) 445 } 446 447 if err := n.node.AddPeer(peer); err == nil { 448 // send a solicit message when discovering new peer 449 msg := &pbRtr.Solicit{ 450 Id: n.options.Id, 451 } 452 453 node := pbNetPeer.Node.Id 454 455 // only solicit this peer 456 if err := n.sendTo("solicit", ControlChannel, node, msg); err != nil { 457 log.Debugf("Network failed to send solicit message: %s", err) 458 } 459 460 continue 461 // we're expecting any error to be ErrPeerExists 462 } else if err != ErrPeerExists { 463 log.Debugf("Network got error adding peer %v", err) 464 continue 465 } 466 467 log.Debugf("Network peer exists, refreshing: %s", pbNetPeer.Node.Id) 468 469 // update lastSeen time for the peer 470 if err := n.RefreshPeer(pbNetPeer.Node.Id, now); err != nil { 471 log.Debugf("Network failed refreshing peer %s: %v", pbNetPeer.Node.Id, err) 472 } 473 474 // NOTE: we don't unpack MaxDepth toplogy 475 peer = UnpackPeerTopology(pbNetPeer, now, MaxDepth-1) 476 log.Tracef("Network updating topology of node: %s", n.node.id) 477 if err := n.node.UpdatePeer(peer); err != nil { 478 log.Debugf("Network failed to update peers: %v", err) 479 } 480 481 // tell the connect loop that we've been discovered 482 // so it stops sending connect messages out 483 select { 484 case n.discovered <- true: 485 default: 486 // don't block here 487 } 488 case "close": 489 pbNetClose := &pbNet.Close{} 490 if err := proto.Unmarshal(m.msg.Body, pbNetClose); err != nil { 491 log.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err) 492 continue 493 } 494 495 // don't process your own messages 496 if pbNetClose.Node.Id == n.options.Id { 497 continue 498 } 499 500 log.Debugf("Network received close message from: %s", pbNetClose.Node.Id) 501 502 peer := &node{ 503 id: pbNetClose.Node.Id, 504 address: pbNetClose.Node.Address, 505 } 506 507 if err := n.DeletePeerNode(peer.id); err != nil { 508 log.Debugf("Network failed to delete node %s routes: %v", peer.id, err) 509 } 510 511 if err := n.prunePeerRoutes(peer); err != nil { 512 log.Debugf("Network failed pruning peer %s routes: %v", peer.id, err) 513 } 514 515 // delete peer from the peerLinks 516 n.Lock() 517 delete(n.peerLinks, pbNetClose.Node.Address) 518 n.Unlock() 519 } 520 case <-n.closed: 521 return 522 } 523 } 524 } 525 526 // pruneRoutes prunes routes return by given query 527 func (n *network) pruneRoutes(q ...router.QueryOption) error { 528 routes, err := n.router.Table().Query(q...) 529 if err != nil && err != router.ErrRouteNotFound { 530 return err 531 } 532 533 for _, route := range routes { 534 if err := n.router.Table().Delete(route); err != nil && err != router.ErrRouteNotFound { 535 return err 536 } 537 } 538 539 return nil 540 } 541 542 // pruneNodeRoutes prunes routes that were either originated by or routable via given node 543 func (n *network) prunePeerRoutes(peer *node) error { 544 // lookup all routes originated by router 545 q := []router.QueryOption{ 546 router.QueryRouter(peer.id), 547 } 548 if err := n.pruneRoutes(q...); err != nil { 549 return err 550 } 551 552 // lookup all routes routable via gw 553 q = []router.QueryOption{ 554 router.QueryGateway(peer.address), 555 } 556 if err := n.pruneRoutes(q...); err != nil { 557 return err 558 } 559 560 return nil 561 } 562 563 // manage the process of announcing to peers and prune any peer nodes that have not been 564 // seen for a period of time. Also removes all the routes either originated by or routable 565 //by the stale nodes. it also resolves nodes periodically and adds them to the tunnel 566 func (n *network) manage() { 567 announce := time.NewTicker(AnnounceTime) 568 defer announce.Stop() 569 prune := time.NewTicker(PruneTime) 570 defer prune.Stop() 571 resolve := time.NewTicker(ResolveTime) 572 defer resolve.Stop() 573 574 for { 575 select { 576 case <-n.closed: 577 return 578 case <-announce.C: 579 msg := PeersToProto(n.node, MaxDepth) 580 // advertise yourself to the network 581 if err := n.sendMsg("peer", NetworkChannel, msg); err != nil { 582 log.Debugf("Network failed to advertise peers: %v", err) 583 } 584 case <-prune.C: 585 pruned := n.PruneStalePeers(PruneTime) 586 587 for id, peer := range pruned { 588 log.Debugf("Network peer exceeded prune time: %s", id) 589 590 n.Lock() 591 delete(n.peerLinks, peer.address) 592 n.Unlock() 593 594 if err := n.prunePeerRoutes(peer); err != nil { 595 log.Debugf("Network failed pruning peer %s routes: %v", id, err) 596 } 597 } 598 599 // get a list of all routes 600 routes, err := n.options.Router.Table().List() 601 if err != nil { 602 log.Debugf("Network failed listing routes when pruning peers: %v", err) 603 continue 604 } 605 606 // collect all the router IDs in the routing table 607 routers := make(map[string]bool) 608 609 for _, route := range routes { 610 // check if its been processed 611 if _, ok := routers[route.Router]; ok { 612 continue 613 } 614 615 // mark as processed 616 routers[route.Router] = true 617 618 // if the router is NOT in our peer graph, delete all routes originated by it 619 if peer := n.node.GetPeerNode(route.Router); peer != nil { 620 continue 621 } 622 623 if err := n.pruneRoutes(router.QueryRouter(route.Router)); err != nil { 624 log.Debugf("Network failed deleting routes by %s: %v", route.Router, err) 625 } 626 } 627 case <-resolve.C: 628 n.initNodes(false) 629 } 630 } 631 } 632 633 // sendTo sends a message to a specific node as a one off. 634 // we need this because when links die, we have no discovery info, 635 // and sending to an existing multicast link doesn't immediately work 636 func (n *network) sendTo(method, channel, peer string, msg proto.Message) error { 637 body, err := proto.Marshal(msg) 638 if err != nil { 639 return err 640 } 641 c, err := n.tunnel.Dial(channel, tunnel.DialMode(tunnel.Multicast)) 642 if err != nil { 643 return err 644 } 645 defer c.Close() 646 647 log.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, peer) 648 649 return c.Send(&transport.Message{ 650 Header: map[string]string{ 651 "Micro-Method": method, 652 "Micro-Peer": peer, 653 }, 654 Body: body, 655 }) 656 } 657 658 // sendMsg sends a message to the tunnel channel 659 func (n *network) sendMsg(method, channel string, msg proto.Message) error { 660 body, err := proto.Marshal(msg) 661 if err != nil { 662 return err 663 } 664 665 // check if the channel client is initialized 666 n.RLock() 667 client, ok := n.tunClient[channel] 668 if !ok || client == nil { 669 n.RUnlock() 670 return ErrClientNotFound 671 } 672 n.RUnlock() 673 674 log.Debugf("Network sending %s message from: %s", method, n.options.Id) 675 676 return client.Send(&transport.Message{ 677 Header: map[string]string{ 678 "Micro-Method": method, 679 }, 680 Body: body, 681 }) 682 } 683 684 // updatePeerLinks updates link for a given peer 685 func (n *network) updatePeerLinks(peerAddr string, m *message) error { 686 n.Lock() 687 defer n.Unlock() 688 689 linkId := m.msg.Header["Micro-Link"] 690 691 log.Tracef("Network looking up link %s in the peer links", linkId) 692 693 // lookup the peer link 694 var peerLink tunnel.Link 695 696 for _, link := range n.tunnel.Links() { 697 if link.Id() == linkId { 698 peerLink = link 699 break 700 } 701 } 702 703 if peerLink == nil { 704 return ErrPeerLinkNotFound 705 } 706 707 // if the peerLink is found in the returned links update peerLinks 708 log.Tracef("Network updating peer links for peer %s", peerAddr) 709 710 // add peerLink to the peerLinks map 711 if link, ok := n.peerLinks[peerAddr]; ok { 712 // if the existing has better Length then the new, replace it 713 if link.Length() < peerLink.Length() { 714 n.peerLinks[peerAddr] = peerLink 715 } 716 } else { 717 n.peerLinks[peerAddr] = peerLink 718 } 719 720 return nil 721 } 722 723 // handleCtrlConn handles ControlChannel connections 724 func (n *network) handleCtrlConn(s tunnel.Session, msg chan *message) { 725 for { 726 m := new(transport.Message) 727 if err := s.Recv(m); err != nil { 728 log.Debugf("Network tunnel [%s] receive error: %v", ControlChannel, err) 729 switch err { 730 case io.EOF, tunnel.ErrReadTimeout: 731 s.Close() 732 return 733 } 734 continue 735 } 736 737 // check if peer is set 738 peer := m.Header["Micro-Peer"] 739 740 // check who the message is intended for 741 if len(peer) > 0 && peer != n.options.Id { 742 continue 743 } 744 745 select { 746 case msg <- &message{ 747 msg: m, 748 session: s, 749 }: 750 case <-n.closed: 751 return 752 } 753 } 754 } 755 756 // acceptCtrlConn accepts connections from ControlChannel 757 func (n *network) acceptCtrlConn(l tunnel.Listener, recv chan *message) { 758 var i int 759 for { 760 // accept a connection 761 conn, err := l.Accept() 762 if err != nil { 763 sleep := backoff.Do(i) 764 log.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep) 765 time.Sleep(sleep) 766 if i > 5 { 767 // reset the counter 768 i = 0 769 } 770 i++ 771 continue 772 } 773 774 select { 775 case <-n.closed: 776 if err := conn.Close(); err != nil { 777 log.Debugf("Network tunnel [%s] failed to close connection: %v", ControlChannel, err) 778 } 779 return 780 default: 781 // go handle ControlChannel connection 782 go n.handleCtrlConn(conn, recv) 783 } 784 } 785 } 786 787 // getHopCount queries network graph and returns hop count for given router 788 // - Routes for local services have hop count 1 789 // - Routes with ID of adjacent nodes have hop count 2 790 // - Routes by peers of the advertiser have hop count 3 791 // - Routes beyond node neighbourhood have hop count 4 792 func (n *network) getHopCount(rtr string) int { 793 // make sure node.peers are not modified 794 n.node.RLock() 795 defer n.node.RUnlock() 796 797 // we are the origin of the route 798 if rtr == n.options.Id { 799 return 1 800 } 801 802 // the route origin is our peer 803 if _, ok := n.peers[rtr]; ok { 804 return 10 805 } 806 807 // the route origin is the peer of our peer 808 for _, peer := range n.peers { 809 for id := range peer.peers { 810 if rtr == id { 811 return 100 812 } 813 } 814 } 815 // otherwise we are three hops away 816 return 1000 817 } 818 819 // getRouteMetric calculates router metric and returns it 820 // Route metric is calculated based on link status and route hopd count 821 func (n *network) getRouteMetric(router string, gateway string, link string) int64 { 822 // set the route metric 823 n.RLock() 824 defer n.RUnlock() 825 826 if link == "local" && gateway == "" { 827 return 1 828 } 829 830 if link == "local" && gateway != "" { 831 return 2 832 } 833 834 log.Tracef("Network looking up %s link to gateway: %s", link, gateway) 835 if link, ok := n.peerLinks[gateway]; ok { 836 // maka sure delay is non-zero 837 delay := link.Delay() 838 if delay == 0 { 839 delay = 1 840 } 841 // get the route hop count 842 hops := n.getHopCount(router) 843 // make sure length is non-zero 844 length := link.Length() 845 if length == 0 { 846 log.Debugf("Link length is 0 %v %v", link, link.Length()) 847 length = 10e9 848 } 849 log.Tracef("Network calculated metric %v delay %v length %v distance %v", (delay*length*int64(hops))/10e6, delay, length, hops) 850 return (delay * length * int64(hops)) / 10e6 851 } 852 853 log.Debugf("Network failed to find a link to gateway: %s", gateway) 854 855 return math.MaxInt64 856 } 857 858 // processCtrlChan processes messages received on ControlChannel 859 func (n *network) processCtrlChan(listener tunnel.Listener) { 860 defer listener.Close() 861 862 // receive control message queue 863 recv := make(chan *message, 128) 864 865 // accept ControlChannel cconnections 866 go n.acceptCtrlConn(listener, recv) 867 868 for { 869 select { 870 case m := <-recv: 871 // switch on type of message and take action 872 switch m.msg.Header["Micro-Method"] { 873 case "advert": 874 pbRtrAdvert := &pbRtr.Advert{} 875 876 if err := proto.Unmarshal(m.msg.Body, pbRtrAdvert); err != nil { 877 log.Debugf("Network fail to unmarshal advert message: %v", err) 878 continue 879 } 880 881 // don't process your own messages 882 if pbRtrAdvert.Id == n.options.Id { 883 continue 884 } 885 886 log.Debugf("Network received advert message from: %s", pbRtrAdvert.Id) 887 888 // loookup advertising node in our peer topology 889 advertNode := n.node.GetPeerNode(pbRtrAdvert.Id) 890 if advertNode == nil { 891 // if we can't find the node in our topology (MaxDepth) we skipp prcessing adverts 892 log.Debugf("Network skipping advert message from unknown peer: %s", pbRtrAdvert.Id) 893 continue 894 } 895 896 var events []*router.Event 897 898 for _, event := range pbRtrAdvert.Events { 899 // we know the advertising node is not the origin of the route 900 if pbRtrAdvert.Id != event.Route.Router { 901 // if the origin router is not the advertising node peer 902 // we can't rule out potential routing loops so we bail here 903 if peer := advertNode.GetPeerNode(event.Route.Router); peer == nil { 904 log.Debugf("Network skipping advert message from peer: %s", pbRtrAdvert.Id) 905 continue 906 } 907 } 908 909 route := router.Route{ 910 Service: event.Route.Service, 911 Address: event.Route.Address, 912 Gateway: event.Route.Gateway, 913 Network: event.Route.Network, 914 Router: event.Route.Router, 915 Link: event.Route.Link, 916 Metric: event.Route.Metric, 917 } 918 919 // calculate route metric and add to the advertised metric 920 // we need to make sure we do not overflow math.MaxInt64 921 metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link) 922 log.Tracef("Network metric for router %s and gateway %s: %v", event.Route.Router, event.Route.Gateway, metric) 923 924 // check we don't overflow max int 64 925 if d := route.Metric + metric; d <= 0 { 926 // set to max int64 if we overflow 927 route.Metric = math.MaxInt64 928 } else { 929 // set the combined value of metrics otherwise 930 route.Metric = d 931 } 932 933 // create router event 934 e := &router.Event{ 935 Type: router.EventType(event.Type), 936 Timestamp: time.Unix(0, pbRtrAdvert.Timestamp), 937 Route: route, 938 } 939 events = append(events, e) 940 } 941 942 // if no events are eligible for processing continue 943 if len(events) == 0 { 944 log.Tracef("Network no events to be processed by router: %s", n.options.Id) 945 continue 946 } 947 948 // create an advert and process it 949 advert := &router.Advert{ 950 Id: pbRtrAdvert.Id, 951 Type: router.AdvertType(pbRtrAdvert.Type), 952 Timestamp: time.Unix(0, pbRtrAdvert.Timestamp), 953 TTL: time.Duration(pbRtrAdvert.Ttl), 954 Events: events, 955 } 956 957 log.Debugf("Network router %s processing advert: %s", n.Id(), advert.Id) 958 if err := n.router.Process(advert); err != nil { 959 log.Debugf("Network failed to process advert %s: %v", advert.Id, err) 960 } 961 case "solicit": 962 pbRtrSolicit := &pbRtr.Solicit{} 963 if err := proto.Unmarshal(m.msg.Body, pbRtrSolicit); err != nil { 964 log.Debugf("Network fail to unmarshal solicit message: %v", err) 965 continue 966 } 967 968 log.Debugf("Network received solicit message from: %s", pbRtrSolicit.Id) 969 970 // ignore solicitation when requested by you 971 if pbRtrSolicit.Id == n.options.Id { 972 continue 973 } 974 975 log.Debugf("Network router flushing routes for: %s", pbRtrSolicit.Id) 976 977 // advertise all the routes when a new node has connected 978 if err := n.router.Solicit(); err != nil { 979 log.Debugf("Network failed to solicit routes: %s", err) 980 } 981 982 // specify that someone solicited the route 983 select { 984 case n.solicited <- pbRtrSolicit.Id: 985 default: 986 // don't block 987 } 988 } 989 case <-n.closed: 990 return 991 } 992 } 993 } 994 995 // advertise advertises routes to the network 996 func (n *network) advertise(advertChan <-chan *router.Advert) { 997 hasher := fnv.New64() 998 for { 999 select { 1000 // process local adverts and randomly fire them at other nodes 1001 case advert := <-advertChan: 1002 // create a proto advert 1003 var events []*pbRtr.Event 1004 1005 for _, event := range advert.Events { 1006 // the routes service address 1007 address := event.Route.Address 1008 1009 // only hash the address if we're advertising our own local routes 1010 if event.Route.Router == advert.Id { 1011 // hash the service before advertising it 1012 hasher.Reset() 1013 // routes for multiple instances of a service will be collapsed here. 1014 // TODO: once we store labels in the table this may need to change 1015 // to include the labels in case they differ but highly unlikely 1016 hasher.Write([]byte(event.Route.Service + n.node.Address())) 1017 address = fmt.Sprintf("%d", hasher.Sum64()) 1018 } 1019 // calculate route metric to advertise 1020 metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link) 1021 // NOTE: we override Gateway, Link and Address here 1022 route := &pbRtr.Route{ 1023 Service: event.Route.Service, 1024 Address: address, 1025 Gateway: n.node.Address(), 1026 Network: event.Route.Network, 1027 Router: event.Route.Router, 1028 Link: DefaultLink, 1029 Metric: metric, 1030 } 1031 e := &pbRtr.Event{ 1032 Type: pbRtr.EventType(event.Type), 1033 Timestamp: event.Timestamp.UnixNano(), 1034 Route: route, 1035 } 1036 events = append(events, e) 1037 } 1038 1039 msg := &pbRtr.Advert{ 1040 Id: advert.Id, 1041 Type: pbRtr.AdvertType(advert.Type), 1042 Timestamp: advert.Timestamp.UnixNano(), 1043 Events: events, 1044 } 1045 1046 // send the advert to all on the control channel 1047 // since its not a solicitation 1048 if advert.Type != router.Solicitation { 1049 if err := n.sendMsg("advert", ControlChannel, msg); err != nil { 1050 log.Debugf("Network failed to advertise routes: %v", err) 1051 } 1052 continue 1053 } 1054 1055 // it's a solication, someone asked for it 1056 // so we're going to pick off the node and send it 1057 select { 1058 case node := <-n.solicited: 1059 // someone requested the route 1060 n.sendTo("advert", ControlChannel, node, msg) 1061 default: 1062 // send to all since we can't get anything 1063 n.sendMsg("advert", ControlChannel, msg) 1064 } 1065 case <-n.closed: 1066 return 1067 } 1068 } 1069 } 1070 1071 func (n *network) sendConnect() { 1072 // send connect message to NetworkChannel 1073 // NOTE: in theory we could do this as soon as 1074 // Dial to NetworkChannel succeeds, but instead 1075 // we initialize all other node resources first 1076 msg := &pbNet.Connect{ 1077 Node: &pbNet.Node{ 1078 Id: n.node.id, 1079 Address: n.node.address, 1080 }, 1081 } 1082 1083 if err := n.sendMsg("connect", NetworkChannel, msg); err != nil { 1084 log.Debugf("Network failed to send connect message: %s", err) 1085 } 1086 } 1087 1088 // connect will wait for a link to be established and send the connect 1089 // message. We're trying to ensure convergence pretty quickly. So we want 1090 // to hear back. In the case we become completely disconnected we'll 1091 // connect again once a new link is established 1092 func (n *network) connect() { 1093 // discovered lets us know what we received a peer message back 1094 var discovered bool 1095 var attempts int 1096 1097 // our advertise address 1098 loopback := n.server.Options().Advertise 1099 // actual address 1100 address := n.tunnel.Address() 1101 1102 for { 1103 // connected is used to define if the link is connected 1104 var connected bool 1105 1106 // check the links state 1107 for _, link := range n.tunnel.Links() { 1108 // skip loopback 1109 if link.Loopback() { 1110 continue 1111 } 1112 1113 // if remote is ourselves 1114 switch link.Remote() { 1115 case loopback, address: 1116 continue 1117 } 1118 1119 if link.State() == "connected" { 1120 connected = true 1121 break 1122 } 1123 } 1124 1125 // if we're not connected wait 1126 if !connected { 1127 // reset discovered 1128 discovered = false 1129 // sleep for a second 1130 time.Sleep(time.Second) 1131 // now try again 1132 continue 1133 } 1134 1135 // we're connected but are we discovered? 1136 if !discovered { 1137 // recreate the clients because all the tunnel links are gone 1138 // so we haven't send discovery beneath 1139 if err := n.createClients(); err != nil { 1140 log.Debugf("Failed to recreate network/control clients: %v", err) 1141 continue 1142 } 1143 1144 // send the connect message 1145 n.sendConnect() 1146 } 1147 1148 // check if we've been discovered 1149 select { 1150 case <-n.discovered: 1151 discovered = true 1152 attempts = 0 1153 case <-n.closed: 1154 return 1155 case <-time.After(time.Second + backoff.Do(attempts)): 1156 // we have to try again 1157 attempts++ 1158 1159 // reset attempts 5 == ~2mins 1160 if attempts > 5 { 1161 attempts = 0 1162 } 1163 } 1164 } 1165 } 1166 1167 // Connect connects the network 1168 func (n *network) Connect() error { 1169 n.Lock() 1170 defer n.Unlock() 1171 1172 // connect network tunnel 1173 if err := n.tunnel.Connect(); err != nil { 1174 return err 1175 } 1176 1177 // return if already connected 1178 if n.connected { 1179 // initialise the nodes 1180 n.initNodes(false) 1181 // send the connect message 1182 go n.sendConnect() 1183 return nil 1184 } 1185 1186 // initialise the nodes 1187 n.initNodes(true) 1188 1189 // set our internal node address 1190 // if advertise address is not set 1191 if len(n.options.Advertise) == 0 { 1192 n.server.Init(server.Advertise(n.tunnel.Address())) 1193 } 1194 1195 // listen on NetworkChannel 1196 netListener, err := n.tunnel.Listen( 1197 NetworkChannel, 1198 tunnel.ListenMode(tunnel.Multicast), 1199 tunnel.ListenTimeout(AnnounceTime*2), 1200 ) 1201 if err != nil { 1202 return err 1203 } 1204 1205 // listen on ControlChannel 1206 ctrlListener, err := n.tunnel.Listen( 1207 ControlChannel, 1208 tunnel.ListenMode(tunnel.Multicast), 1209 tunnel.ListenTimeout(router.AdvertiseTableTick*2), 1210 ) 1211 if err != nil { 1212 return err 1213 } 1214 1215 // dial into ControlChannel to send route adverts 1216 ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast)) 1217 if err != nil { 1218 return err 1219 } 1220 1221 n.tunClient[ControlChannel] = ctrlClient 1222 1223 // dial into NetworkChannel to send network messages 1224 netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast)) 1225 if err != nil { 1226 return err 1227 } 1228 1229 n.tunClient[NetworkChannel] = netClient 1230 1231 // create closed channel 1232 n.closed = make(chan bool) 1233 1234 // start the router 1235 if err := n.options.Router.Start(); err != nil { 1236 return err 1237 } 1238 1239 // start advertising routes 1240 advertChan, err := n.options.Router.Advertise() 1241 if err != nil { 1242 return err 1243 } 1244 1245 // start the server 1246 if err := n.server.Start(); err != nil { 1247 return err 1248 } 1249 1250 // advertise service routes 1251 go n.advertise(advertChan) 1252 // listen to network messages 1253 go n.processNetChan(netListener) 1254 // accept and process routes 1255 go n.processCtrlChan(ctrlListener) 1256 // manage connection once links are established 1257 go n.connect() 1258 // resolve nodes, broadcast announcements and prune stale nodes 1259 go n.manage() 1260 1261 // we're now connected 1262 n.connected = true 1263 1264 return nil 1265 } 1266 1267 func (n *network) close() error { 1268 // stop the server 1269 if err := n.server.Stop(); err != nil { 1270 return err 1271 } 1272 1273 // stop the router 1274 if err := n.router.Stop(); err != nil { 1275 return err 1276 } 1277 1278 // close the tunnel 1279 if err := n.tunnel.Close(); err != nil { 1280 return err 1281 } 1282 1283 return nil 1284 } 1285 1286 // createClients is used to create new clients in the event we lose all the tunnels 1287 func (n *network) createClients() error { 1288 // dial into ControlChannel to send route adverts 1289 ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast)) 1290 if err != nil { 1291 return err 1292 } 1293 1294 // dial into NetworkChannel to send network messages 1295 netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast)) 1296 if err != nil { 1297 return err 1298 } 1299 1300 n.Lock() 1301 defer n.Unlock() 1302 1303 // set the control client 1304 c, ok := n.tunClient[ControlChannel] 1305 if ok { 1306 c.Close() 1307 } 1308 n.tunClient[ControlChannel] = ctrlClient 1309 1310 // set the network client 1311 c, ok = n.tunClient[NetworkChannel] 1312 if ok { 1313 c.Close() 1314 } 1315 n.tunClient[NetworkChannel] = netClient 1316 1317 return nil 1318 } 1319 1320 // Close closes network connection 1321 func (n *network) Close() error { 1322 n.Lock() 1323 1324 if !n.connected { 1325 n.Unlock() 1326 return nil 1327 } 1328 1329 select { 1330 case <-n.closed: 1331 n.Unlock() 1332 return nil 1333 default: 1334 // TODO: send close message to the network channel 1335 close(n.closed) 1336 // set connected to false 1337 n.connected = false 1338 1339 // unlock the lock otherwise we'll deadlock sending the close 1340 n.Unlock() 1341 1342 msg := &pbNet.Close{ 1343 Node: &pbNet.Node{ 1344 Id: n.node.id, 1345 Address: n.node.address, 1346 }, 1347 } 1348 1349 if err := n.sendMsg("close", NetworkChannel, msg); err != nil { 1350 log.Debugf("Network failed to send close message: %s", err) 1351 } 1352 } 1353 1354 return n.close() 1355 } 1356 1357 // Client returns network client 1358 func (n *network) Client() client.Client { 1359 return n.client 1360 } 1361 1362 // Server returns network server 1363 func (n *network) Server() server.Server { 1364 return n.server 1365 }