github.com/annwntech/go-micro/v2@v2.9.5/network/default.go (about) 1 package network 2 3 import ( 4 "errors" 5 "fmt" 6 "hash/fnv" 7 "io" 8 "math" 9 "math/rand" 10 "sort" 11 "sync" 12 "time" 13 14 "github.com/golang/protobuf/proto" 15 "github.com/annwntech/go-micro/v2/client" 16 cmucp "github.com/annwntech/go-micro/v2/client/mucp" 17 rtr "github.com/annwntech/go-micro/v2/client/selector/router" 18 "github.com/annwntech/go-micro/v2/logger" 19 "github.com/annwntech/go-micro/v2/network/resolver/dns" 20 pbNet "github.com/annwntech/go-micro/v2/network/service/proto" 21 "github.com/annwntech/go-micro/v2/proxy" 22 "github.com/annwntech/go-micro/v2/router" 23 pbRtr "github.com/annwntech/go-micro/v2/router/service/proto" 24 "github.com/annwntech/go-micro/v2/server" 25 smucp "github.com/annwntech/go-micro/v2/server/mucp" 26 "github.com/annwntech/go-micro/v2/transport" 27 "github.com/annwntech/go-micro/v2/tunnel" 28 bun "github.com/annwntech/go-micro/v2/tunnel/broker" 29 tun "github.com/annwntech/go-micro/v2/tunnel/transport" 30 "github.com/annwntech/go-micro/v2/util/backoff" 31 pbUtil "github.com/annwntech/go-micro/v2/util/proto" 32 ) 33 34 var ( 35 // NetworkChannel is the name of the tunnel channel for passing network messages 36 NetworkChannel = "network" 37 // ControlChannel is the name of the tunnel channel for passing control message 38 ControlChannel = "control" 39 // DefaultLink is default network link 40 DefaultLink = "network" 41 // MaxConnections is the max number of network client connections 42 MaxConnections = 3 43 // MaxPeerErrors is the max number of peer errors before we remove it from network graph 44 MaxPeerErrors = 3 45 ) 46 47 var ( 48 // ErrClientNotFound is returned when client for tunnel channel could not be found 49 ErrClientNotFound = errors.New("client not found") 50 // ErrPeerLinkNotFound is returned when peer link could not be found in tunnel Links 51 ErrPeerLinkNotFound = errors.New("peer link not found") 52 // ErrPeerMaxExceeded is returned when peer has reached its max error count limit 53 ErrPeerMaxExceeded = errors.New("peer max errors exceeded") 54 ) 55 56 // network implements Network interface 57 type network struct { 58 // node is network node 59 *node 60 // options configure the network 61 options Options 62 // rtr is network router 63 router router.Router 64 // proxy is network proxy 65 proxy proxy.Proxy 66 // tunnel is network tunnel 67 tunnel tunnel.Tunnel 68 // server is network server 69 server server.Server 70 // client is network client 71 client client.Client 72 73 // tunClient is a map of tunnel channel clients 74 tunClient map[string]tunnel.Session 75 // peerLinks is a map of links for each peer 76 peerLinks map[string]tunnel.Link 77 78 sync.RWMutex 79 // connected marks the network as connected 80 connected bool 81 // closed closes the network 82 closed chan bool 83 // whether we've discovered by the network 84 discovered chan bool 85 } 86 87 // message is network message 88 type message struct { 89 // msg is transport message 90 msg *transport.Message 91 // session is tunnel session 92 session tunnel.Session 93 } 94 95 // newNetwork returns a new network node 96 func newNetwork(opts ...Option) Network { 97 // create default options 98 options := DefaultOptions() 99 // initialize network options 100 for _, o := range opts { 101 o(&options) 102 } 103 104 // set the address to a hashed address 105 hasher := fnv.New64() 106 hasher.Write([]byte(options.Address + options.Id)) 107 address := fmt.Sprintf("%d", hasher.Sum64()) 108 109 // set the address to advertise 110 var advertise string 111 var peerAddress string 112 113 if len(options.Advertise) > 0 { 114 advertise = options.Advertise 115 peerAddress = options.Advertise 116 } else { 117 advertise = options.Address 118 peerAddress = address 119 } 120 121 // init tunnel address to the network bind address 122 options.Tunnel.Init( 123 tunnel.Address(options.Address), 124 ) 125 126 // init router Id to the network id 127 options.Router.Init( 128 router.Id(options.Id), 129 router.Address(peerAddress), 130 ) 131 132 // create tunnel client with tunnel transport 133 tunTransport := tun.NewTransport( 134 tun.WithTunnel(options.Tunnel), 135 ) 136 137 // create the tunnel broker 138 tunBroker := bun.NewBroker( 139 bun.WithTunnel(options.Tunnel), 140 ) 141 142 // server is network server 143 server := smucp.NewServer( 144 server.Id(options.Id), 145 server.Address(peerAddress), 146 server.Advertise(advertise), 147 server.Name(options.Name), 148 server.Transport(tunTransport), 149 server.Broker(tunBroker), 150 ) 151 152 // client is network client 153 client := cmucp.NewClient( 154 client.Broker(tunBroker), 155 client.Transport(tunTransport), 156 client.Selector( 157 rtr.NewSelector( 158 rtr.WithRouter(options.Router), 159 ), 160 ), 161 ) 162 163 network := &network{ 164 node: &node{ 165 id: options.Id, 166 address: peerAddress, 167 peers: make(map[string]*node), 168 status: newStatus(), 169 }, 170 options: options, 171 router: options.Router, 172 proxy: options.Proxy, 173 tunnel: options.Tunnel, 174 server: server, 175 client: client, 176 tunClient: make(map[string]tunnel.Session), 177 peerLinks: make(map[string]tunnel.Link), 178 discovered: make(chan bool, 1), 179 } 180 181 network.node.network = network 182 183 return network 184 } 185 186 func (n *network) Init(opts ...Option) error { 187 n.Lock() 188 defer n.Unlock() 189 190 // TODO: maybe only allow reinit of certain opts 191 for _, o := range opts { 192 o(&n.options) 193 } 194 195 return nil 196 } 197 198 // Options returns network options 199 func (n *network) Options() Options { 200 n.RLock() 201 defer n.RUnlock() 202 203 options := n.options 204 205 return options 206 } 207 208 // Name returns network name 209 func (n *network) Name() string { 210 n.RLock() 211 defer n.RUnlock() 212 213 name := n.options.Name 214 215 return name 216 } 217 218 // acceptNetConn accepts connections from NetworkChannel 219 func (n *network) acceptNetConn(l tunnel.Listener, recv chan *message) { 220 var i int 221 for { 222 // accept a connection 223 conn, err := l.Accept() 224 if err != nil { 225 sleep := backoff.Do(i) 226 logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep) 227 time.Sleep(sleep) 228 i++ 229 continue 230 } 231 232 select { 233 case <-n.closed: 234 if err := conn.Close(); err != nil { 235 logger.Debugf("Network tunnel [%s] failed to close connection: %v", NetworkChannel, err) 236 } 237 return 238 default: 239 // go handle NetworkChannel connection 240 go n.handleNetConn(conn, recv) 241 } 242 } 243 } 244 245 // acceptCtrlConn accepts connections from ControlChannel 246 func (n *network) acceptCtrlConn(l tunnel.Listener, recv chan *message) { 247 var i int 248 for { 249 // accept a connection 250 conn, err := l.Accept() 251 if err != nil { 252 sleep := backoff.Do(i) 253 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 254 logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep) 255 } 256 time.Sleep(sleep) 257 i++ 258 continue 259 } 260 261 select { 262 case <-n.closed: 263 if err := conn.Close(); err != nil { 264 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 265 logger.Debugf("Network tunnel [%s] failed to close connection: %v", ControlChannel, err) 266 } 267 } 268 return 269 default: 270 // go handle ControlChannel connection 271 go n.handleCtrlConn(conn, recv) 272 } 273 } 274 } 275 276 // maskRoute will mask the route so that we apply the right values 277 func (n *network) maskRoute(r *pbRtr.Route) { 278 hasher := fnv.New64() 279 // the routes service address 280 address := r.Address 281 282 // only hash the address if we're advertising our own local routes 283 // avoid hashing * based routes 284 if r.Router == n.Id() && r.Address != "*" { 285 // hash the service before advertising it 286 hasher.Reset() 287 // routes for multiple instances of a service will be collapsed here. 288 // TODO: once we store labels in the table this may need to change 289 // to include the labels in case they differ but highly unlikely 290 hasher.Write([]byte(r.Service + n.Address())) 291 address = fmt.Sprintf("%d", hasher.Sum64()) 292 } 293 294 // calculate route metric to advertise 295 metric := n.getRouteMetric(r.Router, r.Gateway, r.Link) 296 297 // NOTE: we override Gateway, Link and Address here 298 r.Address = address 299 r.Gateway = n.Address() 300 r.Link = DefaultLink 301 r.Metric = metric 302 } 303 304 // advertise advertises routes to the network 305 func (n *network) advertise(advertChan <-chan *router.Advert) { 306 rnd := rand.New(rand.NewSource(time.Now().UnixNano())) 307 for { 308 select { 309 // process local adverts and randomly fire them at other nodes 310 case advert := <-advertChan: 311 // create a proto advert 312 var events []*pbRtr.Event 313 314 for _, event := range advert.Events { 315 // make a copy of the route 316 route := &pbRtr.Route{ 317 Service: event.Route.Service, 318 Address: event.Route.Address, 319 Gateway: event.Route.Gateway, 320 Network: event.Route.Network, 321 Router: event.Route.Router, 322 Link: event.Route.Link, 323 Metric: event.Route.Metric, 324 } 325 326 // override the various values 327 n.maskRoute(route) 328 329 e := &pbRtr.Event{ 330 Type: pbRtr.EventType(event.Type), 331 Timestamp: event.Timestamp.UnixNano(), 332 Route: route, 333 } 334 335 events = append(events, e) 336 } 337 338 msg := &pbRtr.Advert{ 339 Id: advert.Id, 340 Type: pbRtr.AdvertType(advert.Type), 341 Timestamp: advert.Timestamp.UnixNano(), 342 Events: events, 343 } 344 345 // get a list of node peers 346 peers := n.Peers() 347 348 // continue if there is no one to send to 349 if len(peers) == 0 { 350 continue 351 } 352 353 // advertise to max 3 peers 354 max := len(peers) 355 if max > 3 { 356 max = 3 357 } 358 359 for i := 0; i < max; i++ { 360 if peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id()); peer != nil { 361 if err := n.sendTo("advert", ControlChannel, peer, msg); err != nil { 362 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 363 logger.Debugf("Network failed to advertise routes to %s: %v", peer.Id(), err) 364 } 365 } 366 } 367 } 368 case <-n.closed: 369 return 370 } 371 } 372 } 373 374 // initNodes initializes tunnel with a list of resolved nodes 375 func (n *network) initNodes(startup bool) { 376 nodes, err := n.resolveNodes() 377 // NOTE: this condition never fires 378 // as resolveNodes() never returns error 379 if err != nil && !startup { 380 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 381 logger.Debugf("Network failed to init nodes: %v", err) 382 } 383 return 384 } 385 386 // strip self 387 var init []string 388 389 // our current address 390 advertised := n.server.Options().Advertise 391 392 for _, node := range nodes { 393 // skip self 394 if node == advertised { 395 continue 396 } 397 // add the node 398 init = append(init, node) 399 } 400 401 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 402 // initialize the tunnel 403 logger.Tracef("Network initialising nodes %+v\n", init) 404 } 405 406 n.tunnel.Init( 407 tunnel.Nodes(nodes...), 408 ) 409 } 410 411 // resolveNodes resolves network nodes to addresses 412 func (n *network) resolveNodes() ([]string, error) { 413 // resolve the network address to network nodes 414 records, err := n.options.Resolver.Resolve(n.options.Name) 415 if err != nil { 416 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 417 logger.Debugf("Network failed to resolve nodes: %v", err) 418 } 419 } 420 421 // sort by lowest priority 422 if err == nil { 423 sort.Slice(records, func(i, j int) bool { return records[i].Priority < records[j].Priority }) 424 } 425 426 // keep processing 427 428 nodeMap := make(map[string]bool) 429 430 // collect network node addresses 431 //nolint:prealloc 432 var nodes []string 433 var i int 434 435 for _, record := range records { 436 if _, ok := nodeMap[record.Address]; ok { 437 continue 438 } 439 440 nodeMap[record.Address] = true 441 nodes = append(nodes, record.Address) 442 443 i++ 444 445 // break once MaxConnection nodes has been reached 446 if i == MaxConnections { 447 break 448 } 449 } 450 451 // use the DNS resolver to expand peers 452 dns := &dns.Resolver{} 453 454 // append seed nodes if we have them 455 for _, node := range n.options.Nodes { 456 // resolve anything that looks like a host name 457 records, err := dns.Resolve(node) 458 if err != nil { 459 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 460 logger.Debugf("Failed to resolve %v %v", node, err) 461 } 462 continue 463 } 464 465 // add to the node map 466 for _, record := range records { 467 if _, ok := nodeMap[record.Address]; !ok { 468 nodes = append(nodes, record.Address) 469 } 470 } 471 } 472 473 return nodes, nil 474 } 475 476 // handleNetConn handles network announcement messages 477 func (n *network) handleNetConn(s tunnel.Session, msg chan *message) { 478 for { 479 m := new(transport.Message) 480 if err := s.Recv(m); err != nil { 481 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 482 logger.Debugf("Network tunnel [%s] receive error: %v", NetworkChannel, err) 483 } 484 switch err { 485 case io.EOF, tunnel.ErrReadTimeout: 486 s.Close() 487 return 488 } 489 continue 490 } 491 492 // check if peer is set 493 peer := m.Header["Micro-Peer"] 494 495 // check who the message is intended for 496 if len(peer) > 0 && peer != n.options.Id { 497 continue 498 } 499 500 select { 501 case msg <- &message{ 502 msg: m, 503 session: s, 504 }: 505 case <-n.closed: 506 return 507 } 508 } 509 } 510 511 // handleCtrlConn handles ControlChannel connections 512 func (n *network) handleCtrlConn(s tunnel.Session, msg chan *message) { 513 for { 514 m := new(transport.Message) 515 if err := s.Recv(m); err != nil { 516 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 517 logger.Debugf("Network tunnel [%s] receive error: %v", ControlChannel, err) 518 } 519 switch err { 520 case io.EOF, tunnel.ErrReadTimeout: 521 s.Close() 522 return 523 } 524 continue 525 } 526 527 // check if peer is set 528 peer := m.Header["Micro-Peer"] 529 530 // check who the message is intended for 531 if len(peer) > 0 && peer != n.options.Id { 532 continue 533 } 534 535 select { 536 case msg <- &message{ 537 msg: m, 538 session: s, 539 }: 540 case <-n.closed: 541 return 542 } 543 } 544 } 545 546 // getHopCount queries network graph and returns hop count for given router 547 // NOTE: this should be called getHopeMetric 548 // - Routes for local services have hop count 1 549 // - Routes with ID of adjacent nodes have hop count 10 550 // - Routes by peers of the advertiser have hop count 100 551 // - Routes beyond node neighbourhood have hop count 1000 552 func (n *network) getHopCount(rtr string) int { 553 // make sure node.peers are not modified 554 n.node.RLock() 555 defer n.node.RUnlock() 556 557 // we are the origin of the route 558 if rtr == n.options.Id { 559 return 1 560 } 561 562 // the route origin is our peer 563 if _, ok := n.node.peers[rtr]; ok { 564 return 10 565 } 566 567 // the route origin is the peer of our peer 568 for _, peer := range n.node.peers { 569 for id := range peer.peers { 570 if rtr == id { 571 return 100 572 } 573 } 574 } 575 // otherwise we are three hops away 576 return 1000 577 } 578 579 // getRouteMetric calculates router metric and returns it 580 // Route metric is calculated based on link status and route hopd count 581 func (n *network) getRouteMetric(router string, gateway string, link string) int64 { 582 // set the route metric 583 n.RLock() 584 defer n.RUnlock() 585 586 // local links are marked as 1 587 if link == "local" && gateway == "" { 588 return 1 589 } 590 591 // local links from other gateways as 2 592 if link == "local" && gateway != "" { 593 return 2 594 } 595 596 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 597 logger.Tracef("Network looking up %s link to gateway: %s", link, gateway) 598 } 599 // attempt to find link based on gateway address 600 lnk, ok := n.peerLinks[gateway] 601 if !ok { 602 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 603 logger.Debugf("Network failed to find a link to gateway: %s", gateway) 604 } 605 // no link found so infinite metric returned 606 return math.MaxInt64 607 } 608 609 // calculating metric 610 611 delay := lnk.Delay() 612 hops := n.getHopCount(router) 613 length := lnk.Length() 614 615 // make sure delay is non-zero 616 if delay == 0 { 617 delay = 1 618 } 619 620 // make sure length is non-zero 621 if length == 0 { 622 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 623 logger.Debugf("Link length is 0 %v %v", link, lnk.Length()) 624 } 625 length = 10e9 626 } 627 628 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 629 logger.Tracef("Network calculated metric %v delay %v length %v distance %v", (delay*length*int64(hops))/10e6, delay, length, hops) 630 } 631 632 return (delay * length * int64(hops)) / 10e6 633 } 634 635 // processCtrlChan processes messages received on ControlChannel 636 func (n *network) processCtrlChan(listener tunnel.Listener) { 637 defer listener.Close() 638 639 // receive control message queue 640 recv := make(chan *message, 128) 641 642 // accept ControlChannel cconnections 643 go n.acceptCtrlConn(listener, recv) 644 645 for { 646 select { 647 case m := <-recv: 648 // switch on type of message and take action 649 switch m.msg.Header["Micro-Method"] { 650 case "advert": 651 pbRtrAdvert := &pbRtr.Advert{} 652 653 if err := proto.Unmarshal(m.msg.Body, pbRtrAdvert); err != nil { 654 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 655 logger.Debugf("Network fail to unmarshal advert message: %v", err) 656 } 657 continue 658 } 659 660 // don't process your own messages 661 if pbRtrAdvert.Id == n.options.Id { 662 continue 663 } 664 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 665 logger.Debugf("Network received advert message from: %s", pbRtrAdvert.Id) 666 } 667 668 // loookup advertising node in our peer topology 669 advertNode := n.node.GetPeerNode(pbRtrAdvert.Id) 670 if advertNode == nil { 671 // if we can't find the node in our topology (MaxDepth) we skipp prcessing adverts 672 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 673 logger.Debugf("Network skipping advert message from unknown peer: %s", pbRtrAdvert.Id) 674 } 675 continue 676 } 677 678 var events []*router.Event 679 680 for _, event := range pbRtrAdvert.Events { 681 // for backwards compatibility reasons 682 if event == nil || event.Route == nil { 683 continue 684 } 685 686 // we know the advertising node is not the origin of the route 687 if pbRtrAdvert.Id != event.Route.Router { 688 // if the origin router is not the advertising node peer 689 // we can't rule out potential routing loops so we bail here 690 if peer := advertNode.GetPeerNode(event.Route.Router); peer == nil { 691 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 692 logger.Debugf("Network skipping advert message from peer: %s", pbRtrAdvert.Id) 693 } 694 continue 695 } 696 } 697 698 route := router.Route{ 699 Service: event.Route.Service, 700 Address: event.Route.Address, 701 Gateway: event.Route.Gateway, 702 Network: event.Route.Network, 703 Router: event.Route.Router, 704 Link: event.Route.Link, 705 Metric: event.Route.Metric, 706 } 707 708 // calculate route metric and add to the advertised metric 709 // we need to make sure we do not overflow math.MaxInt64 710 metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link) 711 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 712 logger.Tracef("Network metric for router %s and gateway %s: %v", event.Route.Router, event.Route.Gateway, metric) 713 } 714 715 // check we don't overflow max int 64 716 if d := route.Metric + metric; d <= 0 { 717 // set to max int64 if we overflow 718 route.Metric = math.MaxInt64 719 } else { 720 // set the combined value of metrics otherwise 721 route.Metric = d 722 } 723 724 // create router event 725 e := &router.Event{ 726 Type: router.EventType(event.Type), 727 Timestamp: time.Unix(0, pbRtrAdvert.Timestamp), 728 Route: route, 729 } 730 events = append(events, e) 731 } 732 733 // if no events are eligible for processing continue 734 if len(events) == 0 { 735 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 736 logger.Tracef("Network no events to be processed by router: %s", n.options.Id) 737 } 738 continue 739 } 740 741 // create an advert and process it 742 advert := &router.Advert{ 743 Id: pbRtrAdvert.Id, 744 Type: router.AdvertType(pbRtrAdvert.Type), 745 Timestamp: time.Unix(0, pbRtrAdvert.Timestamp), 746 TTL: time.Duration(pbRtrAdvert.Ttl), 747 Events: events, 748 } 749 750 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 751 logger.Tracef("Network router %s processing advert: %s", n.Id(), advert.Id) 752 } 753 if err := n.router.Process(advert); err != nil { 754 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 755 logger.Debugf("Network failed to process advert %s: %v", advert.Id, err) 756 } 757 } 758 } 759 case <-n.closed: 760 return 761 } 762 } 763 } 764 765 // processNetChan processes messages received on NetworkChannel 766 func (n *network) processNetChan(listener tunnel.Listener) { 767 defer listener.Close() 768 769 // receive network message queue 770 recv := make(chan *message, 128) 771 772 // accept NetworkChannel connections 773 go n.acceptNetConn(listener, recv) 774 775 for { 776 select { 777 case m := <-recv: 778 // switch on type of message and take action 779 switch m.msg.Header["Micro-Method"] { 780 case "connect": 781 // mark the time the message has been received 782 now := time.Now() 783 784 pbNetConnect := &pbNet.Connect{} 785 if err := proto.Unmarshal(m.msg.Body, pbNetConnect); err != nil { 786 logger.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err) 787 continue 788 } 789 790 // don't process your own messages 791 if pbNetConnect.Node.Id == n.options.Id { 792 continue 793 } 794 795 logger.Debugf("Network received connect message from: %s", pbNetConnect.Node.Id) 796 797 peer := &node{ 798 id: pbNetConnect.Node.Id, 799 address: pbNetConnect.Node.Address, 800 link: m.msg.Header["Micro-Link"], 801 peers: make(map[string]*node), 802 status: newStatus(), 803 lastSeen: now, 804 } 805 806 // update peer links 807 808 // TODO: should we do this only if we manage to add a peer 809 // What should we do if the peer links failed to be updated? 810 if err := n.updatePeerLinks(peer); err != nil { 811 logger.Debugf("Network failed updating peer links: %s", err) 812 } 813 814 // add peer to the list of node peers 815 if err := n.AddPeer(peer); err == ErrPeerExists { 816 logger.Tracef("Network peer exists, refreshing: %s", peer.id) 817 // update lastSeen time for the peer 818 if err := n.RefreshPeer(peer.id, peer.link, now); err != nil { 819 logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err) 820 } 821 } 822 823 // we send the sync message because someone has sent connect 824 // and wants to either connect or reconnect to the network 825 // The faster it gets the network config (routes and peer graph) 826 // the faster the network converges to a stable state 827 828 go func() { 829 // get node peer graph to send back to the connecting node 830 node := PeersToProto(n.node, MaxDepth) 831 832 msg := &pbNet.Sync{ 833 Peer: node, 834 } 835 836 // get a list of the best routes for each service in our routing table 837 routes, err := n.getProtoRoutes() 838 if err != nil { 839 logger.Debugf("Network node %s failed listing routes: %v", n.id, err) 840 } 841 // attached the routes to the message 842 msg.Routes = routes 843 844 // send sync message to the newly connected peer 845 if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil { 846 logger.Debugf("Network failed to send sync message: %v", err) 847 } 848 }() 849 case "peer": 850 // mark the time the message has been received 851 now := time.Now() 852 pbNetPeer := &pbNet.Peer{} 853 854 if err := proto.Unmarshal(m.msg.Body, pbNetPeer); err != nil { 855 logger.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err) 856 continue 857 } 858 859 // don't process your own messages 860 if pbNetPeer.Node.Id == n.options.Id { 861 continue 862 } 863 864 logger.Debugf("Network received peer message from: %s %s", pbNetPeer.Node.Id, pbNetPeer.Node.Address) 865 866 peer := &node{ 867 id: pbNetPeer.Node.Id, 868 address: pbNetPeer.Node.Address, 869 link: m.msg.Header["Micro-Link"], 870 peers: make(map[string]*node), 871 status: newPeerStatus(pbNetPeer), 872 lastSeen: now, 873 } 874 875 // update peer links 876 877 // TODO: should we do this only if we manage to add a peer 878 // What should we do if the peer links failed to be updated? 879 if err := n.updatePeerLinks(peer); err != nil { 880 logger.Debugf("Network failed updating peer links: %s", err) 881 } 882 883 // if it's a new peer i.e. we do not have it in our graph, we request full sync 884 if err := n.node.AddPeer(peer); err == nil { 885 go func() { 886 // marshal node graph into protobuf 887 node := PeersToProto(n.node, MaxDepth) 888 889 msg := &pbNet.Sync{ 890 Peer: node, 891 } 892 893 // get a list of the best routes for each service in our routing table 894 routes, err := n.getProtoRoutes() 895 if err != nil { 896 logger.Debugf("Network node %s failed listing routes: %v", n.id, err) 897 } 898 // attached the routes to the message 899 msg.Routes = routes 900 901 // send sync message to the newly connected peer 902 if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil { 903 logger.Debugf("Network failed to send sync message: %v", err) 904 } 905 }() 906 907 continue 908 // if we already have the peer in our graph, skip further steps 909 } else if err != ErrPeerExists { 910 logger.Debugf("Network got error adding peer %v", err) 911 continue 912 } 913 914 logger.Tracef("Network peer exists, refreshing: %s", pbNetPeer.Node.Id) 915 916 // update lastSeen time for the peer 917 if err := n.RefreshPeer(peer.id, peer.link, now); err != nil { 918 logger.Debugf("Network failed refreshing peer %s: %v", pbNetPeer.Node.Id, err) 919 } 920 921 // NOTE: we don't unpack MaxDepth toplogy 922 peer = UnpackPeerTopology(pbNetPeer, now, MaxDepth-1) 923 // update the link 924 peer.link = m.msg.Header["Micro-Link"] 925 926 logger.Tracef("Network updating topology of node: %s", n.node.id) 927 if err := n.node.UpdatePeer(peer); err != nil { 928 logger.Debugf("Network failed to update peers: %v", err) 929 } 930 931 // tell the connect loop that we've been discovered 932 // so it stops sending connect messages out 933 select { 934 case n.discovered <- true: 935 default: 936 // don't block here 937 } 938 case "sync": 939 // record the timestamp of the message receipt 940 now := time.Now() 941 942 pbNetSync := &pbNet.Sync{} 943 if err := proto.Unmarshal(m.msg.Body, pbNetSync); err != nil { 944 logger.Debugf("Network tunnel [%s] sync unmarshal error: %v", NetworkChannel, err) 945 continue 946 } 947 948 // don't process your own messages 949 if pbNetSync.Peer.Node.Id == n.options.Id { 950 continue 951 } 952 953 logger.Debugf("Network received sync message from: %s", pbNetSync.Peer.Node.Id) 954 955 peer := &node{ 956 id: pbNetSync.Peer.Node.Id, 957 address: pbNetSync.Peer.Node.Address, 958 link: m.msg.Header["Micro-Link"], 959 peers: make(map[string]*node), 960 status: newPeerStatus(pbNetSync.Peer), 961 lastSeen: now, 962 } 963 964 // update peer links 965 966 // TODO: should we do this only if we manage to add a peer 967 // What should we do if the peer links failed to be updated? 968 if err := n.updatePeerLinks(peer); err != nil { 969 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 970 logger.Debugf("Network failed updating peer links: %s", err) 971 } 972 } 973 974 // add peer to the list of node peers 975 if err := n.node.AddPeer(peer); err == ErrPeerExists { 976 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 977 logger.Tracef("Network peer exists, refreshing: %s", peer.id) 978 } 979 // update lastSeen time for the existing node 980 if err := n.RefreshPeer(peer.id, peer.link, now); err != nil { 981 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 982 logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err) 983 } 984 } 985 } 986 987 // when we receive a sync message we update our routing table 988 // and send a peer message back to the network to announce our presence 989 990 // add all the routes we have received in the sync message 991 for _, pbRoute := range pbNetSync.Routes { 992 // unmarshal the routes received from remote peer 993 route := pbUtil.ProtoToRoute(pbRoute) 994 // continue if we are the originator of the route 995 if route.Router == n.router.Options().Id { 996 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 997 logger.Debugf("Network node %s skipping route addition: route already present", n.id) 998 } 999 continue 1000 } 1001 1002 metric := n.getRouteMetric(route.Router, route.Gateway, route.Link) 1003 // check we don't overflow max int 64 1004 if d := route.Metric + metric; d <= 0 { 1005 // set to max int64 if we overflow 1006 route.Metric = math.MaxInt64 1007 } else { 1008 // set the combined value of metrics otherwise 1009 route.Metric = d 1010 } 1011 1012 ///////////////////////////////////////////////////////////////////// 1013 // maybe we should not be this clever ¯\_(ツ)_/¯ // 1014 ///////////////////////////////////////////////////////////////////// 1015 // lookup best routes for the services in the just received route 1016 q := []router.QueryOption{ 1017 router.QueryService(route.Service), 1018 router.QueryStrategy(n.router.Options().Advertise), 1019 } 1020 1021 routes, err := n.router.Table().Query(q...) 1022 if err != nil && err != router.ErrRouteNotFound { 1023 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1024 logger.Debugf("Network node %s failed listing best routes for %s: %v", n.id, route.Service, err) 1025 } 1026 continue 1027 } 1028 1029 // we found no routes for the given service 1030 // create the new route we have just received 1031 if len(routes) == 0 { 1032 if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute { 1033 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1034 logger.Debugf("Network node %s failed to add route: %v", n.id, err) 1035 } 1036 } 1037 continue 1038 } 1039 1040 // find the best route for the given service 1041 // from the routes that we would advertise 1042 bestRoute := routes[0] 1043 for _, r := range routes[0:] { 1044 if bestRoute.Metric > r.Metric { 1045 bestRoute = r 1046 } 1047 } 1048 1049 // Take the best route to given service and: 1050 // only add new routes if the metric is better 1051 // than the metric of our best route 1052 1053 if bestRoute.Metric <= route.Metric { 1054 continue 1055 } 1056 /////////////////////////////////////////////////////////////////////// 1057 /////////////////////////////////////////////////////////////////////// 1058 1059 // add route to the routing table 1060 if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute { 1061 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1062 logger.Debugf("Network node %s failed to add route: %v", n.id, err) 1063 } 1064 } 1065 } 1066 1067 // update your sync timestamp 1068 // NOTE: this might go away as we will be doing full table advert to random peer 1069 if err := n.RefreshSync(now); err != nil { 1070 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1071 logger.Debugf("Network failed refreshing sync time: %v", err) 1072 } 1073 } 1074 1075 go func() { 1076 // get node peer graph to send back to the syncing node 1077 msg := PeersToProto(n.node, MaxDepth) 1078 1079 // advertise yourself to the new node 1080 if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil { 1081 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1082 logger.Debugf("Network failed to advertise peers: %v", err) 1083 } 1084 } 1085 }() 1086 case "close": 1087 pbNetClose := &pbNet.Close{} 1088 if err := proto.Unmarshal(m.msg.Body, pbNetClose); err != nil { 1089 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1090 logger.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err) 1091 } 1092 continue 1093 } 1094 1095 // don't process your own messages 1096 if pbNetClose.Node.Id == n.options.Id { 1097 continue 1098 } 1099 1100 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1101 logger.Debugf("Network received close message from: %s", pbNetClose.Node.Id) 1102 } 1103 1104 peer := &node{ 1105 id: pbNetClose.Node.Id, 1106 address: pbNetClose.Node.Address, 1107 } 1108 1109 if err := n.DeletePeerNode(peer.id); err != nil { 1110 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1111 logger.Debugf("Network failed to delete node %s routes: %v", peer.id, err) 1112 } 1113 } 1114 1115 if err := n.prunePeerRoutes(peer); err != nil { 1116 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1117 logger.Debugf("Network failed pruning peer %s routes: %v", peer.id, err) 1118 } 1119 } 1120 1121 // NOTE: we should maybe advertise this to the network so we converge faster on closed nodes 1122 // as opposed to our waiting until the node eventually gets pruned; something to think about 1123 1124 // delete peer from the peerLinks 1125 n.Lock() 1126 delete(n.peerLinks, pbNetClose.Node.Address) 1127 n.Unlock() 1128 } 1129 case <-n.closed: 1130 return 1131 } 1132 } 1133 } 1134 1135 // pruneRoutes prunes routes return by given query 1136 func (n *network) pruneRoutes(q ...router.QueryOption) error { 1137 routes, err := n.router.Table().Query(q...) 1138 if err != nil && err != router.ErrRouteNotFound { 1139 return err 1140 } 1141 1142 for _, route := range routes { 1143 if err := n.router.Table().Delete(route); err != nil && err != router.ErrRouteNotFound { 1144 return err 1145 } 1146 } 1147 1148 return nil 1149 } 1150 1151 // pruneNodeRoutes prunes routes that were either originated by or routable via given node 1152 func (n *network) prunePeerRoutes(peer *node) error { 1153 // lookup all routes originated by router 1154 q := []router.QueryOption{ 1155 router.QueryRouter(peer.id), 1156 } 1157 if err := n.pruneRoutes(q...); err != nil { 1158 return err 1159 } 1160 1161 // lookup all routes routable via gw 1162 q = []router.QueryOption{ 1163 router.QueryGateway(peer.address), 1164 } 1165 if err := n.pruneRoutes(q...); err != nil { 1166 return err 1167 } 1168 1169 return nil 1170 } 1171 1172 // manage the process of announcing to peers and prune any peer nodes that have not been 1173 // seen for a period of time. Also removes all the routes either originated by or routable 1174 // by the stale nodes. it also resolves nodes periodically and adds them to the tunnel 1175 func (n *network) manage() { 1176 rnd := rand.New(rand.NewSource(time.Now().UnixNano())) 1177 announce := time.NewTicker(AnnounceTime) 1178 defer announce.Stop() 1179 prune := time.NewTicker(PruneTime) 1180 defer prune.Stop() 1181 resolve := time.NewTicker(ResolveTime) 1182 defer resolve.Stop() 1183 netsync := time.NewTicker(SyncTime) 1184 defer netsync.Stop() 1185 1186 // list of links we've sent to 1187 links := make(map[string]time.Time) 1188 1189 for { 1190 select { 1191 case <-n.closed: 1192 return 1193 case <-announce.C: 1194 current := make(map[string]time.Time) 1195 1196 // build link map of current links 1197 for _, link := range n.tunnel.Links() { 1198 if n.isLoopback(link) { 1199 continue 1200 } 1201 // get an existing timestamp if it exists 1202 current[link.Id()] = links[link.Id()] 1203 } 1204 1205 // replace link map 1206 // we do this because a growing map is not 1207 // garbage collected 1208 links = current 1209 1210 n.RLock() 1211 var i int 1212 // create a list of peers to send to 1213 var peers []*node 1214 1215 // check peers to see if they need to be sent to 1216 for _, peer := range n.peers { 1217 if i >= 3 { 1218 break 1219 } 1220 1221 // get last sent 1222 lastSent := links[peer.link] 1223 1224 // check when we last sent to the peer 1225 // and send a peer message if we havent 1226 if lastSent.IsZero() || time.Since(lastSent) > KeepAliveTime { 1227 link := peer.link 1228 id := peer.id 1229 1230 // might not exist for some weird reason 1231 if len(link) == 0 { 1232 // set the link via peer links 1233 l, ok := n.peerLinks[peer.address] 1234 if ok { 1235 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1236 logger.Debugf("Network link not found for peer %s cannot announce", peer.id) 1237 } 1238 continue 1239 } 1240 link = l.Id() 1241 } 1242 1243 // add to the list of peers we're going to send to 1244 peers = append(peers, &node{ 1245 id: id, 1246 link: link, 1247 }) 1248 1249 // increment our count 1250 i++ 1251 } 1252 } 1253 1254 n.RUnlock() 1255 1256 // peers to proto 1257 msg := PeersToProto(n.node, MaxDepth) 1258 1259 // we're only going to send to max 3 peers at any given tick 1260 for _, peer := range peers { 1261 // advertise yourself to the network 1262 if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil { 1263 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1264 logger.Debugf("Network failed to advertise peer %s: %v", peer.id, err) 1265 } 1266 continue 1267 } 1268 1269 // update last sent time 1270 links[peer.link] = time.Now() 1271 } 1272 1273 // now look at links we may not have sent to. this may occur 1274 // where a connect message was lost 1275 for link, lastSent := range links { 1276 if !lastSent.IsZero() || time.Since(lastSent) < KeepAliveTime { 1277 continue 1278 } 1279 1280 peer := &node{ 1281 // unknown id of the peer 1282 link: link, 1283 } 1284 1285 // unknown link and peer so lets do the connect flow 1286 if err := n.sendTo("connect", NetworkChannel, peer, msg); err != nil { 1287 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1288 logger.Debugf("Network failed to connect %s: %v", peer.id, err) 1289 } 1290 continue 1291 } 1292 1293 links[peer.link] = time.Now() 1294 } 1295 case <-prune.C: 1296 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1297 logger.Debugf("Network node %s pruning stale peers", n.id) 1298 } 1299 pruned := n.PruneStalePeers(PruneTime) 1300 1301 for id, peer := range pruned { 1302 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1303 logger.Debugf("Network peer exceeded prune time: %s", id) 1304 } 1305 n.Lock() 1306 delete(n.peerLinks, peer.address) 1307 n.Unlock() 1308 1309 if err := n.prunePeerRoutes(peer); err != nil { 1310 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1311 logger.Debugf("Network failed pruning peer %s routes: %v", id, err) 1312 } 1313 } 1314 } 1315 1316 // get a list of all routes 1317 routes, err := n.options.Router.Table().List() 1318 if err != nil { 1319 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1320 logger.Debugf("Network failed listing routes when pruning peers: %v", err) 1321 } 1322 continue 1323 } 1324 1325 // collect all the router IDs in the routing table 1326 routers := make(map[string]bool) 1327 1328 for _, route := range routes { 1329 // check if its been processed 1330 if _, ok := routers[route.Router]; ok { 1331 continue 1332 } 1333 1334 // mark as processed 1335 routers[route.Router] = true 1336 1337 // if the router is in our peer graph do NOT delete routes originated by it 1338 if peer := n.node.GetPeerNode(route.Router); peer != nil { 1339 continue 1340 } 1341 // otherwise delete all the routes originated by it 1342 if err := n.pruneRoutes(router.QueryRouter(route.Router)); err != nil { 1343 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1344 logger.Debugf("Network failed deleting routes by %s: %v", route.Router, err) 1345 } 1346 } 1347 } 1348 case <-netsync.C: 1349 // get a list of node peers 1350 peers := n.Peers() 1351 1352 // skip when there are no peers 1353 if len(peers) == 0 { 1354 continue 1355 } 1356 1357 // pick a random peer from the list of peers and request full sync 1358 peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id()) 1359 // skip if we can't find randmly selected peer 1360 if peer == nil { 1361 continue 1362 } 1363 1364 go func() { 1365 // get node peer graph to send back to the connecting node 1366 node := PeersToProto(n.node, MaxDepth) 1367 1368 msg := &pbNet.Sync{ 1369 Peer: node, 1370 } 1371 1372 // get a list of the best routes for each service in our routing table 1373 routes, err := n.getProtoRoutes() 1374 if err != nil { 1375 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1376 logger.Debugf("Network node %s failed listing routes: %v", n.id, err) 1377 } 1378 } 1379 // attached the routes to the message 1380 msg.Routes = routes 1381 1382 // send sync message to the newly connected peer 1383 if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil { 1384 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1385 logger.Debugf("Network failed to send sync message: %v", err) 1386 } 1387 } 1388 }() 1389 case <-resolve.C: 1390 n.initNodes(false) 1391 } 1392 } 1393 } 1394 1395 // getAdvertProtoRoutes returns a list of routes to advertise to remote peer 1396 // based on the advertisement strategy encoded in protobuf 1397 // It returns error if the routes failed to be retrieved from the routing table 1398 func (n *network) getProtoRoutes() ([]*pbRtr.Route, error) { 1399 // get a list of the best routes for each service in our routing table 1400 q := []router.QueryOption{ 1401 router.QueryStrategy(n.router.Options().Advertise), 1402 } 1403 1404 routes, err := n.router.Table().Query(q...) 1405 if err != nil && err != router.ErrRouteNotFound { 1406 return nil, err 1407 } 1408 1409 // encode the routes to protobuf 1410 pbRoutes := make([]*pbRtr.Route, 0, len(routes)) 1411 for _, route := range routes { 1412 // generate new route proto 1413 pbRoute := pbUtil.RouteToProto(route) 1414 // mask the route before outbounding 1415 n.maskRoute(pbRoute) 1416 // add to list of routes 1417 pbRoutes = append(pbRoutes, pbRoute) 1418 } 1419 1420 return pbRoutes, nil 1421 } 1422 1423 func (n *network) sendConnect() { 1424 // send connect message to NetworkChannel 1425 // NOTE: in theory we could do this as soon as 1426 // Dial to NetworkChannel succeeds, but instead 1427 // we initialize all other node resources first 1428 msg := &pbNet.Connect{ 1429 Node: &pbNet.Node{ 1430 Id: n.node.id, 1431 Address: n.node.address, 1432 }, 1433 } 1434 1435 if err := n.sendMsg("connect", NetworkChannel, msg); err != nil { 1436 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1437 logger.Debugf("Network failed to send connect message: %s", err) 1438 } 1439 } 1440 } 1441 1442 // sendTo sends a message to a specific node as a one off. 1443 // we need this because when links die, we have no discovery info, 1444 // and sending to an existing multicast link doesn't immediately work 1445 func (n *network) sendTo(method, channel string, peer *node, msg proto.Message) error { 1446 body, err := proto.Marshal(msg) 1447 if err != nil { 1448 return err 1449 } 1450 1451 // Create a unicast connection to the peer but don't do the open/accept flow 1452 c, err := n.tunnel.Dial(channel, tunnel.DialWait(false), tunnel.DialLink(peer.link)) 1453 if err != nil { 1454 if peerNode := n.GetPeerNode(peer.id); peerNode != nil { 1455 // update node status when error happens 1456 peerNode.status.err.Update(err) 1457 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1458 logger.Debugf("Network increment peer %v error count to: %d", peerNode, peerNode, peerNode.status.Error().Count()) 1459 } 1460 if count := peerNode.status.Error().Count(); count == MaxPeerErrors { 1461 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1462 logger.Debugf("Network peer %v error count exceeded %d. Prunning.", peerNode, MaxPeerErrors) 1463 } 1464 n.PrunePeer(peerNode.id) 1465 } 1466 } 1467 return err 1468 } 1469 defer c.Close() 1470 1471 id := peer.id 1472 1473 if len(id) == 0 { 1474 id = peer.link 1475 } 1476 1477 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1478 logger.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, id) 1479 } 1480 tmsg := &transport.Message{ 1481 Header: map[string]string{ 1482 "Micro-Method": method, 1483 }, 1484 Body: body, 1485 } 1486 1487 // setting the peer header 1488 if len(peer.id) > 0 { 1489 tmsg.Header["Micro-Peer"] = peer.id 1490 } 1491 1492 if err := c.Send(tmsg); err != nil { 1493 // TODO: Lookup peer in our graph 1494 if peerNode := n.GetPeerNode(peer.id); peerNode != nil { 1495 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1496 logger.Debugf("Network found peer %s: %v", peer.id, peerNode) 1497 } 1498 // update node status when error happens 1499 peerNode.status.err.Update(err) 1500 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1501 logger.Debugf("Network increment node peer %p %v count to: %d", peerNode, peerNode, peerNode.status.Error().Count()) 1502 } 1503 if count := peerNode.status.Error().Count(); count == MaxPeerErrors { 1504 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1505 logger.Debugf("Network node peer %v count exceeded %d: %d", peerNode, MaxPeerErrors, peerNode.status.Error().Count()) 1506 } 1507 n.PrunePeer(peerNode.id) 1508 } 1509 } 1510 return err 1511 } 1512 1513 return nil 1514 } 1515 1516 // sendMsg sends a message to the tunnel channel 1517 func (n *network) sendMsg(method, channel string, msg proto.Message) error { 1518 body, err := proto.Marshal(msg) 1519 if err != nil { 1520 return err 1521 } 1522 1523 // check if the channel client is initialized 1524 n.RLock() 1525 client, ok := n.tunClient[channel] 1526 if !ok || client == nil { 1527 n.RUnlock() 1528 return ErrClientNotFound 1529 } 1530 n.RUnlock() 1531 1532 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1533 logger.Debugf("Network sending %s message from: %s", method, n.options.Id) 1534 } 1535 1536 return client.Send(&transport.Message{ 1537 Header: map[string]string{ 1538 "Micro-Method": method, 1539 }, 1540 Body: body, 1541 }) 1542 } 1543 1544 // updatePeerLinks updates link for a given peer 1545 func (n *network) updatePeerLinks(peer *node) error { 1546 n.Lock() 1547 defer n.Unlock() 1548 1549 linkId := peer.link 1550 1551 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 1552 logger.Tracef("Network looking up link %s in the peer links", linkId) 1553 } 1554 1555 // lookup the peer link 1556 var peerLink tunnel.Link 1557 1558 for _, link := range n.tunnel.Links() { 1559 if link.Id() == linkId { 1560 peerLink = link 1561 break 1562 } 1563 } 1564 1565 if peerLink == nil { 1566 return ErrPeerLinkNotFound 1567 } 1568 1569 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 1570 // if the peerLink is found in the returned links update peerLinks 1571 logger.Tracef("Network updating peer links for peer %s", peer.address) 1572 } 1573 1574 // lookup a link and update it if better link is available 1575 if link, ok := n.peerLinks[peer.address]; ok { 1576 // if the existing has better Length then the new, replace it 1577 if link.Length() < peerLink.Length() { 1578 n.peerLinks[peer.address] = peerLink 1579 } 1580 return nil 1581 } 1582 1583 // add peerLink to the peerLinks map 1584 n.peerLinks[peer.address] = peerLink 1585 1586 return nil 1587 } 1588 1589 // isLoopback checks if a link is a loopback to ourselves 1590 func (n *network) isLoopback(link tunnel.Link) bool { 1591 // skip loopback 1592 if link.Loopback() { 1593 return true 1594 } 1595 1596 // our advertise address 1597 loopback := n.server.Options().Advertise 1598 // actual address 1599 address := n.tunnel.Address() 1600 1601 // if remote is ourselves 1602 switch link.Remote() { 1603 case loopback, address: 1604 return true 1605 } 1606 1607 return false 1608 } 1609 1610 // connect will wait for a link to be established and send the connect 1611 // message. We're trying to ensure convergence pretty quickly. So we want 1612 // to hear back. In the case we become completely disconnected we'll 1613 // connect again once a new link is established 1614 func (n *network) connect() { 1615 // discovered lets us know what we received a peer message back 1616 var discovered bool 1617 var attempts int 1618 1619 for { 1620 // connected is used to define if the link is connected 1621 var connected bool 1622 1623 // check the links state 1624 for _, link := range n.tunnel.Links() { 1625 // skip loopback 1626 if n.isLoopback(link) { 1627 continue 1628 } 1629 1630 if link.State() == "connected" { 1631 connected = true 1632 break 1633 } 1634 } 1635 1636 // if we're not connected wait 1637 if !connected { 1638 // reset discovered 1639 discovered = false 1640 // sleep for a second 1641 time.Sleep(time.Second) 1642 // now try again 1643 continue 1644 } 1645 1646 // we're connected but are we discovered? 1647 if !discovered { 1648 // recreate the clients because all the tunnel links are gone 1649 // so we haven't send discovery beneath 1650 // NOTE: when starting the tunnel for the first time we might be recreating potentially 1651 // well functioning tunnel clients as "discovered" will be false until the 1652 // n.discovered channel is read at some point later on. 1653 if err := n.createClients(); err != nil { 1654 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1655 logger.Debugf("Failed to recreate network/control clients: %v", err) 1656 } 1657 continue 1658 } 1659 1660 // send the connect message 1661 n.sendConnect() 1662 } 1663 1664 // check if we've been discovered 1665 select { 1666 case <-n.discovered: 1667 discovered = true 1668 attempts = 0 1669 case <-n.closed: 1670 return 1671 case <-time.After(time.Second + backoff.Do(attempts)): 1672 // we have to try again 1673 attempts++ 1674 } 1675 } 1676 } 1677 1678 // Connect connects the network 1679 func (n *network) Connect() error { 1680 n.Lock() 1681 defer n.Unlock() 1682 1683 // connect network tunnel 1684 if err := n.tunnel.Connect(); err != nil { 1685 return err 1686 } 1687 1688 // return if already connected 1689 if n.connected { 1690 // initialise the nodes 1691 n.initNodes(false) 1692 // send the connect message 1693 go n.sendConnect() 1694 return nil 1695 } 1696 1697 // initialise the nodes 1698 n.initNodes(true) 1699 1700 // set our internal node address 1701 // if advertise address is not set 1702 if len(n.options.Advertise) == 0 { 1703 n.server.Init(server.Advertise(n.tunnel.Address())) 1704 } 1705 1706 // listen on NetworkChannel 1707 netListener, err := n.tunnel.Listen( 1708 NetworkChannel, 1709 tunnel.ListenMode(tunnel.Multicast), 1710 ) 1711 if err != nil { 1712 return err 1713 } 1714 1715 // listen on ControlChannel 1716 ctrlListener, err := n.tunnel.Listen( 1717 ControlChannel, 1718 tunnel.ListenMode(tunnel.Multicast), 1719 ) 1720 if err != nil { 1721 return err 1722 } 1723 1724 // dial into ControlChannel to send route adverts 1725 ctrlClient, err := n.tunnel.Dial( 1726 ControlChannel, 1727 tunnel.DialMode(tunnel.Multicast), 1728 ) 1729 if err != nil { 1730 return err 1731 } 1732 1733 n.tunClient[ControlChannel] = ctrlClient 1734 1735 // dial into NetworkChannel to send network messages 1736 netClient, err := n.tunnel.Dial( 1737 NetworkChannel, 1738 tunnel.DialMode(tunnel.Multicast), 1739 ) 1740 if err != nil { 1741 return err 1742 } 1743 1744 n.tunClient[NetworkChannel] = netClient 1745 1746 // create closed channel 1747 n.closed = make(chan bool) 1748 1749 // start the router 1750 if err := n.options.Router.Start(); err != nil { 1751 return err 1752 } 1753 1754 // start advertising routes 1755 advertChan, err := n.options.Router.Advertise() 1756 if err != nil { 1757 return err 1758 } 1759 1760 // start the server 1761 if err := n.server.Start(); err != nil { 1762 return err 1763 } 1764 1765 // advertise service routes 1766 go n.advertise(advertChan) 1767 // listen to network messages 1768 go n.processNetChan(netListener) 1769 // accept and process routes 1770 go n.processCtrlChan(ctrlListener) 1771 // manage connection once links are established 1772 go n.connect() 1773 // resolve nodes, broadcast announcements and prune stale nodes 1774 go n.manage() 1775 1776 // we're now connected 1777 n.connected = true 1778 1779 return nil 1780 } 1781 1782 func (n *network) close() error { 1783 // stop the server 1784 if err := n.server.Stop(); err != nil { 1785 return err 1786 } 1787 1788 // stop the router 1789 if err := n.router.Stop(); err != nil { 1790 return err 1791 } 1792 1793 // close the tunnel 1794 if err := n.tunnel.Close(); err != nil { 1795 return err 1796 } 1797 1798 return nil 1799 } 1800 1801 // createClients is used to create new clients in the event we lose all the tunnels 1802 func (n *network) createClients() error { 1803 // dial into ControlChannel to send route adverts 1804 ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast)) 1805 if err != nil { 1806 return err 1807 } 1808 1809 // dial into NetworkChannel to send network messages 1810 netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast)) 1811 if err != nil { 1812 return err 1813 } 1814 1815 n.Lock() 1816 defer n.Unlock() 1817 1818 // set the control client 1819 c, ok := n.tunClient[ControlChannel] 1820 if ok { 1821 c.Close() 1822 } 1823 n.tunClient[ControlChannel] = ctrlClient 1824 1825 // set the network client 1826 c, ok = n.tunClient[NetworkChannel] 1827 if ok { 1828 c.Close() 1829 } 1830 n.tunClient[NetworkChannel] = netClient 1831 1832 return nil 1833 } 1834 1835 // Close closes network connection 1836 func (n *network) Close() error { 1837 n.Lock() 1838 1839 if !n.connected { 1840 n.Unlock() 1841 return nil 1842 } 1843 1844 select { 1845 case <-n.closed: 1846 n.Unlock() 1847 return nil 1848 default: 1849 close(n.closed) 1850 1851 // set connected to false 1852 n.connected = false 1853 1854 // unlock the lock otherwise we'll deadlock sending the close 1855 n.Unlock() 1856 1857 msg := &pbNet.Close{ 1858 Node: &pbNet.Node{ 1859 Id: n.node.id, 1860 Address: n.node.address, 1861 }, 1862 } 1863 1864 if err := n.sendMsg("close", NetworkChannel, msg); err != nil { 1865 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1866 logger.Debugf("Network failed to send close message: %s", err) 1867 } 1868 } 1869 <-time.After(time.Millisecond * 100) 1870 } 1871 1872 return n.close() 1873 } 1874 1875 // Client returns network client 1876 func (n *network) Client() client.Client { 1877 return n.client 1878 } 1879 1880 // Server returns network server 1881 func (n *network) Server() server.Server { 1882 return n.server 1883 }