github.com/tickoalcantara12/micro/v3@v3.0.0-20221007104245-9d75b9bcbab9/service/network/mucp/mucp.go (about) 1 // Licensed under the Apache License, Version 2.0 (the "License"); 2 // you may not use this file except in compliance with the License. 3 // You may obtain a copy of the License at 4 // 5 // https://www.apache.org/licenses/LICENSE-2.0 6 // 7 // Unless required by applicable law or agreed to in writing, software 8 // distributed under the License is distributed on an "AS IS" BASIS, 9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 // See the License for the specific language governing permissions and 11 // limitations under the License. 12 // 13 // Original source: github.com/tickoalcantara12/micro/v3/service/network/mucp/mucp.go 14 15 package mucp 16 17 import ( 18 "errors" 19 "fmt" 20 "hash/fnv" 21 "io" 22 "math" 23 "math/rand" 24 "sync" 25 "time" 26 27 "github.com/golang/protobuf/proto" 28 "github.com/tickoalcantara12/micro/v3/service/client" 29 cmucp "github.com/tickoalcantara12/micro/v3/service/client/mucp" 30 "github.com/tickoalcantara12/micro/v3/service/logger" 31 "github.com/tickoalcantara12/micro/v3/service/network" 32 pb "github.com/tickoalcantara12/micro/v3/service/network/mucp/proto" 33 "github.com/tickoalcantara12/micro/v3/service/network/resolver/dns" 34 "github.com/tickoalcantara12/micro/v3/service/network/transport" 35 "github.com/tickoalcantara12/micro/v3/service/network/tunnel" 36 bun "github.com/tickoalcantara12/micro/v3/service/network/tunnel/broker" 37 tun "github.com/tickoalcantara12/micro/v3/service/network/tunnel/transport" 38 "github.com/tickoalcantara12/micro/v3/service/proxy" 39 "github.com/tickoalcantara12/micro/v3/service/registry/noop" 40 "github.com/tickoalcantara12/micro/v3/service/router" 41 "github.com/tickoalcantara12/micro/v3/service/server" 42 smucp "github.com/tickoalcantara12/micro/v3/service/server/mucp" 43 "github.com/tickoalcantara12/micro/v3/util/backoff" 44 ) 45 46 var ( 47 // DefaultName is default network name 48 DefaultName = "go.micro" 49 // DefaultAddress is default network address 50 DefaultAddress = ":0" 51 // AnnounceTime defines time interval to periodically announce node neighbours 52 AnnounceTime = 1 * time.Second 53 // KeepAliveTime is the time in which we want to have sent a message to a peer 54 KeepAliveTime = 30 * time.Second 55 // SyncTime is the time a network node requests full sync from the network 56 SyncTime = 1 * time.Minute 57 // PruneTime defines time interval to periodically check nodes that need to be pruned 58 // due to their not announcing their presence within this time interval 59 PruneTime = 90 * time.Second 60 // MaxDepth defines max depth of peer topology 61 MaxDepth uint = 3 62 // NetworkChannel is the name of the tunnel channel for passing network messages 63 NetworkChannel = "network" 64 // ControlChannel is the name of the tunnel channel for passing control message 65 ControlChannel = "control" 66 // DefaultLink is default network link 67 DefaultLink = "network" 68 // MaxConnections is the max number of network client connections 69 MaxConnections = 3 70 // MaxPeerErrors is the max number of peer errors before we remove it from network graph 71 MaxPeerErrors = 3 72 // ErrPeerExists is returned when adding a peer which already exists 73 ErrPeerExists = errors.New("peer already exists") 74 // ErrPeerNotFound is returned when a peer could not be found in node topology 75 ErrPeerNotFound = errors.New("peer not found") 76 // ErrClientNotFound is returned when client for tunnel channel could not be found 77 ErrClientNotFound = errors.New("client not found") 78 // ErrPeerLinkNotFound is returned when peer link could not be found in tunnel Links 79 ErrPeerLinkNotFound = errors.New("peer link not found") 80 // ErrPeerMaxExceeded is returned when peer has reached its max error count limit 81 ErrPeerMaxExceeded = errors.New("peer max errors exceeded") 82 ) 83 84 // network implements Network interface 85 type mucpNetwork struct { 86 // node is network node 87 *node 88 // options configure the network 89 options network.Options 90 // rtr is network router 91 router router.Router 92 // proxy is network proxy 93 proxy proxy.Proxy 94 // tunnel is network tunnel 95 tunnel tunnel.Tunnel 96 // server is network server 97 server server.Server 98 // client is network client 99 client client.Client 100 101 // tunClient is a map of tunnel channel clients 102 tunClient map[string]tunnel.Session 103 // peerLinks is a map of links for each peer 104 peerLinks map[string]tunnel.Link 105 106 sync.RWMutex 107 // connected marks the network as connected 108 connected bool 109 // closed closes the network 110 closed chan bool 111 // whether we've discovered by the network 112 discovered chan bool 113 } 114 115 // message is network message 116 type message struct { 117 // msg is transport message 118 msg *transport.Message 119 // session is tunnel session 120 session tunnel.Session 121 } 122 123 // NewNetwork returns a new network node 124 func NewNetwork(opts ...network.Option) network.Network { 125 // create default options 126 options := network.DefaultOptions() 127 // initialize network options 128 for _, o := range opts { 129 o(&options) 130 } 131 132 // set the address to a hashed address 133 hasher := fnv.New64() 134 hasher.Write([]byte(options.Address + options.Id)) 135 address := fmt.Sprintf("%d", hasher.Sum64()) 136 137 // set the address to advertise 138 var advertise string 139 var peerAddress string 140 141 if len(options.Advertise) > 0 { 142 advertise = options.Advertise 143 peerAddress = options.Advertise 144 } else { 145 advertise = options.Address 146 peerAddress = address 147 } 148 149 // init tunnel address to the network bind address 150 options.Tunnel.Init( 151 tunnel.Address(options.Address), 152 ) 153 154 // init router Id to the network id 155 options.Router.Init( 156 router.Id(options.Id), 157 router.Address(peerAddress), 158 ) 159 160 // create tunnel client with tunnel transport 161 tunTransport := tun.NewTransport( 162 tun.WithTunnel(options.Tunnel), 163 ) 164 165 // create the tunnel broker 166 tunBroker := bun.NewBroker( 167 bun.WithTunnel(options.Tunnel), 168 ) 169 170 // server is network server 171 // TODO: use the real registry 172 server := smucp.NewServer( 173 server.Id(options.Id), 174 server.Address(peerAddress), 175 server.Advertise(advertise), 176 server.Name(options.Name), 177 server.Transport(tunTransport), 178 server.Broker(tunBroker), 179 server.Registry(noop.NewRegistry()), 180 ) 181 182 // client is network client 183 client := cmucp.NewClient( 184 client.Broker(tunBroker), 185 client.Transport(tunTransport), 186 client.Router(options.Router), 187 ) 188 189 network := &mucpNetwork{ 190 node: &node{ 191 id: options.Id, 192 address: peerAddress, 193 peers: make(map[string]*node), 194 status: newStatus(), 195 }, 196 options: options, 197 router: options.Router, 198 proxy: options.Proxy, 199 tunnel: options.Tunnel, 200 server: server, 201 client: client, 202 tunClient: make(map[string]tunnel.Session), 203 peerLinks: make(map[string]tunnel.Link), 204 discovered: make(chan bool, 1), 205 } 206 207 network.node.network = network 208 209 return network 210 } 211 212 func (n *mucpNetwork) Init(opts ...network.Option) error { 213 n.Lock() 214 defer n.Unlock() 215 216 // TODO: maybe only allow reinit of certain opts 217 for _, o := range opts { 218 o(&n.options) 219 } 220 221 return nil 222 } 223 224 // Options returns network options 225 func (n *mucpNetwork) Options() network.Options { 226 n.RLock() 227 defer n.RUnlock() 228 229 options := n.options 230 231 return options 232 } 233 234 // Name returns network name 235 func (n *mucpNetwork) Name() string { 236 n.RLock() 237 defer n.RUnlock() 238 239 name := n.options.Name 240 241 return name 242 } 243 244 // acceptNetConn accepts connections from NetworkChannel 245 func (n *mucpNetwork) acceptNetConn(l tunnel.Listener, recv chan *message) { 246 var i int 247 for { 248 // accept a connection 249 conn, err := l.Accept() 250 if err != nil { 251 sleep := backoff.Do(i) 252 logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep) 253 time.Sleep(sleep) 254 i++ 255 continue 256 } 257 258 select { 259 case <-n.closed: 260 if err := conn.Close(); err != nil { 261 logger.Debugf("Network tunnel [%s] failed to close connection: %v", NetworkChannel, err) 262 } 263 return 264 default: 265 // go handle NetworkChannel connection 266 go n.handleNetConn(conn, recv) 267 } 268 } 269 } 270 271 // acceptCtrlConn accepts connections from ControlChannel 272 func (n *mucpNetwork) acceptCtrlConn(l tunnel.Listener, recv chan *message) { 273 var i int 274 for { 275 // accept a connection 276 conn, err := l.Accept() 277 if err != nil { 278 sleep := backoff.Do(i) 279 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 280 logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep) 281 } 282 time.Sleep(sleep) 283 i++ 284 continue 285 } 286 287 select { 288 case <-n.closed: 289 if err := conn.Close(); err != nil { 290 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 291 logger.Debugf("Network tunnel [%s] failed to close connection: %v", ControlChannel, err) 292 } 293 } 294 return 295 default: 296 // go handle ControlChannel connection 297 go n.handleCtrlConn(conn, recv) 298 } 299 } 300 } 301 302 // maskRoute will mask the route so that we apply the right values 303 func (n *mucpNetwork) maskRoute(r *pb.Route) { 304 hasher := fnv.New64() 305 // the routes service address 306 address := r.Address 307 308 // only hash the address if we're advertising our own local routes 309 // avoid hashing * based routes 310 if r.Router == n.Id() && r.Address != "*" { 311 // hash the service before advertising it 312 hasher.Reset() 313 // routes for multiple instances of a service will be collapsed here. 314 // TODO: once we store labels in the table this may need to change 315 // to include the labels in case they differ but highly unlikely 316 hasher.Write([]byte(r.Service + n.Address())) 317 address = fmt.Sprintf("%d", hasher.Sum64()) 318 } 319 320 // calculate route metric to advertise 321 metric := n.getRouteMetric(r.Router, r.Gateway, r.Link) 322 323 // NOTE: we override Gateway, Link and Address here 324 r.Address = address 325 r.Gateway = n.Address() 326 r.Link = DefaultLink 327 r.Metric = metric 328 } 329 330 // advertise advertises routes to the network 331 func (n *mucpNetwork) advertise(eventChan <-chan *router.Event) { 332 rnd := rand.New(rand.NewSource(time.Now().UnixNano())) 333 334 for { 335 select { 336 // process local events and randomly fire them at other nodes 337 case event := <-eventChan: 338 // create a proto advert 339 var pbEvents []*pb.Event 340 341 // make a copy of the route 342 route := &pb.Route{ 343 Service: event.Route.Service, 344 Address: event.Route.Address, 345 Gateway: event.Route.Gateway, 346 Network: event.Route.Network, 347 Router: event.Route.Router, 348 Link: event.Route.Link, 349 Metric: event.Route.Metric, 350 } 351 352 // override the various values 353 n.maskRoute(route) 354 355 e := &pb.Event{ 356 Type: pb.EventType(event.Type), 357 Timestamp: event.Timestamp.UnixNano(), 358 Route: route, 359 } 360 361 pbEvents = append(pbEvents, e) 362 363 msg := &pb.Advert{ 364 Id: n.Id(), 365 Type: pb.AdvertType(event.Type), 366 Timestamp: event.Timestamp.UnixNano(), 367 Events: pbEvents, 368 } 369 370 // get a list of node peers 371 peers := n.Peers() 372 373 // continue if there is no one to send to 374 if len(peers) == 0 { 375 continue 376 } 377 378 // advertise to max 3 peers 379 max := len(peers) 380 if max > 3 { 381 max = 3 382 } 383 384 for i := 0; i < max; i++ { 385 if peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id()); peer != nil { 386 if err := n.sendTo("advert", ControlChannel, peer, msg); err != nil { 387 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 388 logger.Debugf("Network failed to advertise routes to %s: %v", peer.Id(), err) 389 } 390 } 391 } 392 } 393 case <-n.closed: 394 return 395 } 396 } 397 } 398 399 // initNodes initializes tunnel with a list of resolved nodes 400 func (n *mucpNetwork) initNodes(startup bool) { 401 nodes, err := n.resolveNodes() 402 // NOTE: this condition never fires 403 // as resolveNodes() never returns error 404 if err != nil && !startup { 405 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 406 logger.Debugf("Network failed to init nodes: %v", err) 407 } 408 return 409 } 410 411 // strip self 412 var init []string 413 414 // our current address 415 advertised := n.server.Options().Advertise 416 417 for _, node := range nodes { 418 // skip self 419 if node == advertised { 420 continue 421 } 422 // add the node 423 init = append(init, node) 424 } 425 426 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 427 // initialize the tunnel 428 logger.Tracef("Network initialising nodes %+v\n", init) 429 } 430 431 n.tunnel.Init( 432 tunnel.Nodes(nodes...), 433 ) 434 } 435 436 // resolveNodes resolves network nodes to addresses 437 func (n *mucpNetwork) resolveNodes() ([]string, error) { 438 nodeMap := make(map[string]bool) 439 440 // collect network node addresses 441 //nolint:prealloc 442 var nodes []string 443 444 // use the DNS resolver to expand peers 445 dns := &dns.Resolver{} 446 447 // append seed nodes if we have them 448 for _, node := range n.options.Nodes { 449 // resolve anything that looks like a host name 450 records, err := dns.Resolve(node) 451 if err != nil { 452 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 453 logger.Debugf("Failed to resolve %v %v", node, err) 454 } 455 continue 456 } 457 458 // add to the node map 459 for _, record := range records { 460 if _, ok := nodeMap[record.Address]; !ok { 461 nodes = append(nodes, record.Address) 462 } 463 nodeMap[record.Address] = true 464 } 465 } 466 467 return nodes, nil 468 } 469 470 // handleNetConn handles network announcement messages 471 func (n *mucpNetwork) handleNetConn(s tunnel.Session, msg chan *message) { 472 for { 473 m := new(transport.Message) 474 if err := s.Recv(m); err != nil { 475 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 476 logger.Debugf("Network tunnel [%s] receive error: %v", NetworkChannel, err) 477 } 478 switch err { 479 case io.EOF, tunnel.ErrReadTimeout: 480 s.Close() 481 return 482 } 483 continue 484 } 485 486 // check if peer is set 487 peer := m.Header["Micro-Peer"] 488 489 // check who the message is intended for 490 if len(peer) > 0 && peer != n.options.Id { 491 continue 492 } 493 494 select { 495 case msg <- &message{ 496 msg: m, 497 session: s, 498 }: 499 case <-n.closed: 500 return 501 } 502 } 503 } 504 505 // handleCtrlConn handles ControlChannel connections 506 func (n *mucpNetwork) handleCtrlConn(s tunnel.Session, msg chan *message) { 507 for { 508 m := new(transport.Message) 509 if err := s.Recv(m); err != nil { 510 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 511 logger.Debugf("Network tunnel [%s] receive error: %v", ControlChannel, err) 512 } 513 switch err { 514 case io.EOF, tunnel.ErrReadTimeout: 515 s.Close() 516 return 517 } 518 continue 519 } 520 521 // check if peer is set 522 peer := m.Header["Micro-Peer"] 523 524 // check who the message is intended for 525 if len(peer) > 0 && peer != n.options.Id { 526 continue 527 } 528 529 select { 530 case msg <- &message{ 531 msg: m, 532 session: s, 533 }: 534 case <-n.closed: 535 return 536 } 537 } 538 } 539 540 // getHopCount queries network graph and returns hop count for given router 541 // NOTE: this should be called getHopeMetric 542 // - Routes for local services have hop count 1 543 // - Routes with ID of adjacent nodes have hop count 10 544 // - Routes by peers of the advertiser have hop count 100 545 // - Routes beyond node neighbourhood have hop count 1000 546 func (n *mucpNetwork) getHopCount(rtr string) int { 547 // make sure node.peers are not modified 548 n.node.RLock() 549 defer n.node.RUnlock() 550 551 // we are the origin of the route 552 if rtr == n.options.Id { 553 return 1 554 } 555 556 // the route origin is our peer 557 if _, ok := n.node.peers[rtr]; ok { 558 return 10 559 } 560 561 // the route origin is the peer of our peer 562 for _, peer := range n.node.peers { 563 for id := range peer.peers { 564 if rtr == id { 565 return 100 566 } 567 } 568 } 569 // otherwise we are three hops away 570 return 1000 571 } 572 573 // getRouteMetric calculates router metric and returns it 574 // Route metric is calculated based on link status and route hopd count 575 func (n *mucpNetwork) getRouteMetric(router string, gateway string, link string) int64 { 576 // set the route metric 577 n.RLock() 578 defer n.RUnlock() 579 580 // local links are marked as 1 581 if link == "local" && gateway == "" { 582 return 1 583 } 584 585 // local links from other gateways as 2 586 if link == "local" && gateway != "" { 587 return 2 588 } 589 590 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 591 logger.Tracef("Network looking up %s link to gateway: %s", link, gateway) 592 } 593 // attempt to find link based on gateway address 594 lnk, ok := n.peerLinks[gateway] 595 if !ok { 596 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 597 logger.Debugf("Network failed to find a link to gateway: %s", gateway) 598 } 599 // no link found so infinite metric returned 600 return math.MaxInt64 601 } 602 603 // calculating metric 604 605 delay := lnk.Delay() 606 hops := n.getHopCount(router) 607 length := lnk.Length() 608 609 // make sure delay is non-zero 610 if delay == 0 { 611 delay = 1 612 } 613 614 // make sure length is non-zero 615 if length == 0 { 616 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 617 logger.Debugf("Link length is 0 %v %v", link, lnk.Length()) 618 } 619 length = 10e9 620 } 621 622 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 623 logger.Tracef("Network calculated metric %v delay %v length %v distance %v", (delay*length*int64(hops))/10e6, delay, length, hops) 624 } 625 626 return (delay * length * int64(hops)) / 10e6 627 } 628 629 // processCtrlChan processes messages received on ControlChannel 630 func (n *mucpNetwork) processCtrlChan(listener tunnel.Listener) { 631 defer listener.Close() 632 633 // receive control message queue 634 recv := make(chan *message, 128) 635 636 // accept ControlChannel connections 637 go n.acceptCtrlConn(listener, recv) 638 639 for { 640 select { 641 case m := <-recv: 642 // switch on type of message and take action 643 switch m.msg.Header["Micro-Method"] { 644 case "advert": 645 pbAdvert := &pb.Advert{} 646 647 if err := proto.Unmarshal(m.msg.Body, pbAdvert); err != nil { 648 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 649 logger.Debugf("Network fail to unmarshal advert message: %v", err) 650 } 651 continue 652 } 653 654 // don't process your own messages 655 if pbAdvert.Id == n.Id() { 656 continue 657 } 658 659 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 660 logger.Debugf("Network received advert message from: %s", pbAdvert.Id) 661 } 662 663 // lookup advertising node in our peer topology 664 advertNode := n.node.GetPeerNode(pbAdvert.Id) 665 if advertNode == nil { 666 // if we can't find the node in our topology (MaxDepth) we skipp prcessing adverts 667 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 668 logger.Debugf("Network skipping advert message from unknown peer: %s", pbAdvert.Id) 669 } 670 continue 671 } 672 673 for _, event := range pbAdvert.Events { 674 // for backwards compatibility reasons 675 if event == nil || event.Route == nil { 676 continue 677 } 678 679 // we know the advertising node is not the origin of the route 680 if pbAdvert.Id != event.Route.Router { 681 // if the origin router is not the advertising node peer 682 // we can't rule out potential routing loops so we bail here 683 if peer := advertNode.GetPeerNode(event.Route.Router); peer == nil { 684 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 685 logger.Debugf("Network skipping advert message from peer: %s", pbAdvert.Id) 686 } 687 continue 688 } 689 } 690 691 route := router.Route{ 692 Service: event.Route.Service, 693 Address: event.Route.Address, 694 Gateway: event.Route.Gateway, 695 Network: event.Route.Network, 696 Router: event.Route.Router, 697 Link: event.Route.Link, 698 Metric: event.Route.Metric, 699 } 700 701 // calculate route metric and add to the advertised metric 702 // we need to make sure we do not overflow math.MaxInt64 703 metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link) 704 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 705 logger.Tracef("Network metric for router %s and gateway %s: %v", event.Route.Router, event.Route.Gateway, metric) 706 } 707 708 // check we don't overflow max int 64 709 if d := route.Metric + metric; d <= 0 { 710 // set to max int64 if we overflow 711 route.Metric = math.MaxInt64 712 } else { 713 // set the combined value of metrics otherwise 714 route.Metric = d 715 } 716 717 // update the local table 718 if err := n.router.Table().Update(route); err != nil { 719 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 720 logger.Debugf("Network failed to process advert %s: %v", event.Id, err) 721 } 722 } 723 } 724 } 725 case <-n.closed: 726 return 727 } 728 } 729 } 730 731 // processNetChan processes messages received on NetworkChannel 732 func (n *mucpNetwork) processNetChan(listener tunnel.Listener) { 733 defer listener.Close() 734 735 // receive network message queue 736 recv := make(chan *message, 128) 737 738 // accept NetworkChannel connections 739 go n.acceptNetConn(listener, recv) 740 741 for { 742 select { 743 case m := <-recv: 744 // switch on type of message and take action 745 switch m.msg.Header["Micro-Method"] { 746 case "connect": 747 // mark the time the message has been received 748 now := time.Now() 749 750 pbConnect := &pb.Connect{} 751 if err := proto.Unmarshal(m.msg.Body, pbConnect); err != nil { 752 logger.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err) 753 continue 754 } 755 756 // don't process your own messages 757 if pbConnect.Node.Id == n.options.Id { 758 continue 759 } 760 761 logger.Debugf("Network received connect message from: %s", pbConnect.Node.Id) 762 763 peer := &node{ 764 id: pbConnect.Node.Id, 765 address: pbConnect.Node.Address, 766 link: m.msg.Header["Micro-Link"], 767 peers: make(map[string]*node), 768 status: newStatus(), 769 lastSeen: now, 770 } 771 772 // update peer links 773 774 // TODO: should we do this only if we manage to add a peer 775 // What should we do if the peer links failed to be updated? 776 if err := n.updatePeerLinks(peer); err != nil { 777 logger.Debugf("Network failed updating peer links: %s", err) 778 } 779 780 // add peer to the list of node peers 781 if err := n.AddPeer(peer); err == ErrPeerExists { 782 logger.Tracef("Network peer exists, refreshing: %s", peer.id) 783 // update lastSeen time for the peer 784 if err := n.RefreshPeer(peer.id, peer.link, now); err != nil { 785 logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err) 786 } 787 } 788 789 // we send the sync message because someone has sent connect 790 // and wants to either connect or reconnect to the network 791 // The faster it gets the network config (routes and peer graph) 792 // the faster the network converges to a stable state 793 794 go func() { 795 // get node peer graph to send back to the connecting node 796 node := PeersToProto(n.node, MaxDepth) 797 798 msg := &pb.Sync{ 799 Peer: node, 800 } 801 802 // get a list of the best routes for each service in our routing table 803 routes, err := n.getProtoRoutes() 804 if err != nil { 805 logger.Debugf("Network node %s failed listing routes: %v", n.id, err) 806 } 807 // attached the routes to the message 808 msg.Routes = routes 809 810 // send sync message to the newly connected peer 811 if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil { 812 logger.Debugf("Network failed to send sync message: %v", err) 813 } 814 }() 815 case "peer": 816 // mark the time the message has been received 817 now := time.Now() 818 pbPeer := &pb.Peer{} 819 820 if err := proto.Unmarshal(m.msg.Body, pbPeer); err != nil { 821 logger.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err) 822 continue 823 } 824 825 // don't process your own messages 826 if pbPeer.Node.Id == n.options.Id { 827 continue 828 } 829 830 logger.Debugf("Network received peer message from: %s %s", pbPeer.Node.Id, pbPeer.Node.Address) 831 832 peer := &node{ 833 id: pbPeer.Node.Id, 834 address: pbPeer.Node.Address, 835 link: m.msg.Header["Micro-Link"], 836 peers: make(map[string]*node), 837 status: newPeerStatus(pbPeer), 838 lastSeen: now, 839 } 840 841 // update peer links 842 843 // TODO: should we do this only if we manage to add a peer 844 // What should we do if the peer links failed to be updated? 845 if err := n.updatePeerLinks(peer); err != nil { 846 logger.Debugf("Network failed updating peer links: %s", err) 847 } 848 849 // if it's a new peer i.e. we do not have it in our graph, we request full sync 850 if err := n.node.AddPeer(peer); err == nil { 851 go func() { 852 // marshal node graph into protobuf 853 node := PeersToProto(n.node, MaxDepth) 854 855 msg := &pb.Sync{ 856 Peer: node, 857 } 858 859 // get a list of the best routes for each service in our routing table 860 routes, err := n.getProtoRoutes() 861 if err != nil { 862 logger.Debugf("Network node %s failed listing routes: %v", n.id, err) 863 } 864 // attached the routes to the message 865 msg.Routes = routes 866 867 // send sync message to the newly connected peer 868 if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil { 869 logger.Debugf("Network failed to send sync message: %v", err) 870 } 871 }() 872 873 continue 874 // if we already have the peer in our graph, skip further steps 875 } else if err != ErrPeerExists { 876 logger.Debugf("Network got error adding peer %v", err) 877 continue 878 } 879 880 logger.Tracef("Network peer exists, refreshing: %s", pbPeer.Node.Id) 881 882 // update lastSeen time for the peer 883 if err := n.RefreshPeer(peer.id, peer.link, now); err != nil { 884 logger.Debugf("Network failed refreshing peer %s: %v", pbPeer.Node.Id, err) 885 } 886 887 // NOTE: we don't unpack MaxDepth topology 888 peer = UnpackPeerTopology(pbPeer, now, MaxDepth-1) 889 // update the link 890 peer.link = m.msg.Header["Micro-Link"] 891 892 logger.Tracef("Network updating topology of node: %s", n.node.id) 893 if err := n.node.UpdatePeer(peer); err != nil { 894 logger.Debugf("Network failed to update peers: %v", err) 895 } 896 897 // tell the connect loop that we've been discovered 898 // so it stops sending connect messages out 899 select { 900 case n.discovered <- true: 901 default: 902 // don't block here 903 } 904 case "sync": 905 // record the timestamp of the message receipt 906 now := time.Now() 907 908 pbSync := &pb.Sync{} 909 if err := proto.Unmarshal(m.msg.Body, pbSync); err != nil { 910 logger.Debugf("Network tunnel [%s] sync unmarshal error: %v", NetworkChannel, err) 911 continue 912 } 913 914 // don't process your own messages 915 if pbSync.Peer.Node.Id == n.options.Id { 916 continue 917 } 918 919 logger.Debugf("Network received sync message from: %s", pbSync.Peer.Node.Id) 920 921 peer := &node{ 922 id: pbSync.Peer.Node.Id, 923 address: pbSync.Peer.Node.Address, 924 link: m.msg.Header["Micro-Link"], 925 peers: make(map[string]*node), 926 status: newPeerStatus(pbSync.Peer), 927 lastSeen: now, 928 } 929 930 // update peer links 931 932 // TODO: should we do this only if we manage to add a peer 933 // What should we do if the peer links failed to be updated? 934 if err := n.updatePeerLinks(peer); err != nil { 935 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 936 logger.Debugf("Network failed updating peer links: %s", err) 937 } 938 } 939 940 // add peer to the list of node peers 941 if err := n.node.AddPeer(peer); err == ErrPeerExists { 942 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 943 logger.Tracef("Network peer exists, refreshing: %s", peer.id) 944 } 945 // update lastSeen time for the existing node 946 if err := n.RefreshPeer(peer.id, peer.link, now); err != nil { 947 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 948 logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err) 949 } 950 } 951 } 952 953 // when we receive a sync message we update our routing table 954 // and send a peer message back to the network to announce our presence 955 956 // add all the routes we have received in the sync message 957 for _, pbRoute := range pbSync.Routes { 958 // unmarshal the routes received from remote peer 959 route := ProtoToRoute(pbRoute) 960 // continue if we are the originator of the route 961 if route.Router == n.router.Options().Id { 962 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 963 logger.Debugf("Network node %s skipping route addition: route already present", n.id) 964 } 965 continue 966 } 967 968 metric := n.getRouteMetric(route.Router, route.Gateway, route.Link) 969 // check we don't overflow max int 64 970 if d := route.Metric + metric; d <= 0 { 971 // set to max int64 if we overflow 972 route.Metric = math.MaxInt64 973 } else { 974 // set the combined value of metrics otherwise 975 route.Metric = d 976 } 977 978 q := []router.LookupOption{ 979 router.LookupLink(route.Link), 980 } 981 982 routes, err := n.router.Lookup(route.Service, q...) 983 if err != nil && err != router.ErrRouteNotFound { 984 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 985 logger.Debugf("Network node %s failed listing best routes for %s: %v", n.id, route.Service, err) 986 } 987 continue 988 } 989 990 // we found no routes for the given service 991 // create the new route we have just received 992 if len(routes) == 0 { 993 if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute { 994 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 995 logger.Debugf("Network node %s failed to add route: %v", n.id, err) 996 } 997 } 998 continue 999 } 1000 1001 // find the best route for the given service 1002 // from the routes that we would advertise 1003 bestRoute := routes[0] 1004 for _, r := range routes[0:] { 1005 if bestRoute.Metric > r.Metric { 1006 bestRoute = r 1007 } 1008 } 1009 1010 // Take the best route to given service and: 1011 // only add new routes if the metric is better 1012 // than the metric of our best route 1013 1014 if bestRoute.Metric <= route.Metric { 1015 continue 1016 } 1017 1018 // add route to the routing table 1019 if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute { 1020 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1021 logger.Debugf("Network node %s failed to add route: %v", n.id, err) 1022 } 1023 } 1024 } 1025 1026 // update your sync timestamp 1027 // NOTE: this might go away as we will be doing full table advert to random peer 1028 if err := n.RefreshSync(now); err != nil { 1029 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1030 logger.Debugf("Network failed refreshing sync time: %v", err) 1031 } 1032 } 1033 1034 go func() { 1035 // get node peer graph to send back to the syncing node 1036 msg := PeersToProto(n.node, MaxDepth) 1037 1038 // advertise yourself to the new node 1039 if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil { 1040 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1041 logger.Debugf("Network failed to advertise peers: %v", err) 1042 } 1043 } 1044 }() 1045 case "close": 1046 pbClose := &pb.Close{} 1047 if err := proto.Unmarshal(m.msg.Body, pbClose); err != nil { 1048 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1049 logger.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err) 1050 } 1051 continue 1052 } 1053 1054 // don't process your own messages 1055 if pbClose.Node.Id == n.options.Id { 1056 continue 1057 } 1058 1059 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1060 logger.Debugf("Network received close message from: %s", pbClose.Node.Id) 1061 } 1062 1063 peer := &node{ 1064 id: pbClose.Node.Id, 1065 address: pbClose.Node.Address, 1066 } 1067 1068 if err := n.DeletePeerNode(peer.id); err != nil { 1069 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1070 logger.Debugf("Network failed to delete node %s routes: %v", peer.id, err) 1071 } 1072 } 1073 1074 if err := n.prunePeerRoutes(peer); err != nil { 1075 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1076 logger.Debugf("Network failed pruning peer %s routes: %v", peer.id, err) 1077 } 1078 } 1079 1080 // NOTE: we should maybe advertise this to the network so we converge faster on closed nodes 1081 // as opposed to our waiting until the node eventually gets pruned; something to think about 1082 1083 // delete peer from the peerLinks 1084 n.Lock() 1085 delete(n.peerLinks, pbClose.Node.Address) 1086 n.Unlock() 1087 } 1088 case <-n.closed: 1089 return 1090 } 1091 } 1092 } 1093 1094 // pruneRoutes prunes routes return by given query 1095 func (n *mucpNetwork) pruneRoutes(q ...router.LookupOption) error { 1096 routes, err := n.router.Table().Read() 1097 if err != nil && err != router.ErrRouteNotFound { 1098 return err 1099 } 1100 1101 // filter and delete the routes in question 1102 for _, route := range router.Filter(routes, router.NewLookup(q...)) { 1103 n.router.Table().Delete(route) 1104 } 1105 1106 return nil 1107 } 1108 1109 // pruneNodeRoutes prunes routes that were either originated by or routable via given node 1110 func (n *mucpNetwork) prunePeerRoutes(peer *node) error { 1111 // lookup all routes originated by router 1112 q := []router.LookupOption{ 1113 router.LookupRouter(peer.id), 1114 router.LookupLink("*"), 1115 } 1116 if err := n.pruneRoutes(q...); err != nil { 1117 return err 1118 } 1119 1120 // lookup all routes routable via gw 1121 q = []router.LookupOption{ 1122 router.LookupGateway(peer.address), 1123 router.LookupLink("*"), 1124 } 1125 if err := n.pruneRoutes(q...); err != nil { 1126 return err 1127 } 1128 1129 return nil 1130 } 1131 1132 // manage the process of announcing to peers and prune any peer nodes that have not been 1133 // seen for a period of time. Also removes all the routes either originated by or routable 1134 // by the stale nodes. it also resolves nodes periodically and adds them to the tunnel 1135 func (n *mucpNetwork) manage() { 1136 rnd := rand.New(rand.NewSource(time.Now().UnixNano())) 1137 announce := time.NewTicker(AnnounceTime) 1138 defer announce.Stop() 1139 prune := time.NewTicker(PruneTime) 1140 defer prune.Stop() 1141 netsync := time.NewTicker(SyncTime) 1142 defer netsync.Stop() 1143 1144 // list of links we've sent to 1145 links := make(map[string]time.Time) 1146 1147 for { 1148 select { 1149 case <-n.closed: 1150 return 1151 case <-announce.C: 1152 current := make(map[string]time.Time) 1153 1154 // build link map of current links 1155 for _, link := range n.tunnel.Links() { 1156 if n.isLoopback(link) { 1157 continue 1158 } 1159 // get an existing timestamp if it exists 1160 current[link.Id()] = links[link.Id()] 1161 } 1162 1163 // replace link map 1164 // we do this because a growing map is not 1165 // garbage collected 1166 links = current 1167 1168 n.RLock() 1169 var i int 1170 // create a list of peers to send to 1171 var peers []*node 1172 1173 // check peers to see if they need to be sent to 1174 for _, peer := range n.peers { 1175 if i >= 3 { 1176 break 1177 } 1178 1179 // get last sent 1180 lastSent := links[peer.link] 1181 1182 // check when we last sent to the peer 1183 // and send a peer message if we haven't 1184 if lastSent.IsZero() || time.Since(lastSent) > KeepAliveTime { 1185 link := peer.link 1186 id := peer.id 1187 1188 // might not exist for some weird reason 1189 if len(link) == 0 { 1190 // set the link via peer links 1191 l, ok := n.peerLinks[peer.address] 1192 if ok { 1193 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1194 logger.Debugf("Network link not found for peer %s cannot announce", peer.id) 1195 } 1196 continue 1197 } 1198 link = l.Id() 1199 } 1200 1201 // add to the list of peers we're going to send to 1202 peers = append(peers, &node{ 1203 id: id, 1204 link: link, 1205 }) 1206 1207 // increment our count 1208 i++ 1209 } 1210 } 1211 1212 n.RUnlock() 1213 1214 // peers to proto 1215 msg := PeersToProto(n.node, MaxDepth) 1216 1217 // we're only going to send to max 3 peers at any given tick 1218 for _, peer := range peers { 1219 // advertise yourself to the network 1220 if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil { 1221 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1222 logger.Debugf("Network failed to advertise peer %s: %v", peer.id, err) 1223 } 1224 continue 1225 } 1226 1227 // update last sent time 1228 links[peer.link] = time.Now() 1229 } 1230 1231 // now look at links we may not have sent to. this may occur 1232 // where a connect message was lost 1233 for link, lastSent := range links { 1234 if !lastSent.IsZero() || time.Since(lastSent) < KeepAliveTime { 1235 continue 1236 } 1237 1238 peer := &node{ 1239 // unknown id of the peer 1240 link: link, 1241 } 1242 1243 // unknown link and peer so lets do the connect flow 1244 if err := n.sendTo("connect", NetworkChannel, peer, msg); err != nil { 1245 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1246 logger.Debugf("Network failed to connect %s: %v", peer.id, err) 1247 } 1248 continue 1249 } 1250 1251 links[peer.link] = time.Now() 1252 } 1253 case <-prune.C: 1254 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1255 logger.Debugf("Network node %s pruning stale peers", n.id) 1256 } 1257 pruned := n.PruneStalePeers(PruneTime) 1258 1259 for id, peer := range pruned { 1260 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1261 logger.Debugf("Network peer exceeded prune time: %s", id) 1262 } 1263 n.Lock() 1264 delete(n.peerLinks, peer.address) 1265 n.Unlock() 1266 1267 if err := n.prunePeerRoutes(peer); err != nil { 1268 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1269 logger.Debugf("Network failed pruning peer %s routes: %v", id, err) 1270 } 1271 } 1272 } 1273 1274 // get a list of all routes 1275 routes, err := n.options.Router.Table().Read() 1276 if err != nil { 1277 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1278 logger.Debugf("Network failed listing routes when pruning peers: %v", err) 1279 } 1280 continue 1281 } 1282 1283 // collect all the router IDs in the routing table 1284 routers := make(map[string]bool) 1285 1286 for _, route := range routes { 1287 // don't process routes originated by ourselves 1288 if route.Router == n.Id() { 1289 continue 1290 } 1291 1292 // check if its been processed 1293 if _, ok := routers[route.Router]; ok { 1294 continue 1295 } 1296 1297 // mark as processed 1298 routers[route.Router] = true 1299 1300 // if the router is in our peer graph do NOT delete routes originated by it 1301 if peer := n.node.GetPeerNode(route.Router); peer != nil { 1302 continue 1303 } 1304 1305 // otherwise delete all the routes originated by it 1306 if err := n.pruneRoutes(router.LookupRouter(route.Router)); err != nil { 1307 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1308 logger.Debugf("Network failed deleting routes by %s: %v", route.Router, err) 1309 } 1310 } 1311 } 1312 case <-netsync.C: 1313 // get a list of node peers 1314 peers := n.Peers() 1315 1316 // skip when there are no peers 1317 if len(peers) == 0 { 1318 continue 1319 } 1320 1321 // pick a random peer from the list of peers and request full sync 1322 peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id()) 1323 // skip if we can't find randomly selected peer 1324 if peer == nil { 1325 continue 1326 } 1327 1328 go func() { 1329 // get node peer graph to send back to the connecting node 1330 node := PeersToProto(n.node, MaxDepth) 1331 1332 msg := &pb.Sync{ 1333 Peer: node, 1334 } 1335 1336 // get a list of the best routes for each service in our routing table 1337 routes, err := n.getProtoRoutes() 1338 if err != nil { 1339 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1340 logger.Debugf("Network node %s failed listing routes: %v", n.id, err) 1341 } 1342 } 1343 // attached the routes to the message 1344 msg.Routes = routes 1345 1346 // send sync message to the newly connected peer 1347 if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil { 1348 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1349 logger.Debugf("Network failed to send sync message: %v", err) 1350 } 1351 } 1352 }() 1353 } 1354 } 1355 } 1356 1357 // getAdvertProtoRoutes returns a list of routes to advertise to remote peer 1358 // based on the advertisement strategy encoded in protobuf 1359 // It returns error if the routes failed to be retrieved from the routing table 1360 func (n *mucpNetwork) getProtoRoutes() ([]*pb.Route, error) { 1361 routes, err := n.router.Table().Read() 1362 if err != nil && err != router.ErrRouteNotFound { 1363 return nil, err 1364 } 1365 1366 // encode the routes to protobuf 1367 pbRoutes := make([]*pb.Route, 0, len(routes)) 1368 for _, route := range routes { 1369 // generate new route proto 1370 pbRoute := RouteToProto(route) 1371 // mask the route before outbounding 1372 n.maskRoute(pbRoute) 1373 // add to list of routes 1374 pbRoutes = append(pbRoutes, pbRoute) 1375 } 1376 1377 return pbRoutes, nil 1378 } 1379 1380 func (n *mucpNetwork) sendConnect() { 1381 // send connect message to NetworkChannel 1382 // NOTE: in theory we could do this as soon as 1383 // Dial to NetworkChannel succeeds, but instead 1384 // we initialize all other node resources first 1385 msg := &pb.Connect{ 1386 Node: &pb.Node{ 1387 Id: n.node.id, 1388 Address: n.node.address, 1389 }, 1390 } 1391 1392 if err := n.sendMsg("connect", NetworkChannel, msg); err != nil { 1393 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1394 logger.Debugf("Network failed to send connect message: %s", err) 1395 } 1396 } 1397 } 1398 1399 // sendTo sends a message to a specific node as a one off. 1400 // we need this because when links die, we have no discovery info, 1401 // and sending to an existing multicast link doesn't immediately work 1402 func (n *mucpNetwork) sendTo(method, channel string, peer *node, msg proto.Message) error { 1403 body, err := proto.Marshal(msg) 1404 if err != nil { 1405 return err 1406 } 1407 1408 // Create a unicast connection to the peer but don't do the open/accept flow 1409 c, err := n.tunnel.Dial(channel, tunnel.DialWait(false), tunnel.DialLink(peer.link)) 1410 if err != nil { 1411 if peerNode := n.GetPeerNode(peer.id); peerNode != nil { 1412 // update node status when error happens 1413 peerNode.status.err.Update(err) 1414 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1415 logger.Debugf("Network increment peer %v error count to: %d", peerNode, peerNode, peerNode.status.Error().Count()) 1416 } 1417 if count := peerNode.status.Error().Count(); count == MaxPeerErrors { 1418 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1419 logger.Debugf("Network peer %v error count exceeded %d. Prunning.", peerNode, MaxPeerErrors) 1420 } 1421 n.PrunePeer(peerNode.id) 1422 } 1423 } 1424 return err 1425 } 1426 defer c.Close() 1427 1428 id := peer.id 1429 1430 if len(id) == 0 { 1431 id = peer.link 1432 } 1433 1434 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1435 logger.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, id) 1436 } 1437 tmsg := &transport.Message{ 1438 Header: map[string]string{ 1439 "Micro-Method": method, 1440 }, 1441 Body: body, 1442 } 1443 1444 // setting the peer header 1445 if len(peer.id) > 0 { 1446 tmsg.Header["Micro-Peer"] = peer.id 1447 } 1448 1449 if err := c.Send(tmsg); err != nil { 1450 // TODO: Lookup peer in our graph 1451 if peerNode := n.GetPeerNode(peer.id); peerNode != nil { 1452 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1453 logger.Debugf("Network found peer %s: %v", peer.id, peerNode) 1454 } 1455 // update node status when error happens 1456 peerNode.status.err.Update(err) 1457 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1458 logger.Debugf("Network increment node peer %p %v count to: %d", peerNode, peerNode, peerNode.status.Error().Count()) 1459 } 1460 if count := peerNode.status.Error().Count(); count == MaxPeerErrors { 1461 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1462 logger.Debugf("Network node peer %v count exceeded %d: %d", peerNode, MaxPeerErrors, peerNode.status.Error().Count()) 1463 } 1464 n.PrunePeer(peerNode.id) 1465 } 1466 } 1467 return err 1468 } 1469 1470 return nil 1471 } 1472 1473 // sendMsg sends a message to the tunnel channel 1474 func (n *mucpNetwork) sendMsg(method, channel string, msg proto.Message) error { 1475 body, err := proto.Marshal(msg) 1476 if err != nil { 1477 return err 1478 } 1479 1480 // check if the channel client is initialized 1481 n.RLock() 1482 client, ok := n.tunClient[channel] 1483 if !ok || client == nil { 1484 n.RUnlock() 1485 return ErrClientNotFound 1486 } 1487 n.RUnlock() 1488 1489 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1490 logger.Debugf("Network sending %s message from: %s", method, n.options.Id) 1491 } 1492 1493 return client.Send(&transport.Message{ 1494 Header: map[string]string{ 1495 "Micro-Method": method, 1496 }, 1497 Body: body, 1498 }) 1499 } 1500 1501 // updatePeerLinks updates link for a given peer 1502 func (n *mucpNetwork) updatePeerLinks(peer *node) error { 1503 n.Lock() 1504 defer n.Unlock() 1505 1506 linkId := peer.link 1507 1508 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 1509 logger.Tracef("Network looking up link %s in the peer links", linkId) 1510 } 1511 1512 // lookup the peer link 1513 var peerLink tunnel.Link 1514 1515 for _, link := range n.tunnel.Links() { 1516 if link.Id() == linkId { 1517 peerLink = link 1518 break 1519 } 1520 } 1521 1522 if peerLink == nil { 1523 return ErrPeerLinkNotFound 1524 } 1525 1526 if logger.V(logger.TraceLevel, logger.DefaultLogger) { 1527 // if the peerLink is found in the returned links update peerLinks 1528 logger.Tracef("Network updating peer links for peer %s", peer.address) 1529 } 1530 1531 // lookup a link and update it if better link is available 1532 if link, ok := n.peerLinks[peer.address]; ok { 1533 // if the existing has better Length then the new, replace it 1534 if link.Length() < peerLink.Length() { 1535 n.peerLinks[peer.address] = peerLink 1536 } 1537 return nil 1538 } 1539 1540 // add peerLink to the peerLinks map 1541 n.peerLinks[peer.address] = peerLink 1542 1543 return nil 1544 } 1545 1546 // isLoopback checks if a link is a loopback to ourselves 1547 func (n *mucpNetwork) isLoopback(link tunnel.Link) bool { 1548 // skip loopback 1549 if link.Loopback() { 1550 return true 1551 } 1552 1553 // our advertise address 1554 loopback := n.server.Options().Advertise 1555 // actual address 1556 address := n.tunnel.Address() 1557 1558 // if remote is ourselves 1559 switch link.Remote() { 1560 case loopback, address: 1561 return true 1562 } 1563 1564 return false 1565 } 1566 1567 // connect will wait for a link to be established and send the connect 1568 // message. We're trying to ensure convergence pretty quickly. So we want 1569 // to hear back. In the case we become completely disconnected we'll 1570 // connect again once a new link is established 1571 func (n *mucpNetwork) connect() { 1572 // discovered lets us know what we received a peer message back 1573 var discovered bool 1574 var attempts int 1575 1576 for { 1577 // connected is used to define if the link is connected 1578 var connected bool 1579 1580 // check the links state 1581 for _, link := range n.tunnel.Links() { 1582 // skip loopback 1583 if n.isLoopback(link) { 1584 continue 1585 } 1586 1587 if link.State() == "connected" { 1588 connected = true 1589 break 1590 } 1591 } 1592 1593 // if we're not connected wait 1594 if !connected { 1595 // reset discovered 1596 discovered = false 1597 // sleep for a second 1598 time.Sleep(time.Second) 1599 // now try again 1600 continue 1601 } 1602 1603 // we're connected but are we discovered? 1604 if !discovered { 1605 // recreate the clients because all the tunnel links are gone 1606 // so we haven't send discovery beneath 1607 // NOTE: when starting the tunnel for the first time we might be recreating potentially 1608 // well functioning tunnel clients as "discovered" will be false until the 1609 // n.discovered channel is read at some point later on. 1610 if err := n.createClients(); err != nil { 1611 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1612 logger.Debugf("Failed to recreate network/control clients: %v", err) 1613 } 1614 continue 1615 } 1616 1617 // send the connect message 1618 n.sendConnect() 1619 } 1620 1621 // check if we've been discovered 1622 select { 1623 case <-n.discovered: 1624 discovered = true 1625 attempts = 0 1626 case <-n.closed: 1627 return 1628 case <-time.After(time.Second + backoff.Do(attempts)): 1629 // we have to try again 1630 attempts++ 1631 } 1632 } 1633 } 1634 1635 // Connect connects the network 1636 func (n *mucpNetwork) Connect() error { 1637 n.Lock() 1638 defer n.Unlock() 1639 1640 // connect network tunnel 1641 if err := n.tunnel.Connect(); err != nil { 1642 return err 1643 } 1644 1645 // return if already connected 1646 if n.connected { 1647 // initialise the nodes 1648 n.initNodes(false) 1649 // send the connect message 1650 go n.sendConnect() 1651 return nil 1652 } 1653 1654 // initialise the nodes 1655 n.initNodes(true) 1656 1657 // set our internal node address 1658 // if advertise address is not set 1659 if len(n.options.Advertise) == 0 { 1660 n.server.Init(server.Advertise(n.tunnel.Address())) 1661 } 1662 1663 // listen on NetworkChannel 1664 netListener, err := n.tunnel.Listen( 1665 NetworkChannel, 1666 tunnel.ListenMode(tunnel.Multicast), 1667 ) 1668 if err != nil { 1669 return err 1670 } 1671 1672 // listen on ControlChannel 1673 ctrlListener, err := n.tunnel.Listen( 1674 ControlChannel, 1675 tunnel.ListenMode(tunnel.Multicast), 1676 ) 1677 if err != nil { 1678 return err 1679 } 1680 1681 // dial into ControlChannel to send route adverts 1682 ctrlClient, err := n.tunnel.Dial( 1683 ControlChannel, 1684 tunnel.DialMode(tunnel.Multicast), 1685 ) 1686 if err != nil { 1687 return err 1688 } 1689 1690 n.tunClient[ControlChannel] = ctrlClient 1691 1692 // dial into NetworkChannel to send network messages 1693 netClient, err := n.tunnel.Dial( 1694 NetworkChannel, 1695 tunnel.DialMode(tunnel.Multicast), 1696 ) 1697 if err != nil { 1698 return err 1699 } 1700 1701 n.tunClient[NetworkChannel] = netClient 1702 1703 // create closed channel 1704 n.closed = make(chan bool) 1705 1706 // start advertising routes 1707 watcher, err := n.options.Router.Watch() 1708 if err != nil { 1709 return err 1710 } 1711 1712 advertChan, err := watcher.Chan() 1713 if err != nil { 1714 return err 1715 } 1716 1717 // start the server 1718 if err := n.server.Start(); err != nil { 1719 return err 1720 } 1721 1722 // advertise service routes 1723 go n.advertise(advertChan) 1724 // listen to network messages 1725 go n.processNetChan(netListener) 1726 // accept and process routes 1727 go n.processCtrlChan(ctrlListener) 1728 // manage connection once links are established 1729 go n.connect() 1730 // resolve nodes, broadcast announcements and prune stale nodes 1731 go n.manage() 1732 1733 // we're now connected 1734 n.connected = true 1735 1736 return nil 1737 } 1738 1739 func (n *mucpNetwork) close() error { 1740 // stop the server 1741 if err := n.server.Stop(); err != nil { 1742 return err 1743 } 1744 1745 // close the router 1746 if err := n.router.Close(); err != nil { 1747 return err 1748 } 1749 1750 // close the tunnel 1751 if err := n.tunnel.Close(); err != nil { 1752 return err 1753 } 1754 1755 return nil 1756 } 1757 1758 // createClients is used to create new clients in the event we lose all the tunnels 1759 func (n *mucpNetwork) createClients() error { 1760 // dial into ControlChannel to send route adverts 1761 ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast)) 1762 if err != nil { 1763 return err 1764 } 1765 1766 // dial into NetworkChannel to send network messages 1767 netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast)) 1768 if err != nil { 1769 return err 1770 } 1771 1772 n.Lock() 1773 defer n.Unlock() 1774 1775 // set the control client 1776 c, ok := n.tunClient[ControlChannel] 1777 if ok { 1778 c.Close() 1779 } 1780 n.tunClient[ControlChannel] = ctrlClient 1781 1782 // set the network client 1783 c, ok = n.tunClient[NetworkChannel] 1784 if ok { 1785 c.Close() 1786 } 1787 n.tunClient[NetworkChannel] = netClient 1788 1789 return nil 1790 } 1791 1792 // Close closes network connection 1793 func (n *mucpNetwork) Close() error { 1794 n.Lock() 1795 1796 if !n.connected { 1797 n.Unlock() 1798 return nil 1799 } 1800 1801 select { 1802 case <-n.closed: 1803 n.Unlock() 1804 return nil 1805 default: 1806 close(n.closed) 1807 1808 // set connected to false 1809 n.connected = false 1810 1811 // unlock the lock otherwise we'll deadlock sending the close 1812 n.Unlock() 1813 1814 msg := &pb.Close{ 1815 Node: &pb.Node{ 1816 Id: n.node.id, 1817 Address: n.node.address, 1818 }, 1819 } 1820 1821 if err := n.sendMsg("close", NetworkChannel, msg); err != nil { 1822 if logger.V(logger.DebugLevel, logger.DefaultLogger) { 1823 logger.Debugf("Network failed to send close message: %s", err) 1824 } 1825 } 1826 <-time.After(time.Millisecond * 100) 1827 } 1828 1829 return n.close() 1830 } 1831 1832 // Client returns network client 1833 func (n *mucpNetwork) Client() client.Client { 1834 return n.client 1835 } 1836 1837 // Server returns network server 1838 func (n *mucpNetwork) Server() server.Server { 1839 return n.server 1840 } 1841 1842 // RouteToProto encodes route into protobuf and returns it 1843 func RouteToProto(route router.Route) *pb.Route { 1844 return &pb.Route{ 1845 Service: route.Service, 1846 Address: route.Address, 1847 Gateway: route.Gateway, 1848 Network: route.Network, 1849 Router: route.Router, 1850 Link: route.Link, 1851 Metric: int64(route.Metric), 1852 } 1853 } 1854 1855 // ProtoToRoute decodes protobuf route into router route and returns it 1856 func ProtoToRoute(route *pb.Route) router.Route { 1857 return router.Route{ 1858 Service: route.Service, 1859 Address: route.Address, 1860 Gateway: route.Gateway, 1861 Network: route.Network, 1862 Router: route.Router, 1863 Link: route.Link, 1864 Metric: route.Metric, 1865 } 1866 }