gitee.com/liuxuezhan/go-micro-v1.18.0@v1.0.0/network/default.go (about)

     1  package network
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"hash/fnv"
     7  	"io"
     8  	"math"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/golang/protobuf/proto"
    13  	"gitee.com/liuxuezhan/go-micro-v1.18.0/client"
    14  	rtr "gitee.com/liuxuezhan/go-micro-v1.18.0/client/selector/router"
    15  	pbNet "gitee.com/liuxuezhan/go-micro-v1.18.0/network/proto"
    16  	"gitee.com/liuxuezhan/go-micro-v1.18.0/network/resolver/dns"
    17  	"gitee.com/liuxuezhan/go-micro-v1.18.0/proxy"
    18  	"gitee.com/liuxuezhan/go-micro-v1.18.0/router"
    19  	pbRtr "gitee.com/liuxuezhan/go-micro-v1.18.0/router/proto"
    20  	"gitee.com/liuxuezhan/go-micro-v1.18.0/server"
    21  	"gitee.com/liuxuezhan/go-micro-v1.18.0/transport"
    22  	"gitee.com/liuxuezhan/go-micro-v1.18.0/tunnel"
    23  	bun "gitee.com/liuxuezhan/go-micro-v1.18.0/tunnel/broker"
    24  	tun "gitee.com/liuxuezhan/go-micro-v1.18.0/tunnel/transport"
    25  	"gitee.com/liuxuezhan/go-micro-v1.18.0/util/backoff"
    26  	"gitee.com/liuxuezhan/go-micro-v1.18.0/util/log"
    27  )
    28  
var (
	// NetworkChannel is the name of the tunnel channel for passing network messages
	// (the "connect", "peer" and "close" methods handled by processNetChan)
	NetworkChannel = "network"
	// ControlChannel is the name of the tunnel channel for passing control messages
	// (the "advert" and "solicit" methods handled by processCtrlChan)
	ControlChannel = "control"
	// DefaultLink is the default network link
	DefaultLink = "network"
	// MaxConnections is the max number of network client connections;
	// resolveNodes stops collecting resolver records once it has gathered
	// this many unique addresses
	MaxConnections = 3
)
    39  
var (
	// ErrClientNotFound is returned when the client for a tunnel channel could not be found
	ErrClientNotFound = errors.New("client not found")
	// ErrPeerLinkNotFound is returned when the peer link could not be found in the tunnel Links
	ErrPeerLinkNotFound = errors.New("peer link not found")
)
    46  
// network implements Network interface
type network struct {
	// node is the network node; its fields and methods are promoted onto network
	*node
	// options configure the network
	options Options
	// router is the network router
	router router.Router
	// proxy is the network proxy
	proxy proxy.Proxy
	// tunnel is the network tunnel
	tunnel tunnel.Tunnel
	// server is the network server
	server server.Server
	// client is the network client
	client client.Client

	// tunClient is a map of tunnel clients keyed over tunnel channel names
	tunClient map[string]transport.Client
	// peerLinks is a map of tunnel links keyed by peer address
	peerLinks map[string]tunnel.Link

	// the embedded RWMutex guards options, tunClient and peerLinks in the
	// methods of this file that take it
	sync.RWMutex
	// connected marks the network as connected
	connected bool
	// closed is the channel the background loops select on to stop
	closed chan bool
	// discovered is signalled (non-blocking) when a peer message from an
	// already known peer has been processed
	discovered chan bool
	// solicited receives (non-blocking) the id of the node on whose behalf
	// routes were solicited
	solicited chan string
}
    79  
// message is a network message: a received transport message paired with
// the tunnel session it arrived on
type message struct {
	// msg is the transport message
	msg *transport.Message
	// session is the tunnel session the message was received from
	session tunnel.Session
}
    87  
    88  // newNetwork returns a new network node
    89  func newNetwork(opts ...Option) Network {
    90  	options := DefaultOptions()
    91  
    92  	for _, o := range opts {
    93  		o(&options)
    94  	}
    95  
    96  	// set the address to a hashed address
    97  	hasher := fnv.New64()
    98  	hasher.Write([]byte(options.Address + options.Id))
    99  	address := fmt.Sprintf("%d", hasher.Sum64())
   100  
   101  	// set the address to advertise
   102  	var advertise string
   103  	var peerAddress string
   104  
   105  	if len(options.Advertise) > 0 {
   106  		advertise = options.Advertise
   107  		peerAddress = options.Advertise
   108  	} else {
   109  		advertise = options.Address
   110  		peerAddress = address
   111  	}
   112  
   113  	// init tunnel address to the network bind address
   114  	options.Tunnel.Init(
   115  		tunnel.Address(options.Address),
   116  	)
   117  
   118  	// init router Id to the network id
   119  	options.Router.Init(
   120  		router.Id(options.Id),
   121  		router.Address(peerAddress),
   122  	)
   123  
   124  	// create tunnel client with tunnel transport
   125  	tunTransport := tun.NewTransport(
   126  		tun.WithTunnel(options.Tunnel),
   127  	)
   128  
   129  	// create the tunnel broker
   130  	tunBroker := bun.NewBroker(
   131  		bun.WithTunnel(options.Tunnel),
   132  	)
   133  
   134  	// server is network server
   135  	server := server.NewServer(
   136  		server.Id(options.Id),
   137  		server.Address(peerAddress),
   138  		server.Advertise(advertise),
   139  		server.Name(options.Name),
   140  		server.Transport(tunTransport),
   141  		server.Broker(tunBroker),
   142  	)
   143  
   144  	// client is network client
   145  	client := client.NewClient(
   146  		client.Broker(tunBroker),
   147  		client.Transport(tunTransport),
   148  		client.Selector(
   149  			rtr.NewSelector(
   150  				rtr.WithRouter(options.Router),
   151  			),
   152  		),
   153  	)
   154  
   155  	network := &network{
   156  		node: &node{
   157  			id:      options.Id,
   158  			address: peerAddress,
   159  			peers:   make(map[string]*node),
   160  		},
   161  		options:    options,
   162  		router:     options.Router,
   163  		proxy:      options.Proxy,
   164  		tunnel:     options.Tunnel,
   165  		server:     server,
   166  		client:     client,
   167  		tunClient:  make(map[string]transport.Client),
   168  		peerLinks:  make(map[string]tunnel.Link),
   169  		discovered: make(chan bool, 1),
   170  		solicited:  make(chan string, 1),
   171  	}
   172  
   173  	network.node.network = network
   174  
   175  	return network
   176  }
   177  
   178  func (n *network) Init(opts ...Option) error {
   179  	n.Lock()
   180  	defer n.Unlock()
   181  
   182  	// TODO: maybe only allow reinit of certain opts
   183  	for _, o := range opts {
   184  		o(&n.options)
   185  	}
   186  
   187  	return nil
   188  }
   189  
   190  // Options returns network options
   191  func (n *network) Options() Options {
   192  	n.RLock()
   193  	defer n.RUnlock()
   194  
   195  	options := n.options
   196  
   197  	return options
   198  }
   199  
   200  // Name returns network name
   201  func (n *network) Name() string {
   202  	return n.options.Name
   203  }
   204  
   205  func (n *network) initNodes(startup bool) {
   206  	nodes, err := n.resolveNodes()
   207  	if err != nil && !startup {
   208  		log.Debugf("Network failed to resolve nodes: %v", err)
   209  		return
   210  	}
   211  
   212  	// initialize the tunnel
   213  	log.Tracef("Network initialising nodes %+v\n", nodes)
   214  
   215  	n.tunnel.Init(
   216  		tunnel.Nodes(nodes...),
   217  	)
   218  }
   219  
   220  // resolveNodes resolves network nodes to addresses
   221  func (n *network) resolveNodes() ([]string, error) {
   222  	// resolve the network address to network nodes
   223  	records, err := n.options.Resolver.Resolve(n.options.Name)
   224  	if err != nil {
   225  		log.Debugf("Network failed to resolve nodes: %v", err)
   226  	}
   227  
   228  	// keep processing
   229  
   230  	nodeMap := make(map[string]bool)
   231  
   232  	// collect network node addresses
   233  	//nolint:prealloc
   234  	var nodes []string
   235  	var i int
   236  
   237  	for _, record := range records {
   238  		if _, ok := nodeMap[record.Address]; ok {
   239  			continue
   240  		}
   241  
   242  		nodeMap[record.Address] = true
   243  		nodes = append(nodes, record.Address)
   244  
   245  		i++
   246  
   247  		// break once MaxConnection nodes has been reached
   248  		if i == MaxConnections {
   249  			break
   250  		}
   251  	}
   252  
   253  	// use the dns resolver to expand peers
   254  	dns := &dns.Resolver{}
   255  
   256  	// append seed nodes if we have them
   257  	for _, node := range n.options.Nodes {
   258  		// resolve anything that looks like a host name
   259  		records, err := dns.Resolve(node)
   260  		if err != nil {
   261  			log.Debugf("Failed to resolve %v %v", node, err)
   262  			continue
   263  		}
   264  
   265  		// add to the node map
   266  		for _, record := range records {
   267  			if _, ok := nodeMap[record.Address]; !ok {
   268  				nodes = append(nodes, record.Address)
   269  			}
   270  		}
   271  	}
   272  
   273  	return nodes, nil
   274  }
   275  
   276  // handleNetConn handles network announcement messages
   277  func (n *network) handleNetConn(s tunnel.Session, msg chan *message) {
   278  	for {
   279  		m := new(transport.Message)
   280  		if err := s.Recv(m); err != nil {
   281  			log.Debugf("Network tunnel [%s] receive error: %v", NetworkChannel, err)
   282  			switch err {
   283  			case io.EOF, tunnel.ErrReadTimeout:
   284  				s.Close()
   285  				return
   286  			}
   287  			continue
   288  		}
   289  
   290  		// check if peer is set
   291  		peer := m.Header["Micro-Peer"]
   292  
   293  		// check who the message is intended for
   294  		if len(peer) > 0 && peer != n.options.Id {
   295  			continue
   296  		}
   297  
   298  		select {
   299  		case msg <- &message{
   300  			msg:     m,
   301  			session: s,
   302  		}:
   303  		case <-n.closed:
   304  			return
   305  		}
   306  	}
   307  }
   308  
   309  // acceptNetConn accepts connections from NetworkChannel
   310  func (n *network) acceptNetConn(l tunnel.Listener, recv chan *message) {
   311  	var i int
   312  	for {
   313  		// accept a connection
   314  		conn, err := l.Accept()
   315  		if err != nil {
   316  			sleep := backoff.Do(i)
   317  			log.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep)
   318  			time.Sleep(sleep)
   319  			if i > 5 {
   320  				i = 0
   321  			}
   322  			i++
   323  			continue
   324  		}
   325  
   326  		select {
   327  		case <-n.closed:
   328  			if err := conn.Close(); err != nil {
   329  				log.Debugf("Network tunnel [%s] failed to close connection: %v", NetworkChannel, err)
   330  			}
   331  			return
   332  		default:
   333  			// go handle NetworkChannel connection
   334  			go n.handleNetConn(conn, recv)
   335  		}
   336  	}
   337  }
   338  
   339  // processNetChan processes messages received on NetworkChannel
   340  func (n *network) processNetChan(listener tunnel.Listener) {
   341  	defer listener.Close()
   342  
   343  	// receive network message queue
   344  	recv := make(chan *message, 128)
   345  
   346  	// accept NetworkChannel connections
   347  	go n.acceptNetConn(listener, recv)
   348  
   349  	for {
   350  		select {
   351  		case m := <-recv:
   352  			// switch on type of message and take action
   353  			switch m.msg.Header["Micro-Method"] {
   354  			case "connect":
   355  				// mark the time the message has been received
   356  				now := time.Now()
   357  				pbNetConnect := &pbNet.Connect{}
   358  
   359  				if err := proto.Unmarshal(m.msg.Body, pbNetConnect); err != nil {
   360  					log.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err)
   361  					continue
   362  				}
   363  
   364  				// don't process your own messages
   365  				if pbNetConnect.Node.Id == n.options.Id {
   366  					continue
   367  				}
   368  
   369  				log.Debugf("Network received connect message from: %s", pbNetConnect.Node.Id)
   370  
   371  				peer := &node{
   372  					id:       pbNetConnect.Node.Id,
   373  					address:  pbNetConnect.Node.Address,
   374  					peers:    make(map[string]*node),
   375  					lastSeen: now,
   376  				}
   377  
   378  				// update peer links
   379  
   380  				if err := n.updatePeerLinks(pbNetConnect.Node.Address, m); err != nil {
   381  					log.Debugf("Network failed updating peer links: %s", err)
   382  				}
   383  
   384  				// add peer to the list of node peers
   385  				if err := n.node.AddPeer(peer); err == ErrPeerExists {
   386  					log.Debugf("Network peer exists, refreshing: %s", peer.id)
   387  					// update lastSeen time for the existing node
   388  					if err := n.RefreshPeer(peer.id, now); err != nil {
   389  						log.Debugf("Network failed refreshing peer %s: %v", peer.id, err)
   390  					}
   391  				}
   392  
   393  				// we send the peer message because someone has sent connect
   394  				// and wants to know what's on the network. The faster we
   395  				// respond the faster we start to converge
   396  
   397  				// get node peers down to MaxDepth encoded in protobuf
   398  				msg := PeersToProto(n.node, MaxDepth)
   399  				node := pbNetConnect.Node.Id
   400  
   401  				// advertise yourself to the network
   402  				if err := n.sendTo("peer", NetworkChannel, node, msg); err != nil {
   403  					log.Debugf("Network failed to advertise peers: %v", err)
   404  				}
   405  
   406  				// advertise all the routes when a new node has connected
   407  				if err := n.router.Solicit(); err != nil {
   408  					log.Debugf("Network failed to solicit routes: %s", err)
   409  				}
   410  
   411  				// specify that we're soliciting
   412  				select {
   413  				case n.solicited <- node:
   414  				default:
   415  					// don't block
   416  				}
   417  			case "peer":
   418  				// mark the time the message has been received
   419  				now := time.Now()
   420  				pbNetPeer := &pbNet.Peer{}
   421  
   422  				if err := proto.Unmarshal(m.msg.Body, pbNetPeer); err != nil {
   423  					log.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err)
   424  					continue
   425  				}
   426  
   427  				// don't process your own messages
   428  				if pbNetPeer.Node.Id == n.options.Id {
   429  					continue
   430  				}
   431  
   432  				log.Debugf("Network received peer message from: %s %s", pbNetPeer.Node.Id, pbNetPeer.Node.Address)
   433  
   434  				peer := &node{
   435  					id:       pbNetPeer.Node.Id,
   436  					address:  pbNetPeer.Node.Address,
   437  					peers:    make(map[string]*node),
   438  					lastSeen: now,
   439  				}
   440  
   441  				// update peer links
   442  
   443  				if err := n.updatePeerLinks(pbNetPeer.Node.Address, m); err != nil {
   444  					log.Debugf("Network failed updating peer links: %s", err)
   445  				}
   446  
   447  				if err := n.node.AddPeer(peer); err == nil {
   448  					// send a solicit message when discovering new peer
   449  					msg := &pbRtr.Solicit{
   450  						Id: n.options.Id,
   451  					}
   452  
   453  					node := pbNetPeer.Node.Id
   454  
   455  					// only solicit this peer
   456  					if err := n.sendTo("solicit", ControlChannel, node, msg); err != nil {
   457  						log.Debugf("Network failed to send solicit message: %s", err)
   458  					}
   459  
   460  					continue
   461  					// we're expecting any error to be ErrPeerExists
   462  				} else if err != ErrPeerExists {
   463  					log.Debugf("Network got error adding peer %v", err)
   464  					continue
   465  				}
   466  
   467  				log.Debugf("Network peer exists, refreshing: %s", pbNetPeer.Node.Id)
   468  
   469  				// update lastSeen time for the peer
   470  				if err := n.RefreshPeer(pbNetPeer.Node.Id, now); err != nil {
   471  					log.Debugf("Network failed refreshing peer %s: %v", pbNetPeer.Node.Id, err)
   472  				}
   473  
   474  				// NOTE: we don't unpack MaxDepth toplogy
   475  				peer = UnpackPeerTopology(pbNetPeer, now, MaxDepth-1)
   476  				log.Tracef("Network updating topology of node: %s", n.node.id)
   477  				if err := n.node.UpdatePeer(peer); err != nil {
   478  					log.Debugf("Network failed to update peers: %v", err)
   479  				}
   480  
   481  				// tell the connect loop that we've been discovered
   482  				// so it stops sending connect messages out
   483  				select {
   484  				case n.discovered <- true:
   485  				default:
   486  					// don't block here
   487  				}
   488  			case "close":
   489  				pbNetClose := &pbNet.Close{}
   490  				if err := proto.Unmarshal(m.msg.Body, pbNetClose); err != nil {
   491  					log.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err)
   492  					continue
   493  				}
   494  
   495  				// don't process your own messages
   496  				if pbNetClose.Node.Id == n.options.Id {
   497  					continue
   498  				}
   499  
   500  				log.Debugf("Network received close message from: %s", pbNetClose.Node.Id)
   501  
   502  				peer := &node{
   503  					id:      pbNetClose.Node.Id,
   504  					address: pbNetClose.Node.Address,
   505  				}
   506  
   507  				if err := n.DeletePeerNode(peer.id); err != nil {
   508  					log.Debugf("Network failed to delete node %s routes: %v", peer.id, err)
   509  				}
   510  
   511  				if err := n.prunePeerRoutes(peer); err != nil {
   512  					log.Debugf("Network failed pruning peer %s routes: %v", peer.id, err)
   513  				}
   514  
   515  				// delete peer from the peerLinks
   516  				n.Lock()
   517  				delete(n.peerLinks, pbNetClose.Node.Address)
   518  				n.Unlock()
   519  			}
   520  		case <-n.closed:
   521  			return
   522  		}
   523  	}
   524  }
   525  
   526  // pruneRoutes prunes routes return by given query
   527  func (n *network) pruneRoutes(q ...router.QueryOption) error {
   528  	routes, err := n.router.Table().Query(q...)
   529  	if err != nil && err != router.ErrRouteNotFound {
   530  		return err
   531  	}
   532  
   533  	for _, route := range routes {
   534  		if err := n.router.Table().Delete(route); err != nil && err != router.ErrRouteNotFound {
   535  			return err
   536  		}
   537  	}
   538  
   539  	return nil
   540  }
   541  
   542  // pruneNodeRoutes prunes routes that were either originated by or routable via given node
   543  func (n *network) prunePeerRoutes(peer *node) error {
   544  	// lookup all routes originated by router
   545  	q := []router.QueryOption{
   546  		router.QueryRouter(peer.id),
   547  	}
   548  	if err := n.pruneRoutes(q...); err != nil {
   549  		return err
   550  	}
   551  
   552  	// lookup all routes routable via gw
   553  	q = []router.QueryOption{
   554  		router.QueryGateway(peer.address),
   555  	}
   556  	if err := n.pruneRoutes(q...); err != nil {
   557  		return err
   558  	}
   559  
   560  	return nil
   561  }
   562  
   563  // manage the process of announcing to peers and prune any peer nodes that have not been
   564  // seen for a period of time. Also removes all the routes either originated by or routable
   565  //by the stale nodes. it also resolves nodes periodically and adds them to the tunnel
   566  func (n *network) manage() {
   567  	announce := time.NewTicker(AnnounceTime)
   568  	defer announce.Stop()
   569  	prune := time.NewTicker(PruneTime)
   570  	defer prune.Stop()
   571  	resolve := time.NewTicker(ResolveTime)
   572  	defer resolve.Stop()
   573  
   574  	for {
   575  		select {
   576  		case <-n.closed:
   577  			return
   578  		case <-announce.C:
   579  			msg := PeersToProto(n.node, MaxDepth)
   580  			// advertise yourself to the network
   581  			if err := n.sendMsg("peer", NetworkChannel, msg); err != nil {
   582  				log.Debugf("Network failed to advertise peers: %v", err)
   583  			}
   584  		case <-prune.C:
   585  			pruned := n.PruneStalePeers(PruneTime)
   586  
   587  			for id, peer := range pruned {
   588  				log.Debugf("Network peer exceeded prune time: %s", id)
   589  
   590  				n.Lock()
   591  				delete(n.peerLinks, peer.address)
   592  				n.Unlock()
   593  
   594  				if err := n.prunePeerRoutes(peer); err != nil {
   595  					log.Debugf("Network failed pruning peer %s routes: %v", id, err)
   596  				}
   597  			}
   598  
   599  			// get a list of all routes
   600  			routes, err := n.options.Router.Table().List()
   601  			if err != nil {
   602  				log.Debugf("Network failed listing routes when pruning peers: %v", err)
   603  				continue
   604  			}
   605  
   606  			// collect all the router IDs in the routing table
   607  			routers := make(map[string]bool)
   608  
   609  			for _, route := range routes {
   610  				// check if its been processed
   611  				if _, ok := routers[route.Router]; ok {
   612  					continue
   613  				}
   614  
   615  				// mark as processed
   616  				routers[route.Router] = true
   617  
   618  				// if the router is NOT in our peer graph, delete all routes originated by it
   619  				if peer := n.node.GetPeerNode(route.Router); peer != nil {
   620  					continue
   621  				}
   622  
   623  				if err := n.pruneRoutes(router.QueryRouter(route.Router)); err != nil {
   624  					log.Debugf("Network failed deleting routes by %s: %v", route.Router, err)
   625  				}
   626  			}
   627  		case <-resolve.C:
   628  			n.initNodes(false)
   629  		}
   630  	}
   631  }
   632  
   633  // sendTo sends a message to a specific node as a one off.
   634  // we need this because when links die, we have no discovery info,
   635  // and sending to an existing multicast link doesn't immediately work
   636  func (n *network) sendTo(method, channel, peer string, msg proto.Message) error {
   637  	body, err := proto.Marshal(msg)
   638  	if err != nil {
   639  		return err
   640  	}
   641  	c, err := n.tunnel.Dial(channel, tunnel.DialMode(tunnel.Multicast))
   642  	if err != nil {
   643  		return err
   644  	}
   645  	defer c.Close()
   646  
   647  	log.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, peer)
   648  
   649  	return c.Send(&transport.Message{
   650  		Header: map[string]string{
   651  			"Micro-Method": method,
   652  			"Micro-Peer":   peer,
   653  		},
   654  		Body: body,
   655  	})
   656  }
   657  
   658  // sendMsg sends a message to the tunnel channel
   659  func (n *network) sendMsg(method, channel string, msg proto.Message) error {
   660  	body, err := proto.Marshal(msg)
   661  	if err != nil {
   662  		return err
   663  	}
   664  
   665  	// check if the channel client is initialized
   666  	n.RLock()
   667  	client, ok := n.tunClient[channel]
   668  	if !ok || client == nil {
   669  		n.RUnlock()
   670  		return ErrClientNotFound
   671  	}
   672  	n.RUnlock()
   673  
   674  	log.Debugf("Network sending %s message from: %s", method, n.options.Id)
   675  
   676  	return client.Send(&transport.Message{
   677  		Header: map[string]string{
   678  			"Micro-Method": method,
   679  		},
   680  		Body: body,
   681  	})
   682  }
   683  
   684  // updatePeerLinks updates link for a given peer
   685  func (n *network) updatePeerLinks(peerAddr string, m *message) error {
   686  	n.Lock()
   687  	defer n.Unlock()
   688  
   689  	linkId := m.msg.Header["Micro-Link"]
   690  
   691  	log.Tracef("Network looking up link %s in the peer links", linkId)
   692  
   693  	// lookup the peer link
   694  	var peerLink tunnel.Link
   695  
   696  	for _, link := range n.tunnel.Links() {
   697  		if link.Id() == linkId {
   698  			peerLink = link
   699  			break
   700  		}
   701  	}
   702  
   703  	if peerLink == nil {
   704  		return ErrPeerLinkNotFound
   705  	}
   706  
   707  	// if the peerLink is found in the returned links update peerLinks
   708  	log.Tracef("Network updating peer links for peer %s", peerAddr)
   709  
   710  	// add peerLink to the peerLinks map
   711  	if link, ok := n.peerLinks[peerAddr]; ok {
   712  		// if the existing has better Length then the new, replace it
   713  		if link.Length() < peerLink.Length() {
   714  			n.peerLinks[peerAddr] = peerLink
   715  		}
   716  	} else {
   717  		n.peerLinks[peerAddr] = peerLink
   718  	}
   719  
   720  	return nil
   721  }
   722  
   723  // handleCtrlConn handles ControlChannel connections
   724  func (n *network) handleCtrlConn(s tunnel.Session, msg chan *message) {
   725  	for {
   726  		m := new(transport.Message)
   727  		if err := s.Recv(m); err != nil {
   728  			log.Debugf("Network tunnel [%s] receive error: %v", ControlChannel, err)
   729  			switch err {
   730  			case io.EOF, tunnel.ErrReadTimeout:
   731  				s.Close()
   732  				return
   733  			}
   734  			continue
   735  		}
   736  
   737  		// check if peer is set
   738  		peer := m.Header["Micro-Peer"]
   739  
   740  		// check who the message is intended for
   741  		if len(peer) > 0 && peer != n.options.Id {
   742  			continue
   743  		}
   744  
   745  		select {
   746  		case msg <- &message{
   747  			msg:     m,
   748  			session: s,
   749  		}:
   750  		case <-n.closed:
   751  			return
   752  		}
   753  	}
   754  }
   755  
   756  // acceptCtrlConn accepts connections from ControlChannel
   757  func (n *network) acceptCtrlConn(l tunnel.Listener, recv chan *message) {
   758  	var i int
   759  	for {
   760  		// accept a connection
   761  		conn, err := l.Accept()
   762  		if err != nil {
   763  			sleep := backoff.Do(i)
   764  			log.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep)
   765  			time.Sleep(sleep)
   766  			if i > 5 {
   767  				// reset the counter
   768  				i = 0
   769  			}
   770  			i++
   771  			continue
   772  		}
   773  
   774  		select {
   775  		case <-n.closed:
   776  			if err := conn.Close(); err != nil {
   777  				log.Debugf("Network tunnel [%s] failed to close connection: %v", ControlChannel, err)
   778  			}
   779  			return
   780  		default:
   781  			// go handle ControlChannel connection
   782  			go n.handleCtrlConn(conn, recv)
   783  		}
   784  	}
   785  }
   786  
   787  // getHopCount queries network graph and returns hop count for given router
   788  // - Routes for local services have hop count 1
   789  // - Routes with ID of adjacent nodes have hop count 2
   790  // - Routes by peers of the advertiser have hop count 3
   791  // - Routes beyond node neighbourhood have hop count 4
   792  func (n *network) getHopCount(rtr string) int {
   793  	// make sure node.peers are not modified
   794  	n.node.RLock()
   795  	defer n.node.RUnlock()
   796  
   797  	// we are the origin of the route
   798  	if rtr == n.options.Id {
   799  		return 1
   800  	}
   801  
   802  	// the route origin is our peer
   803  	if _, ok := n.peers[rtr]; ok {
   804  		return 10
   805  	}
   806  
   807  	// the route origin is the peer of our peer
   808  	for _, peer := range n.peers {
   809  		for id := range peer.peers {
   810  			if rtr == id {
   811  				return 100
   812  			}
   813  		}
   814  	}
   815  	// otherwise we are three hops away
   816  	return 1000
   817  }
   818  
   819  // getRouteMetric calculates router metric and returns it
   820  // Route metric is calculated based on link status and route hopd count
   821  func (n *network) getRouteMetric(router string, gateway string, link string) int64 {
   822  	// set the route metric
   823  	n.RLock()
   824  	defer n.RUnlock()
   825  
   826  	if link == "local" && gateway == "" {
   827  		return 1
   828  	}
   829  
   830  	if link == "local" && gateway != "" {
   831  		return 2
   832  	}
   833  
   834  	log.Tracef("Network looking up %s link to gateway: %s", link, gateway)
   835  	if link, ok := n.peerLinks[gateway]; ok {
   836  		// maka sure delay is non-zero
   837  		delay := link.Delay()
   838  		if delay == 0 {
   839  			delay = 1
   840  		}
   841  		// get the route hop count
   842  		hops := n.getHopCount(router)
   843  		// make sure length is non-zero
   844  		length := link.Length()
   845  		if length == 0 {
   846  			log.Debugf("Link length is 0 %v %v", link, link.Length())
   847  			length = 10e9
   848  		}
   849  		log.Tracef("Network calculated metric %v delay %v length %v distance %v", (delay*length*int64(hops))/10e6, delay, length, hops)
   850  		return (delay * length * int64(hops)) / 10e6
   851  	}
   852  
   853  	log.Debugf("Network failed to find a link to gateway: %s", gateway)
   854  
   855  	return math.MaxInt64
   856  }
   857  
   858  // processCtrlChan processes messages received on ControlChannel
   859  func (n *network) processCtrlChan(listener tunnel.Listener) {
   860  	defer listener.Close()
   861  
   862  	// receive control message queue
   863  	recv := make(chan *message, 128)
   864  
   865  	// accept ControlChannel cconnections
   866  	go n.acceptCtrlConn(listener, recv)
   867  
   868  	for {
   869  		select {
   870  		case m := <-recv:
   871  			// switch on type of message and take action
   872  			switch m.msg.Header["Micro-Method"] {
   873  			case "advert":
   874  				pbRtrAdvert := &pbRtr.Advert{}
   875  
   876  				if err := proto.Unmarshal(m.msg.Body, pbRtrAdvert); err != nil {
   877  					log.Debugf("Network fail to unmarshal advert message: %v", err)
   878  					continue
   879  				}
   880  
   881  				// don't process your own messages
   882  				if pbRtrAdvert.Id == n.options.Id {
   883  					continue
   884  				}
   885  
   886  				log.Debugf("Network received advert message from: %s", pbRtrAdvert.Id)
   887  
   888  				// loookup advertising node in our peer topology
   889  				advertNode := n.node.GetPeerNode(pbRtrAdvert.Id)
   890  				if advertNode == nil {
   891  					// if we can't find the node in our topology (MaxDepth) we skipp prcessing adverts
   892  					log.Debugf("Network skipping advert message from unknown peer: %s", pbRtrAdvert.Id)
   893  					continue
   894  				}
   895  
   896  				var events []*router.Event
   897  
   898  				for _, event := range pbRtrAdvert.Events {
   899  					// we know the advertising node is not the origin of the route
   900  					if pbRtrAdvert.Id != event.Route.Router {
   901  						// if the origin router is not the advertising node peer
   902  						// we can't rule out potential routing loops so we bail here
   903  						if peer := advertNode.GetPeerNode(event.Route.Router); peer == nil {
   904  							log.Debugf("Network skipping advert message from peer: %s", pbRtrAdvert.Id)
   905  							continue
   906  						}
   907  					}
   908  
   909  					route := router.Route{
   910  						Service: event.Route.Service,
   911  						Address: event.Route.Address,
   912  						Gateway: event.Route.Gateway,
   913  						Network: event.Route.Network,
   914  						Router:  event.Route.Router,
   915  						Link:    event.Route.Link,
   916  						Metric:  event.Route.Metric,
   917  					}
   918  
   919  					// calculate route metric and add to the advertised metric
   920  					// we need to make sure we do not overflow math.MaxInt64
   921  					metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link)
   922  					log.Tracef("Network metric for router %s and gateway %s: %v", event.Route.Router, event.Route.Gateway, metric)
   923  
   924  					// check we don't overflow max int 64
   925  					if d := route.Metric + metric; d <= 0 {
   926  						// set to max int64 if we overflow
   927  						route.Metric = math.MaxInt64
   928  					} else {
   929  						// set the combined value of metrics otherwise
   930  						route.Metric = d
   931  					}
   932  
   933  					// create router event
   934  					e := &router.Event{
   935  						Type:      router.EventType(event.Type),
   936  						Timestamp: time.Unix(0, pbRtrAdvert.Timestamp),
   937  						Route:     route,
   938  					}
   939  					events = append(events, e)
   940  				}
   941  
   942  				// if no events are eligible for processing continue
   943  				if len(events) == 0 {
   944  					log.Tracef("Network no events to be processed by router: %s", n.options.Id)
   945  					continue
   946  				}
   947  
   948  				// create an advert and process it
   949  				advert := &router.Advert{
   950  					Id:        pbRtrAdvert.Id,
   951  					Type:      router.AdvertType(pbRtrAdvert.Type),
   952  					Timestamp: time.Unix(0, pbRtrAdvert.Timestamp),
   953  					TTL:       time.Duration(pbRtrAdvert.Ttl),
   954  					Events:    events,
   955  				}
   956  
   957  				log.Debugf("Network router %s processing advert: %s", n.Id(), advert.Id)
   958  				if err := n.router.Process(advert); err != nil {
   959  					log.Debugf("Network failed to process advert %s: %v", advert.Id, err)
   960  				}
   961  			case "solicit":
   962  				pbRtrSolicit := &pbRtr.Solicit{}
   963  				if err := proto.Unmarshal(m.msg.Body, pbRtrSolicit); err != nil {
   964  					log.Debugf("Network fail to unmarshal solicit message: %v", err)
   965  					continue
   966  				}
   967  
   968  				log.Debugf("Network received solicit message from: %s", pbRtrSolicit.Id)
   969  
   970  				// ignore solicitation when requested by you
   971  				if pbRtrSolicit.Id == n.options.Id {
   972  					continue
   973  				}
   974  
   975  				log.Debugf("Network router flushing routes for: %s", pbRtrSolicit.Id)
   976  
   977  				// advertise all the routes when a new node has connected
   978  				if err := n.router.Solicit(); err != nil {
   979  					log.Debugf("Network failed to solicit routes: %s", err)
   980  				}
   981  
   982  				// specify that someone solicited the route
   983  				select {
   984  				case n.solicited <- pbRtrSolicit.Id:
   985  				default:
   986  					// don't block
   987  				}
   988  			}
   989  		case <-n.closed:
   990  			return
   991  		}
   992  	}
   993  }
   994  
   995  // advertise advertises routes to the network
   996  func (n *network) advertise(advertChan <-chan *router.Advert) {
   997  	hasher := fnv.New64()
   998  	for {
   999  		select {
  1000  		// process local adverts and randomly fire them at other nodes
  1001  		case advert := <-advertChan:
  1002  			// create a proto advert
  1003  			var events []*pbRtr.Event
  1004  
  1005  			for _, event := range advert.Events {
  1006  				// the routes service address
  1007  				address := event.Route.Address
  1008  
  1009  				// only hash the address if we're advertising our own local routes
  1010  				if event.Route.Router == advert.Id {
  1011  					// hash the service before advertising it
  1012  					hasher.Reset()
  1013  					// routes for multiple instances of a service will be collapsed here.
  1014  					// TODO: once we store labels in the table this may need to change
  1015  					// to include the labels in case they differ but highly unlikely
  1016  					hasher.Write([]byte(event.Route.Service + n.node.Address()))
  1017  					address = fmt.Sprintf("%d", hasher.Sum64())
  1018  				}
  1019  				// calculate route metric to advertise
  1020  				metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link)
  1021  				// NOTE: we override Gateway, Link and Address here
  1022  				route := &pbRtr.Route{
  1023  					Service: event.Route.Service,
  1024  					Address: address,
  1025  					Gateway: n.node.Address(),
  1026  					Network: event.Route.Network,
  1027  					Router:  event.Route.Router,
  1028  					Link:    DefaultLink,
  1029  					Metric:  metric,
  1030  				}
  1031  				e := &pbRtr.Event{
  1032  					Type:      pbRtr.EventType(event.Type),
  1033  					Timestamp: event.Timestamp.UnixNano(),
  1034  					Route:     route,
  1035  				}
  1036  				events = append(events, e)
  1037  			}
  1038  
  1039  			msg := &pbRtr.Advert{
  1040  				Id:        advert.Id,
  1041  				Type:      pbRtr.AdvertType(advert.Type),
  1042  				Timestamp: advert.Timestamp.UnixNano(),
  1043  				Events:    events,
  1044  			}
  1045  
  1046  			// send the advert to all on the control channel
  1047  			// since its not a solicitation
  1048  			if advert.Type != router.Solicitation {
  1049  				if err := n.sendMsg("advert", ControlChannel, msg); err != nil {
  1050  					log.Debugf("Network failed to advertise routes: %v", err)
  1051  				}
  1052  				continue
  1053  			}
  1054  
  1055  			// it's a solication, someone asked for it
  1056  			// so we're going to pick off the node and send it
  1057  			select {
  1058  			case node := <-n.solicited:
  1059  				// someone requested the route
  1060  				n.sendTo("advert", ControlChannel, node, msg)
  1061  			default:
  1062  				// send to all since we can't get anything
  1063  				n.sendMsg("advert", ControlChannel, msg)
  1064  			}
  1065  		case <-n.closed:
  1066  			return
  1067  		}
  1068  	}
  1069  }
  1070  
  1071  func (n *network) sendConnect() {
  1072  	// send connect message to NetworkChannel
  1073  	// NOTE: in theory we could do this as soon as
  1074  	// Dial to NetworkChannel succeeds, but instead
  1075  	// we initialize all other node resources first
  1076  	msg := &pbNet.Connect{
  1077  		Node: &pbNet.Node{
  1078  			Id:      n.node.id,
  1079  			Address: n.node.address,
  1080  		},
  1081  	}
  1082  
  1083  	if err := n.sendMsg("connect", NetworkChannel, msg); err != nil {
  1084  		log.Debugf("Network failed to send connect message: %s", err)
  1085  	}
  1086  }
  1087  
  1088  // connect will wait for a link to be established and send the connect
  1089  // message. We're trying to ensure convergence pretty quickly. So we want
  1090  // to hear back. In the case we become completely disconnected we'll
  1091  // connect again once a new link is established
  1092  func (n *network) connect() {
  1093  	// discovered lets us know what we received a peer message back
  1094  	var discovered bool
  1095  	var attempts int
  1096  
  1097  	// our advertise address
  1098  	loopback := n.server.Options().Advertise
  1099  	// actual address
  1100  	address := n.tunnel.Address()
  1101  
  1102  	for {
  1103  		// connected is used to define if the link is connected
  1104  		var connected bool
  1105  
  1106  		// check the links state
  1107  		for _, link := range n.tunnel.Links() {
  1108  			// skip loopback
  1109  			if link.Loopback() {
  1110  				continue
  1111  			}
  1112  
  1113  			// if remote is ourselves
  1114  			switch link.Remote() {
  1115  			case loopback, address:
  1116  				continue
  1117  			}
  1118  
  1119  			if link.State() == "connected" {
  1120  				connected = true
  1121  				break
  1122  			}
  1123  		}
  1124  
  1125  		// if we're not connected wait
  1126  		if !connected {
  1127  			// reset discovered
  1128  			discovered = false
  1129  			// sleep for a second
  1130  			time.Sleep(time.Second)
  1131  			// now try again
  1132  			continue
  1133  		}
  1134  
  1135  		// we're connected but are we discovered?
  1136  		if !discovered {
  1137  			// recreate the clients because all the tunnel links are gone
  1138  			// so we haven't send discovery beneath
  1139  			if err := n.createClients(); err != nil {
  1140  				log.Debugf("Failed to recreate network/control clients: %v", err)
  1141  				continue
  1142  			}
  1143  
  1144  			// send the connect message
  1145  			n.sendConnect()
  1146  		}
  1147  
  1148  		// check if we've been discovered
  1149  		select {
  1150  		case <-n.discovered:
  1151  			discovered = true
  1152  			attempts = 0
  1153  		case <-n.closed:
  1154  			return
  1155  		case <-time.After(time.Second + backoff.Do(attempts)):
  1156  			// we have to try again
  1157  			attempts++
  1158  
  1159  			// reset attempts 5 == ~2mins
  1160  			if attempts > 5 {
  1161  				attempts = 0
  1162  			}
  1163  		}
  1164  	}
  1165  }
  1166  
  1167  // Connect connects the network
  1168  func (n *network) Connect() error {
  1169  	n.Lock()
  1170  	defer n.Unlock()
  1171  
  1172  	// connect network tunnel
  1173  	if err := n.tunnel.Connect(); err != nil {
  1174  		return err
  1175  	}
  1176  
  1177  	// return if already connected
  1178  	if n.connected {
  1179  		// initialise the nodes
  1180  		n.initNodes(false)
  1181  		// send the connect message
  1182  		go n.sendConnect()
  1183  		return nil
  1184  	}
  1185  
  1186  	// initialise the nodes
  1187  	n.initNodes(true)
  1188  
  1189  	// set our internal node address
  1190  	// if advertise address is not set
  1191  	if len(n.options.Advertise) == 0 {
  1192  		n.server.Init(server.Advertise(n.tunnel.Address()))
  1193  	}
  1194  
  1195  	// listen on NetworkChannel
  1196  	netListener, err := n.tunnel.Listen(
  1197  		NetworkChannel,
  1198  		tunnel.ListenMode(tunnel.Multicast),
  1199  		tunnel.ListenTimeout(AnnounceTime*2),
  1200  	)
  1201  	if err != nil {
  1202  		return err
  1203  	}
  1204  
  1205  	// listen on ControlChannel
  1206  	ctrlListener, err := n.tunnel.Listen(
  1207  		ControlChannel,
  1208  		tunnel.ListenMode(tunnel.Multicast),
  1209  		tunnel.ListenTimeout(router.AdvertiseTableTick*2),
  1210  	)
  1211  	if err != nil {
  1212  		return err
  1213  	}
  1214  
  1215  	// dial into ControlChannel to send route adverts
  1216  	ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast))
  1217  	if err != nil {
  1218  		return err
  1219  	}
  1220  
  1221  	n.tunClient[ControlChannel] = ctrlClient
  1222  
  1223  	// dial into NetworkChannel to send network messages
  1224  	netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast))
  1225  	if err != nil {
  1226  		return err
  1227  	}
  1228  
  1229  	n.tunClient[NetworkChannel] = netClient
  1230  
  1231  	// create closed channel
  1232  	n.closed = make(chan bool)
  1233  
  1234  	// start the router
  1235  	if err := n.options.Router.Start(); err != nil {
  1236  		return err
  1237  	}
  1238  
  1239  	// start advertising routes
  1240  	advertChan, err := n.options.Router.Advertise()
  1241  	if err != nil {
  1242  		return err
  1243  	}
  1244  
  1245  	// start the server
  1246  	if err := n.server.Start(); err != nil {
  1247  		return err
  1248  	}
  1249  
  1250  	// advertise service routes
  1251  	go n.advertise(advertChan)
  1252  	// listen to network messages
  1253  	go n.processNetChan(netListener)
  1254  	// accept and process routes
  1255  	go n.processCtrlChan(ctrlListener)
  1256  	// manage connection once links are established
  1257  	go n.connect()
  1258  	// resolve nodes, broadcast announcements and prune stale nodes
  1259  	go n.manage()
  1260  
  1261  	// we're now connected
  1262  	n.connected = true
  1263  
  1264  	return nil
  1265  }
  1266  
  1267  func (n *network) close() error {
  1268  	// stop the server
  1269  	if err := n.server.Stop(); err != nil {
  1270  		return err
  1271  	}
  1272  
  1273  	// stop the router
  1274  	if err := n.router.Stop(); err != nil {
  1275  		return err
  1276  	}
  1277  
  1278  	// close the tunnel
  1279  	if err := n.tunnel.Close(); err != nil {
  1280  		return err
  1281  	}
  1282  
  1283  	return nil
  1284  }
  1285  
  1286  // createClients is used to create new clients in the event we lose all the tunnels
  1287  func (n *network) createClients() error {
  1288  	// dial into ControlChannel to send route adverts
  1289  	ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast))
  1290  	if err != nil {
  1291  		return err
  1292  	}
  1293  
  1294  	// dial into NetworkChannel to send network messages
  1295  	netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast))
  1296  	if err != nil {
  1297  		return err
  1298  	}
  1299  
  1300  	n.Lock()
  1301  	defer n.Unlock()
  1302  
  1303  	// set the control client
  1304  	c, ok := n.tunClient[ControlChannel]
  1305  	if ok {
  1306  		c.Close()
  1307  	}
  1308  	n.tunClient[ControlChannel] = ctrlClient
  1309  
  1310  	// set the network client
  1311  	c, ok = n.tunClient[NetworkChannel]
  1312  	if ok {
  1313  		c.Close()
  1314  	}
  1315  	n.tunClient[NetworkChannel] = netClient
  1316  
  1317  	return nil
  1318  }
  1319  
  1320  // Close closes network connection
  1321  func (n *network) Close() error {
  1322  	n.Lock()
  1323  
  1324  	if !n.connected {
  1325  		n.Unlock()
  1326  		return nil
  1327  	}
  1328  
  1329  	select {
  1330  	case <-n.closed:
  1331  		n.Unlock()
  1332  		return nil
  1333  	default:
  1334  		// TODO: send close message to the network channel
  1335  		close(n.closed)
  1336  		// set connected to false
  1337  		n.connected = false
  1338  
  1339  		// unlock the lock otherwise we'll deadlock sending the close
  1340  		n.Unlock()
  1341  
  1342  		msg := &pbNet.Close{
  1343  			Node: &pbNet.Node{
  1344  				Id:      n.node.id,
  1345  				Address: n.node.address,
  1346  			},
  1347  		}
  1348  
  1349  		if err := n.sendMsg("close", NetworkChannel, msg); err != nil {
  1350  			log.Debugf("Network failed to send close message: %s", err)
  1351  		}
  1352  	}
  1353  
  1354  	return n.close()
  1355  }
  1356  
  1357  // Client returns network client
  1358  func (n *network) Client() client.Client {
  1359  	return n.client
  1360  }
  1361  
  1362  // Server returns network server
  1363  func (n *network) Server() server.Server {
  1364  	return n.server
  1365  }