github.com/annwntech/go-micro/v2@v2.9.5/network/default.go (about)

     1  package network
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"hash/fnv"
     7  	"io"
     8  	"math"
     9  	"math/rand"
    10  	"sort"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/golang/protobuf/proto"
    15  	"github.com/annwntech/go-micro/v2/client"
    16  	cmucp "github.com/annwntech/go-micro/v2/client/mucp"
    17  	rtr "github.com/annwntech/go-micro/v2/client/selector/router"
    18  	"github.com/annwntech/go-micro/v2/logger"
    19  	"github.com/annwntech/go-micro/v2/network/resolver/dns"
    20  	pbNet "github.com/annwntech/go-micro/v2/network/service/proto"
    21  	"github.com/annwntech/go-micro/v2/proxy"
    22  	"github.com/annwntech/go-micro/v2/router"
    23  	pbRtr "github.com/annwntech/go-micro/v2/router/service/proto"
    24  	"github.com/annwntech/go-micro/v2/server"
    25  	smucp "github.com/annwntech/go-micro/v2/server/mucp"
    26  	"github.com/annwntech/go-micro/v2/transport"
    27  	"github.com/annwntech/go-micro/v2/tunnel"
    28  	bun "github.com/annwntech/go-micro/v2/tunnel/broker"
    29  	tun "github.com/annwntech/go-micro/v2/tunnel/transport"
    30  	"github.com/annwntech/go-micro/v2/util/backoff"
    31  	pbUtil "github.com/annwntech/go-micro/v2/util/proto"
    32  )
    33  
    34  var (
    35  	// NetworkChannel is the name of the tunnel channel for passing network messages
    36  	NetworkChannel = "network"
    37  	// ControlChannel is the name of the tunnel channel for passing control message
    38  	ControlChannel = "control"
    39  	// DefaultLink is default network link
    40  	DefaultLink = "network"
    41  	// MaxConnections is the max number of network client connections
    42  	MaxConnections = 3
    43  	// MaxPeerErrors is the max number of peer errors before we remove it from network graph
    44  	MaxPeerErrors = 3
    45  )
    46  
    47  var (
    48  	// ErrClientNotFound is returned when client for tunnel channel could not be found
    49  	ErrClientNotFound = errors.New("client not found")
    50  	// ErrPeerLinkNotFound is returned when peer link could not be found in tunnel Links
    51  	ErrPeerLinkNotFound = errors.New("peer link not found")
    52  	// ErrPeerMaxExceeded is returned when peer has reached its max error count limit
    53  	ErrPeerMaxExceeded = errors.New("peer max errors exceeded")
    54  )
    55  
// network implements the Network interface.
// It embeds the local node and bundles the router, proxy, tunnel, server
// and client the node uses to take part in the network.
type network struct {
	// node is the embedded local network node
	*node
	// options configure the network
	options Options
	// router is the network router
	router router.Router
	// proxy is the network proxy
	proxy proxy.Proxy
	// tunnel is the network tunnel
	tunnel tunnel.Tunnel
	// server is the network server
	server server.Server
	// client is the network client
	client client.Client

	// tunClient is a map of tunnel channel clients
	tunClient map[string]tunnel.Session
	// peerLinks is a map of links for each peer;
	// getRouteMetric looks links up by gateway address
	peerLinks map[string]tunnel.Link

	// the embedded lock is taken by the methods that read or write options
	// and peerLinks (Init, Options, Name, getRouteMetric)
	sync.RWMutex
	// connected marks the network as connected
	connected bool
	// closed is selected on by the network goroutines to learn that the
	// network has been closed
	closed chan bool
	// discovered signals that we have been discovered by the network
	discovered chan bool
}
    86  
// message is a network message: a transport message paired with the
// tunnel session it was received on.
type message struct {
	// msg is the transport message
	msg *transport.Message
	// session is the tunnel session the message arrived on
	session tunnel.Session
}
    94  
    95  // newNetwork returns a new network node
    96  func newNetwork(opts ...Option) Network {
    97  	// create default options
    98  	options := DefaultOptions()
    99  	// initialize network options
   100  	for _, o := range opts {
   101  		o(&options)
   102  	}
   103  
   104  	// set the address to a hashed address
   105  	hasher := fnv.New64()
   106  	hasher.Write([]byte(options.Address + options.Id))
   107  	address := fmt.Sprintf("%d", hasher.Sum64())
   108  
   109  	// set the address to advertise
   110  	var advertise string
   111  	var peerAddress string
   112  
   113  	if len(options.Advertise) > 0 {
   114  		advertise = options.Advertise
   115  		peerAddress = options.Advertise
   116  	} else {
   117  		advertise = options.Address
   118  		peerAddress = address
   119  	}
   120  
   121  	// init tunnel address to the network bind address
   122  	options.Tunnel.Init(
   123  		tunnel.Address(options.Address),
   124  	)
   125  
   126  	// init router Id to the network id
   127  	options.Router.Init(
   128  		router.Id(options.Id),
   129  		router.Address(peerAddress),
   130  	)
   131  
   132  	// create tunnel client with tunnel transport
   133  	tunTransport := tun.NewTransport(
   134  		tun.WithTunnel(options.Tunnel),
   135  	)
   136  
   137  	// create the tunnel broker
   138  	tunBroker := bun.NewBroker(
   139  		bun.WithTunnel(options.Tunnel),
   140  	)
   141  
   142  	// server is network server
   143  	server := smucp.NewServer(
   144  		server.Id(options.Id),
   145  		server.Address(peerAddress),
   146  		server.Advertise(advertise),
   147  		server.Name(options.Name),
   148  		server.Transport(tunTransport),
   149  		server.Broker(tunBroker),
   150  	)
   151  
   152  	// client is network client
   153  	client := cmucp.NewClient(
   154  		client.Broker(tunBroker),
   155  		client.Transport(tunTransport),
   156  		client.Selector(
   157  			rtr.NewSelector(
   158  				rtr.WithRouter(options.Router),
   159  			),
   160  		),
   161  	)
   162  
   163  	network := &network{
   164  		node: &node{
   165  			id:      options.Id,
   166  			address: peerAddress,
   167  			peers:   make(map[string]*node),
   168  			status:  newStatus(),
   169  		},
   170  		options:    options,
   171  		router:     options.Router,
   172  		proxy:      options.Proxy,
   173  		tunnel:     options.Tunnel,
   174  		server:     server,
   175  		client:     client,
   176  		tunClient:  make(map[string]tunnel.Session),
   177  		peerLinks:  make(map[string]tunnel.Link),
   178  		discovered: make(chan bool, 1),
   179  	}
   180  
   181  	network.node.network = network
   182  
   183  	return network
   184  }
   185  
   186  func (n *network) Init(opts ...Option) error {
   187  	n.Lock()
   188  	defer n.Unlock()
   189  
   190  	// TODO: maybe only allow reinit of certain opts
   191  	for _, o := range opts {
   192  		o(&n.options)
   193  	}
   194  
   195  	return nil
   196  }
   197  
   198  // Options returns network options
   199  func (n *network) Options() Options {
   200  	n.RLock()
   201  	defer n.RUnlock()
   202  
   203  	options := n.options
   204  
   205  	return options
   206  }
   207  
   208  // Name returns network name
   209  func (n *network) Name() string {
   210  	n.RLock()
   211  	defer n.RUnlock()
   212  
   213  	name := n.options.Name
   214  
   215  	return name
   216  }
   217  
   218  // acceptNetConn accepts connections from NetworkChannel
   219  func (n *network) acceptNetConn(l tunnel.Listener, recv chan *message) {
   220  	var i int
   221  	for {
   222  		// accept a connection
   223  		conn, err := l.Accept()
   224  		if err != nil {
   225  			sleep := backoff.Do(i)
   226  			logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep)
   227  			time.Sleep(sleep)
   228  			i++
   229  			continue
   230  		}
   231  
   232  		select {
   233  		case <-n.closed:
   234  			if err := conn.Close(); err != nil {
   235  				logger.Debugf("Network tunnel [%s] failed to close connection: %v", NetworkChannel, err)
   236  			}
   237  			return
   238  		default:
   239  			// go handle NetworkChannel connection
   240  			go n.handleNetConn(conn, recv)
   241  		}
   242  	}
   243  }
   244  
   245  // acceptCtrlConn accepts connections from ControlChannel
   246  func (n *network) acceptCtrlConn(l tunnel.Listener, recv chan *message) {
   247  	var i int
   248  	for {
   249  		// accept a connection
   250  		conn, err := l.Accept()
   251  		if err != nil {
   252  			sleep := backoff.Do(i)
   253  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   254  				logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep)
   255  			}
   256  			time.Sleep(sleep)
   257  			i++
   258  			continue
   259  		}
   260  
   261  		select {
   262  		case <-n.closed:
   263  			if err := conn.Close(); err != nil {
   264  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   265  					logger.Debugf("Network tunnel [%s] failed to close connection: %v", ControlChannel, err)
   266  				}
   267  			}
   268  			return
   269  		default:
   270  			// go handle ControlChannel connection
   271  			go n.handleCtrlConn(conn, recv)
   272  		}
   273  	}
   274  }
   275  
   276  // maskRoute will mask the route so that we apply the right values
   277  func (n *network) maskRoute(r *pbRtr.Route) {
   278  	hasher := fnv.New64()
   279  	// the routes service address
   280  	address := r.Address
   281  
   282  	// only hash the address if we're advertising our own local routes
   283  	// avoid hashing * based routes
   284  	if r.Router == n.Id() && r.Address != "*" {
   285  		// hash the service before advertising it
   286  		hasher.Reset()
   287  		// routes for multiple instances of a service will be collapsed here.
   288  		// TODO: once we store labels in the table this may need to change
   289  		// to include the labels in case they differ but highly unlikely
   290  		hasher.Write([]byte(r.Service + n.Address()))
   291  		address = fmt.Sprintf("%d", hasher.Sum64())
   292  	}
   293  
   294  	// calculate route metric to advertise
   295  	metric := n.getRouteMetric(r.Router, r.Gateway, r.Link)
   296  
   297  	// NOTE: we override Gateway, Link and Address here
   298  	r.Address = address
   299  	r.Gateway = n.Address()
   300  	r.Link = DefaultLink
   301  	r.Metric = metric
   302  }
   303  
   304  // advertise advertises routes to the network
   305  func (n *network) advertise(advertChan <-chan *router.Advert) {
   306  	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
   307  	for {
   308  		select {
   309  		// process local adverts and randomly fire them at other nodes
   310  		case advert := <-advertChan:
   311  			// create a proto advert
   312  			var events []*pbRtr.Event
   313  
   314  			for _, event := range advert.Events {
   315  				// make a copy of the route
   316  				route := &pbRtr.Route{
   317  					Service: event.Route.Service,
   318  					Address: event.Route.Address,
   319  					Gateway: event.Route.Gateway,
   320  					Network: event.Route.Network,
   321  					Router:  event.Route.Router,
   322  					Link:    event.Route.Link,
   323  					Metric:  event.Route.Metric,
   324  				}
   325  
   326  				// override the various values
   327  				n.maskRoute(route)
   328  
   329  				e := &pbRtr.Event{
   330  					Type:      pbRtr.EventType(event.Type),
   331  					Timestamp: event.Timestamp.UnixNano(),
   332  					Route:     route,
   333  				}
   334  
   335  				events = append(events, e)
   336  			}
   337  
   338  			msg := &pbRtr.Advert{
   339  				Id:        advert.Id,
   340  				Type:      pbRtr.AdvertType(advert.Type),
   341  				Timestamp: advert.Timestamp.UnixNano(),
   342  				Events:    events,
   343  			}
   344  
   345  			// get a list of node peers
   346  			peers := n.Peers()
   347  
   348  			// continue if there is no one to send to
   349  			if len(peers) == 0 {
   350  				continue
   351  			}
   352  
   353  			// advertise to max 3 peers
   354  			max := len(peers)
   355  			if max > 3 {
   356  				max = 3
   357  			}
   358  
   359  			for i := 0; i < max; i++ {
   360  				if peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id()); peer != nil {
   361  					if err := n.sendTo("advert", ControlChannel, peer, msg); err != nil {
   362  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   363  							logger.Debugf("Network failed to advertise routes to %s: %v", peer.Id(), err)
   364  						}
   365  					}
   366  				}
   367  			}
   368  		case <-n.closed:
   369  			return
   370  		}
   371  	}
   372  }
   373  
   374  // initNodes initializes tunnel with a list of resolved nodes
   375  func (n *network) initNodes(startup bool) {
   376  	nodes, err := n.resolveNodes()
   377  	// NOTE: this condition never fires
   378  	// as resolveNodes() never returns error
   379  	if err != nil && !startup {
   380  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   381  			logger.Debugf("Network failed to init nodes: %v", err)
   382  		}
   383  		return
   384  	}
   385  
   386  	// strip self
   387  	var init []string
   388  
   389  	// our current address
   390  	advertised := n.server.Options().Advertise
   391  
   392  	for _, node := range nodes {
   393  		// skip self
   394  		if node == advertised {
   395  			continue
   396  		}
   397  		// add the node
   398  		init = append(init, node)
   399  	}
   400  
   401  	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   402  		// initialize the tunnel
   403  		logger.Tracef("Network initialising nodes %+v\n", init)
   404  	}
   405  
   406  	n.tunnel.Init(
   407  		tunnel.Nodes(nodes...),
   408  	)
   409  }
   410  
   411  // resolveNodes resolves network nodes to addresses
   412  func (n *network) resolveNodes() ([]string, error) {
   413  	// resolve the network address to network nodes
   414  	records, err := n.options.Resolver.Resolve(n.options.Name)
   415  	if err != nil {
   416  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   417  			logger.Debugf("Network failed to resolve nodes: %v", err)
   418  		}
   419  	}
   420  
   421  	// sort by lowest priority
   422  	if err == nil {
   423  		sort.Slice(records, func(i, j int) bool { return records[i].Priority < records[j].Priority })
   424  	}
   425  
   426  	// keep processing
   427  
   428  	nodeMap := make(map[string]bool)
   429  
   430  	// collect network node addresses
   431  	//nolint:prealloc
   432  	var nodes []string
   433  	var i int
   434  
   435  	for _, record := range records {
   436  		if _, ok := nodeMap[record.Address]; ok {
   437  			continue
   438  		}
   439  
   440  		nodeMap[record.Address] = true
   441  		nodes = append(nodes, record.Address)
   442  
   443  		i++
   444  
   445  		// break once MaxConnection nodes has been reached
   446  		if i == MaxConnections {
   447  			break
   448  		}
   449  	}
   450  
   451  	// use the DNS resolver to expand peers
   452  	dns := &dns.Resolver{}
   453  
   454  	// append seed nodes if we have them
   455  	for _, node := range n.options.Nodes {
   456  		// resolve anything that looks like a host name
   457  		records, err := dns.Resolve(node)
   458  		if err != nil {
   459  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   460  				logger.Debugf("Failed to resolve %v %v", node, err)
   461  			}
   462  			continue
   463  		}
   464  
   465  		// add to the node map
   466  		for _, record := range records {
   467  			if _, ok := nodeMap[record.Address]; !ok {
   468  				nodes = append(nodes, record.Address)
   469  			}
   470  		}
   471  	}
   472  
   473  	return nodes, nil
   474  }
   475  
   476  // handleNetConn handles network announcement messages
   477  func (n *network) handleNetConn(s tunnel.Session, msg chan *message) {
   478  	for {
   479  		m := new(transport.Message)
   480  		if err := s.Recv(m); err != nil {
   481  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   482  				logger.Debugf("Network tunnel [%s] receive error: %v", NetworkChannel, err)
   483  			}
   484  			switch err {
   485  			case io.EOF, tunnel.ErrReadTimeout:
   486  				s.Close()
   487  				return
   488  			}
   489  			continue
   490  		}
   491  
   492  		// check if peer is set
   493  		peer := m.Header["Micro-Peer"]
   494  
   495  		// check who the message is intended for
   496  		if len(peer) > 0 && peer != n.options.Id {
   497  			continue
   498  		}
   499  
   500  		select {
   501  		case msg <- &message{
   502  			msg:     m,
   503  			session: s,
   504  		}:
   505  		case <-n.closed:
   506  			return
   507  		}
   508  	}
   509  }
   510  
   511  // handleCtrlConn handles ControlChannel connections
   512  func (n *network) handleCtrlConn(s tunnel.Session, msg chan *message) {
   513  	for {
   514  		m := new(transport.Message)
   515  		if err := s.Recv(m); err != nil {
   516  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   517  				logger.Debugf("Network tunnel [%s] receive error: %v", ControlChannel, err)
   518  			}
   519  			switch err {
   520  			case io.EOF, tunnel.ErrReadTimeout:
   521  				s.Close()
   522  				return
   523  			}
   524  			continue
   525  		}
   526  
   527  		// check if peer is set
   528  		peer := m.Header["Micro-Peer"]
   529  
   530  		// check who the message is intended for
   531  		if len(peer) > 0 && peer != n.options.Id {
   532  			continue
   533  		}
   534  
   535  		select {
   536  		case msg <- &message{
   537  			msg:     m,
   538  			session: s,
   539  		}:
   540  		case <-n.closed:
   541  			return
   542  		}
   543  	}
   544  }
   545  
   546  // getHopCount queries network graph and returns hop count for given router
   547  // NOTE: this should be called getHopeMetric
   548  // - Routes for local services have hop count 1
   549  // - Routes with ID of adjacent nodes have hop count 10
   550  // - Routes by peers of the advertiser have hop count 100
   551  // - Routes beyond node neighbourhood have hop count 1000
   552  func (n *network) getHopCount(rtr string) int {
   553  	// make sure node.peers are not modified
   554  	n.node.RLock()
   555  	defer n.node.RUnlock()
   556  
   557  	// we are the origin of the route
   558  	if rtr == n.options.Id {
   559  		return 1
   560  	}
   561  
   562  	// the route origin is our peer
   563  	if _, ok := n.node.peers[rtr]; ok {
   564  		return 10
   565  	}
   566  
   567  	// the route origin is the peer of our peer
   568  	for _, peer := range n.node.peers {
   569  		for id := range peer.peers {
   570  			if rtr == id {
   571  				return 100
   572  			}
   573  		}
   574  	}
   575  	// otherwise we are three hops away
   576  	return 1000
   577  }
   578  
   579  // getRouteMetric calculates router metric and returns it
   580  // Route metric is calculated based on link status and route hopd count
   581  func (n *network) getRouteMetric(router string, gateway string, link string) int64 {
   582  	// set the route metric
   583  	n.RLock()
   584  	defer n.RUnlock()
   585  
   586  	// local links are marked as 1
   587  	if link == "local" && gateway == "" {
   588  		return 1
   589  	}
   590  
   591  	// local links from other gateways as 2
   592  	if link == "local" && gateway != "" {
   593  		return 2
   594  	}
   595  
   596  	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   597  		logger.Tracef("Network looking up %s link to gateway: %s", link, gateway)
   598  	}
   599  	// attempt to find link based on gateway address
   600  	lnk, ok := n.peerLinks[gateway]
   601  	if !ok {
   602  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   603  			logger.Debugf("Network failed to find a link to gateway: %s", gateway)
   604  		}
   605  		// no link found so infinite metric returned
   606  		return math.MaxInt64
   607  	}
   608  
   609  	// calculating metric
   610  
   611  	delay := lnk.Delay()
   612  	hops := n.getHopCount(router)
   613  	length := lnk.Length()
   614  
   615  	// make sure delay is non-zero
   616  	if delay == 0 {
   617  		delay = 1
   618  	}
   619  
   620  	// make sure length is non-zero
   621  	if length == 0 {
   622  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   623  			logger.Debugf("Link length is 0 %v %v", link, lnk.Length())
   624  		}
   625  		length = 10e9
   626  	}
   627  
   628  	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   629  		logger.Tracef("Network calculated metric %v delay %v length %v distance %v", (delay*length*int64(hops))/10e6, delay, length, hops)
   630  	}
   631  
   632  	return (delay * length * int64(hops)) / 10e6
   633  }
   634  
   635  // processCtrlChan processes messages received on ControlChannel
   636  func (n *network) processCtrlChan(listener tunnel.Listener) {
   637  	defer listener.Close()
   638  
   639  	// receive control message queue
   640  	recv := make(chan *message, 128)
   641  
   642  	// accept ControlChannel cconnections
   643  	go n.acceptCtrlConn(listener, recv)
   644  
   645  	for {
   646  		select {
   647  		case m := <-recv:
   648  			// switch on type of message and take action
   649  			switch m.msg.Header["Micro-Method"] {
   650  			case "advert":
   651  				pbRtrAdvert := &pbRtr.Advert{}
   652  
   653  				if err := proto.Unmarshal(m.msg.Body, pbRtrAdvert); err != nil {
   654  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   655  						logger.Debugf("Network fail to unmarshal advert message: %v", err)
   656  					}
   657  					continue
   658  				}
   659  
   660  				// don't process your own messages
   661  				if pbRtrAdvert.Id == n.options.Id {
   662  					continue
   663  				}
   664  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   665  					logger.Debugf("Network received advert message from: %s", pbRtrAdvert.Id)
   666  				}
   667  
   668  				// loookup advertising node in our peer topology
   669  				advertNode := n.node.GetPeerNode(pbRtrAdvert.Id)
   670  				if advertNode == nil {
   671  					// if we can't find the node in our topology (MaxDepth) we skipp prcessing adverts
   672  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   673  						logger.Debugf("Network skipping advert message from unknown peer: %s", pbRtrAdvert.Id)
   674  					}
   675  					continue
   676  				}
   677  
   678  				var events []*router.Event
   679  
   680  				for _, event := range pbRtrAdvert.Events {
   681  					// for backwards compatibility reasons
   682  					if event == nil || event.Route == nil {
   683  						continue
   684  					}
   685  
   686  					// we know the advertising node is not the origin of the route
   687  					if pbRtrAdvert.Id != event.Route.Router {
   688  						// if the origin router is not the advertising node peer
   689  						// we can't rule out potential routing loops so we bail here
   690  						if peer := advertNode.GetPeerNode(event.Route.Router); peer == nil {
   691  							if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   692  								logger.Debugf("Network skipping advert message from peer: %s", pbRtrAdvert.Id)
   693  							}
   694  							continue
   695  						}
   696  					}
   697  
   698  					route := router.Route{
   699  						Service: event.Route.Service,
   700  						Address: event.Route.Address,
   701  						Gateway: event.Route.Gateway,
   702  						Network: event.Route.Network,
   703  						Router:  event.Route.Router,
   704  						Link:    event.Route.Link,
   705  						Metric:  event.Route.Metric,
   706  					}
   707  
   708  					// calculate route metric and add to the advertised metric
   709  					// we need to make sure we do not overflow math.MaxInt64
   710  					metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link)
   711  					if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   712  						logger.Tracef("Network metric for router %s and gateway %s: %v", event.Route.Router, event.Route.Gateway, metric)
   713  					}
   714  
   715  					// check we don't overflow max int 64
   716  					if d := route.Metric + metric; d <= 0 {
   717  						// set to max int64 if we overflow
   718  						route.Metric = math.MaxInt64
   719  					} else {
   720  						// set the combined value of metrics otherwise
   721  						route.Metric = d
   722  					}
   723  
   724  					// create router event
   725  					e := &router.Event{
   726  						Type:      router.EventType(event.Type),
   727  						Timestamp: time.Unix(0, pbRtrAdvert.Timestamp),
   728  						Route:     route,
   729  					}
   730  					events = append(events, e)
   731  				}
   732  
   733  				// if no events are eligible for processing continue
   734  				if len(events) == 0 {
   735  					if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   736  						logger.Tracef("Network no events to be processed by router: %s", n.options.Id)
   737  					}
   738  					continue
   739  				}
   740  
   741  				// create an advert and process it
   742  				advert := &router.Advert{
   743  					Id:        pbRtrAdvert.Id,
   744  					Type:      router.AdvertType(pbRtrAdvert.Type),
   745  					Timestamp: time.Unix(0, pbRtrAdvert.Timestamp),
   746  					TTL:       time.Duration(pbRtrAdvert.Ttl),
   747  					Events:    events,
   748  				}
   749  
   750  				if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   751  					logger.Tracef("Network router %s processing advert: %s", n.Id(), advert.Id)
   752  				}
   753  				if err := n.router.Process(advert); err != nil {
   754  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   755  						logger.Debugf("Network failed to process advert %s: %v", advert.Id, err)
   756  					}
   757  				}
   758  			}
   759  		case <-n.closed:
   760  			return
   761  		}
   762  	}
   763  }
   764  
   765  // processNetChan processes messages received on NetworkChannel
   766  func (n *network) processNetChan(listener tunnel.Listener) {
   767  	defer listener.Close()
   768  
   769  	// receive network message queue
   770  	recv := make(chan *message, 128)
   771  
   772  	// accept NetworkChannel connections
   773  	go n.acceptNetConn(listener, recv)
   774  
   775  	for {
   776  		select {
   777  		case m := <-recv:
   778  			// switch on type of message and take action
   779  			switch m.msg.Header["Micro-Method"] {
   780  			case "connect":
   781  				// mark the time the message has been received
   782  				now := time.Now()
   783  
   784  				pbNetConnect := &pbNet.Connect{}
   785  				if err := proto.Unmarshal(m.msg.Body, pbNetConnect); err != nil {
   786  					logger.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err)
   787  					continue
   788  				}
   789  
   790  				// don't process your own messages
   791  				if pbNetConnect.Node.Id == n.options.Id {
   792  					continue
   793  				}
   794  
   795  				logger.Debugf("Network received connect message from: %s", pbNetConnect.Node.Id)
   796  
   797  				peer := &node{
   798  					id:       pbNetConnect.Node.Id,
   799  					address:  pbNetConnect.Node.Address,
   800  					link:     m.msg.Header["Micro-Link"],
   801  					peers:    make(map[string]*node),
   802  					status:   newStatus(),
   803  					lastSeen: now,
   804  				}
   805  
   806  				// update peer links
   807  
   808  				// TODO: should we do this only if we manage to add a peer
   809  				// What should we do if the peer links failed to be updated?
   810  				if err := n.updatePeerLinks(peer); err != nil {
   811  					logger.Debugf("Network failed updating peer links: %s", err)
   812  				}
   813  
   814  				// add peer to the list of node peers
   815  				if err := n.AddPeer(peer); err == ErrPeerExists {
   816  					logger.Tracef("Network peer exists, refreshing: %s", peer.id)
   817  					// update lastSeen time for the peer
   818  					if err := n.RefreshPeer(peer.id, peer.link, now); err != nil {
   819  						logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err)
   820  					}
   821  				}
   822  
   823  				// we send the sync message because someone has sent connect
   824  				// and wants to either connect or reconnect to the network
   825  				// The faster it gets the network config (routes and peer graph)
   826  				// the faster the network converges to a stable state
   827  
   828  				go func() {
   829  					// get node peer graph to send back to the connecting node
   830  					node := PeersToProto(n.node, MaxDepth)
   831  
   832  					msg := &pbNet.Sync{
   833  						Peer: node,
   834  					}
   835  
   836  					// get a list of the best routes for each service in our routing table
   837  					routes, err := n.getProtoRoutes()
   838  					if err != nil {
   839  						logger.Debugf("Network node %s failed listing routes: %v", n.id, err)
   840  					}
   841  					// attached the routes to the message
   842  					msg.Routes = routes
   843  
   844  					// send sync message to the newly connected peer
   845  					if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil {
   846  						logger.Debugf("Network failed to send sync message: %v", err)
   847  					}
   848  				}()
   849  			case "peer":
   850  				// mark the time the message has been received
   851  				now := time.Now()
   852  				pbNetPeer := &pbNet.Peer{}
   853  
   854  				if err := proto.Unmarshal(m.msg.Body, pbNetPeer); err != nil {
   855  					logger.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err)
   856  					continue
   857  				}
   858  
   859  				// don't process your own messages
   860  				if pbNetPeer.Node.Id == n.options.Id {
   861  					continue
   862  				}
   863  
   864  				logger.Debugf("Network received peer message from: %s %s", pbNetPeer.Node.Id, pbNetPeer.Node.Address)
   865  
   866  				peer := &node{
   867  					id:       pbNetPeer.Node.Id,
   868  					address:  pbNetPeer.Node.Address,
   869  					link:     m.msg.Header["Micro-Link"],
   870  					peers:    make(map[string]*node),
   871  					status:   newPeerStatus(pbNetPeer),
   872  					lastSeen: now,
   873  				}
   874  
   875  				// update peer links
   876  
   877  				// TODO: should we do this only if we manage to add a peer
   878  				// What should we do if the peer links failed to be updated?
   879  				if err := n.updatePeerLinks(peer); err != nil {
   880  					logger.Debugf("Network failed updating peer links: %s", err)
   881  				}
   882  
   883  				// if it's a new peer i.e. we do not have it in our graph, we request full sync
   884  				if err := n.node.AddPeer(peer); err == nil {
   885  					go func() {
   886  						// marshal node graph into protobuf
   887  						node := PeersToProto(n.node, MaxDepth)
   888  
   889  						msg := &pbNet.Sync{
   890  							Peer: node,
   891  						}
   892  
   893  						// get a list of the best routes for each service in our routing table
   894  						routes, err := n.getProtoRoutes()
   895  						if err != nil {
   896  							logger.Debugf("Network node %s failed listing routes: %v", n.id, err)
   897  						}
   898  						// attached the routes to the message
   899  						msg.Routes = routes
   900  
   901  						// send sync message to the newly connected peer
   902  						if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil {
   903  							logger.Debugf("Network failed to send sync message: %v", err)
   904  						}
   905  					}()
   906  
   907  					continue
   908  					// if we already have the peer in our graph, skip further steps
   909  				} else if err != ErrPeerExists {
   910  					logger.Debugf("Network got error adding peer %v", err)
   911  					continue
   912  				}
   913  
   914  				logger.Tracef("Network peer exists, refreshing: %s", pbNetPeer.Node.Id)
   915  
   916  				// update lastSeen time for the peer
   917  				if err := n.RefreshPeer(peer.id, peer.link, now); err != nil {
   918  					logger.Debugf("Network failed refreshing peer %s: %v", pbNetPeer.Node.Id, err)
   919  				}
   920  
   921  				// NOTE: we don't unpack MaxDepth toplogy
   922  				peer = UnpackPeerTopology(pbNetPeer, now, MaxDepth-1)
   923  				// update the link
   924  				peer.link = m.msg.Header["Micro-Link"]
   925  
   926  				logger.Tracef("Network updating topology of node: %s", n.node.id)
   927  				if err := n.node.UpdatePeer(peer); err != nil {
   928  					logger.Debugf("Network failed to update peers: %v", err)
   929  				}
   930  
   931  				// tell the connect loop that we've been discovered
   932  				// so it stops sending connect messages out
   933  				select {
   934  				case n.discovered <- true:
   935  				default:
   936  					// don't block here
   937  				}
   938  			case "sync":
   939  				// record the timestamp of the message receipt
   940  				now := time.Now()
   941  
   942  				pbNetSync := &pbNet.Sync{}
   943  				if err := proto.Unmarshal(m.msg.Body, pbNetSync); err != nil {
   944  					logger.Debugf("Network tunnel [%s] sync unmarshal error: %v", NetworkChannel, err)
   945  					continue
   946  				}
   947  
   948  				// don't process your own messages
   949  				if pbNetSync.Peer.Node.Id == n.options.Id {
   950  					continue
   951  				}
   952  
   953  				logger.Debugf("Network received sync message from: %s", pbNetSync.Peer.Node.Id)
   954  
   955  				peer := &node{
   956  					id:       pbNetSync.Peer.Node.Id,
   957  					address:  pbNetSync.Peer.Node.Address,
   958  					link:     m.msg.Header["Micro-Link"],
   959  					peers:    make(map[string]*node),
   960  					status:   newPeerStatus(pbNetSync.Peer),
   961  					lastSeen: now,
   962  				}
   963  
   964  				// update peer links
   965  
   966  				// TODO: should we do this only if we manage to add a peer
   967  				// What should we do if the peer links failed to be updated?
   968  				if err := n.updatePeerLinks(peer); err != nil {
   969  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   970  						logger.Debugf("Network failed updating peer links: %s", err)
   971  					}
   972  				}
   973  
   974  				// add peer to the list of node peers
   975  				if err := n.node.AddPeer(peer); err == ErrPeerExists {
   976  					if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   977  						logger.Tracef("Network peer exists, refreshing: %s", peer.id)
   978  					}
   979  					// update lastSeen time for the existing node
   980  					if err := n.RefreshPeer(peer.id, peer.link, now); err != nil {
   981  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   982  							logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err)
   983  						}
   984  					}
   985  				}
   986  
   987  				// when we receive a sync message we update our routing table
   988  				// and send a peer message back to the network to announce our presence
   989  
   990  				// add all the routes we have received in the sync message
   991  				for _, pbRoute := range pbNetSync.Routes {
   992  					// unmarshal the routes received from remote peer
   993  					route := pbUtil.ProtoToRoute(pbRoute)
   994  					// continue if we are the originator of the route
   995  					if route.Router == n.router.Options().Id {
   996  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   997  							logger.Debugf("Network node %s skipping route addition: route already present", n.id)
   998  						}
   999  						continue
  1000  					}
  1001  
  1002  					metric := n.getRouteMetric(route.Router, route.Gateway, route.Link)
  1003  					// check we don't overflow max int 64
  1004  					if d := route.Metric + metric; d <= 0 {
  1005  						// set to max int64 if we overflow
  1006  						route.Metric = math.MaxInt64
  1007  					} else {
  1008  						// set the combined value of metrics otherwise
  1009  						route.Metric = d
  1010  					}
  1011  
  1012  					/////////////////////////////////////////////////////////////////////
  1013  					//          maybe we should not be this clever ¯\_(ツ)_/¯          //
  1014  					/////////////////////////////////////////////////////////////////////
  1015  					// lookup best routes for the services in the just received route
  1016  					q := []router.QueryOption{
  1017  						router.QueryService(route.Service),
  1018  						router.QueryStrategy(n.router.Options().Advertise),
  1019  					}
  1020  
  1021  					routes, err := n.router.Table().Query(q...)
  1022  					if err != nil && err != router.ErrRouteNotFound {
  1023  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1024  							logger.Debugf("Network node %s failed listing best routes for %s: %v", n.id, route.Service, err)
  1025  						}
  1026  						continue
  1027  					}
  1028  
  1029  					// we found no routes for the given service
  1030  					// create the new route we have just received
  1031  					if len(routes) == 0 {
  1032  						if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute {
  1033  							if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1034  								logger.Debugf("Network node %s failed to add route: %v", n.id, err)
  1035  							}
  1036  						}
  1037  						continue
  1038  					}
  1039  
  1040  					// find the best route for the given service
  1041  					// from the routes that we would advertise
  1042  					bestRoute := routes[0]
  1043  					for _, r := range routes[0:] {
  1044  						if bestRoute.Metric > r.Metric {
  1045  							bestRoute = r
  1046  						}
  1047  					}
  1048  
  1049  					// Take the best route to given service and:
  1050  					// only add new routes if the metric is better
  1051  					// than the metric of our best route
  1052  
  1053  					if bestRoute.Metric <= route.Metric {
  1054  						continue
  1055  					}
  1056  					///////////////////////////////////////////////////////////////////////
  1057  					///////////////////////////////////////////////////////////////////////
  1058  
  1059  					// add route to the routing table
  1060  					if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute {
  1061  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1062  							logger.Debugf("Network node %s failed to add route: %v", n.id, err)
  1063  						}
  1064  					}
  1065  				}
  1066  
  1067  				// update your sync timestamp
  1068  				// NOTE: this might go away as we will be doing full table advert to random peer
  1069  				if err := n.RefreshSync(now); err != nil {
  1070  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1071  						logger.Debugf("Network failed refreshing sync time: %v", err)
  1072  					}
  1073  				}
  1074  
  1075  				go func() {
  1076  					// get node peer graph to send back to the syncing node
  1077  					msg := PeersToProto(n.node, MaxDepth)
  1078  
  1079  					// advertise yourself to the new node
  1080  					if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil {
  1081  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1082  							logger.Debugf("Network failed to advertise peers: %v", err)
  1083  						}
  1084  					}
  1085  				}()
  1086  			case "close":
  1087  				pbNetClose := &pbNet.Close{}
  1088  				if err := proto.Unmarshal(m.msg.Body, pbNetClose); err != nil {
  1089  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1090  						logger.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err)
  1091  					}
  1092  					continue
  1093  				}
  1094  
  1095  				// don't process your own messages
  1096  				if pbNetClose.Node.Id == n.options.Id {
  1097  					continue
  1098  				}
  1099  
  1100  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1101  					logger.Debugf("Network received close message from: %s", pbNetClose.Node.Id)
  1102  				}
  1103  
  1104  				peer := &node{
  1105  					id:      pbNetClose.Node.Id,
  1106  					address: pbNetClose.Node.Address,
  1107  				}
  1108  
  1109  				if err := n.DeletePeerNode(peer.id); err != nil {
  1110  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1111  						logger.Debugf("Network failed to delete node %s routes: %v", peer.id, err)
  1112  					}
  1113  				}
  1114  
  1115  				if err := n.prunePeerRoutes(peer); err != nil {
  1116  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1117  						logger.Debugf("Network failed pruning peer %s routes: %v", peer.id, err)
  1118  					}
  1119  				}
  1120  
  1121  				// NOTE: we should maybe advertise this to the network so we converge faster on closed nodes
  1122  				// as opposed to our waiting until the node eventually gets pruned; something to think about
  1123  
  1124  				// delete peer from the peerLinks
  1125  				n.Lock()
  1126  				delete(n.peerLinks, pbNetClose.Node.Address)
  1127  				n.Unlock()
  1128  			}
  1129  		case <-n.closed:
  1130  			return
  1131  		}
  1132  	}
  1133  }
  1134  
  1135  // pruneRoutes prunes routes return by given query
  1136  func (n *network) pruneRoutes(q ...router.QueryOption) error {
  1137  	routes, err := n.router.Table().Query(q...)
  1138  	if err != nil && err != router.ErrRouteNotFound {
  1139  		return err
  1140  	}
  1141  
  1142  	for _, route := range routes {
  1143  		if err := n.router.Table().Delete(route); err != nil && err != router.ErrRouteNotFound {
  1144  			return err
  1145  		}
  1146  	}
  1147  
  1148  	return nil
  1149  }
  1150  
  1151  // pruneNodeRoutes prunes routes that were either originated by or routable via given node
  1152  func (n *network) prunePeerRoutes(peer *node) error {
  1153  	// lookup all routes originated by router
  1154  	q := []router.QueryOption{
  1155  		router.QueryRouter(peer.id),
  1156  	}
  1157  	if err := n.pruneRoutes(q...); err != nil {
  1158  		return err
  1159  	}
  1160  
  1161  	// lookup all routes routable via gw
  1162  	q = []router.QueryOption{
  1163  		router.QueryGateway(peer.address),
  1164  	}
  1165  	if err := n.pruneRoutes(q...); err != nil {
  1166  		return err
  1167  	}
  1168  
  1169  	return nil
  1170  }
  1171  
  1172  // manage the process of announcing to peers and prune any peer nodes that have not been
  1173  // seen for a period of time. Also removes all the routes either originated by or routable
  1174  // by the stale nodes. it also resolves nodes periodically and adds them to the tunnel
  1175  func (n *network) manage() {
  1176  	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
  1177  	announce := time.NewTicker(AnnounceTime)
  1178  	defer announce.Stop()
  1179  	prune := time.NewTicker(PruneTime)
  1180  	defer prune.Stop()
  1181  	resolve := time.NewTicker(ResolveTime)
  1182  	defer resolve.Stop()
  1183  	netsync := time.NewTicker(SyncTime)
  1184  	defer netsync.Stop()
  1185  
  1186  	// list of links we've sent to
  1187  	links := make(map[string]time.Time)
  1188  
  1189  	for {
  1190  		select {
  1191  		case <-n.closed:
  1192  			return
  1193  		case <-announce.C:
  1194  			current := make(map[string]time.Time)
  1195  
  1196  			// build link map of current links
  1197  			for _, link := range n.tunnel.Links() {
  1198  				if n.isLoopback(link) {
  1199  					continue
  1200  				}
  1201  				// get an existing timestamp if it exists
  1202  				current[link.Id()] = links[link.Id()]
  1203  			}
  1204  
  1205  			// replace link map
  1206  			// we do this because a growing map is not
  1207  			// garbage collected
  1208  			links = current
  1209  
  1210  			n.RLock()
  1211  			var i int
  1212  			// create a list of peers to send to
  1213  			var peers []*node
  1214  
  1215  			// check peers to see if they need to be sent to
  1216  			for _, peer := range n.peers {
  1217  				if i >= 3 {
  1218  					break
  1219  				}
  1220  
  1221  				// get last sent
  1222  				lastSent := links[peer.link]
  1223  
  1224  				// check when we last sent to the peer
  1225  				// and send a peer message if we havent
  1226  				if lastSent.IsZero() || time.Since(lastSent) > KeepAliveTime {
  1227  					link := peer.link
  1228  					id := peer.id
  1229  
  1230  					// might not exist for some weird reason
  1231  					if len(link) == 0 {
  1232  						// set the link via peer links
  1233  						l, ok := n.peerLinks[peer.address]
  1234  						if ok {
  1235  							if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1236  								logger.Debugf("Network link not found for peer %s cannot announce", peer.id)
  1237  							}
  1238  							continue
  1239  						}
  1240  						link = l.Id()
  1241  					}
  1242  
  1243  					// add to the list of peers we're going to send to
  1244  					peers = append(peers, &node{
  1245  						id:   id,
  1246  						link: link,
  1247  					})
  1248  
  1249  					// increment our count
  1250  					i++
  1251  				}
  1252  			}
  1253  
  1254  			n.RUnlock()
  1255  
  1256  			// peers to proto
  1257  			msg := PeersToProto(n.node, MaxDepth)
  1258  
  1259  			// we're only going to send to max 3 peers at any given tick
  1260  			for _, peer := range peers {
  1261  				// advertise yourself to the network
  1262  				if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil {
  1263  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1264  						logger.Debugf("Network failed to advertise peer %s: %v", peer.id, err)
  1265  					}
  1266  					continue
  1267  				}
  1268  
  1269  				// update last sent time
  1270  				links[peer.link] = time.Now()
  1271  			}
  1272  
  1273  			// now look at links we may not have sent to. this may occur
  1274  			// where a connect message was lost
  1275  			for link, lastSent := range links {
  1276  				if !lastSent.IsZero() || time.Since(lastSent) < KeepAliveTime {
  1277  					continue
  1278  				}
  1279  
  1280  				peer := &node{
  1281  					// unknown id of the peer
  1282  					link: link,
  1283  				}
  1284  
  1285  				// unknown link and peer so lets do the connect flow
  1286  				if err := n.sendTo("connect", NetworkChannel, peer, msg); err != nil {
  1287  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1288  						logger.Debugf("Network failed to connect %s: %v", peer.id, err)
  1289  					}
  1290  					continue
  1291  				}
  1292  
  1293  				links[peer.link] = time.Now()
  1294  			}
  1295  		case <-prune.C:
  1296  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1297  				logger.Debugf("Network node %s pruning stale peers", n.id)
  1298  			}
  1299  			pruned := n.PruneStalePeers(PruneTime)
  1300  
  1301  			for id, peer := range pruned {
  1302  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1303  					logger.Debugf("Network peer exceeded prune time: %s", id)
  1304  				}
  1305  				n.Lock()
  1306  				delete(n.peerLinks, peer.address)
  1307  				n.Unlock()
  1308  
  1309  				if err := n.prunePeerRoutes(peer); err != nil {
  1310  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1311  						logger.Debugf("Network failed pruning peer %s routes: %v", id, err)
  1312  					}
  1313  				}
  1314  			}
  1315  
  1316  			// get a list of all routes
  1317  			routes, err := n.options.Router.Table().List()
  1318  			if err != nil {
  1319  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1320  					logger.Debugf("Network failed listing routes when pruning peers: %v", err)
  1321  				}
  1322  				continue
  1323  			}
  1324  
  1325  			// collect all the router IDs in the routing table
  1326  			routers := make(map[string]bool)
  1327  
  1328  			for _, route := range routes {
  1329  				// check if its been processed
  1330  				if _, ok := routers[route.Router]; ok {
  1331  					continue
  1332  				}
  1333  
  1334  				// mark as processed
  1335  				routers[route.Router] = true
  1336  
  1337  				// if the router is in our peer graph do NOT delete routes originated by it
  1338  				if peer := n.node.GetPeerNode(route.Router); peer != nil {
  1339  					continue
  1340  				}
  1341  				// otherwise delete all the routes originated by it
  1342  				if err := n.pruneRoutes(router.QueryRouter(route.Router)); err != nil {
  1343  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1344  						logger.Debugf("Network failed deleting routes by %s: %v", route.Router, err)
  1345  					}
  1346  				}
  1347  			}
  1348  		case <-netsync.C:
  1349  			// get a list of node peers
  1350  			peers := n.Peers()
  1351  
  1352  			// skip when there are no peers
  1353  			if len(peers) == 0 {
  1354  				continue
  1355  			}
  1356  
  1357  			// pick a random peer from the list of peers and request full sync
  1358  			peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id())
  1359  			// skip if we can't find randmly selected peer
  1360  			if peer == nil {
  1361  				continue
  1362  			}
  1363  
  1364  			go func() {
  1365  				// get node peer graph to send back to the connecting node
  1366  				node := PeersToProto(n.node, MaxDepth)
  1367  
  1368  				msg := &pbNet.Sync{
  1369  					Peer: node,
  1370  				}
  1371  
  1372  				// get a list of the best routes for each service in our routing table
  1373  				routes, err := n.getProtoRoutes()
  1374  				if err != nil {
  1375  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1376  						logger.Debugf("Network node %s failed listing routes: %v", n.id, err)
  1377  					}
  1378  				}
  1379  				// attached the routes to the message
  1380  				msg.Routes = routes
  1381  
  1382  				// send sync message to the newly connected peer
  1383  				if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil {
  1384  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1385  						logger.Debugf("Network failed to send sync message: %v", err)
  1386  					}
  1387  				}
  1388  			}()
  1389  		case <-resolve.C:
  1390  			n.initNodes(false)
  1391  		}
  1392  	}
  1393  }
  1394  
  1395  // getAdvertProtoRoutes returns a list of routes to advertise to remote peer
  1396  // based on the advertisement strategy encoded in protobuf
  1397  // It returns error if the routes failed to be retrieved from the routing table
  1398  func (n *network) getProtoRoutes() ([]*pbRtr.Route, error) {
  1399  	// get a list of the best routes for each service in our routing table
  1400  	q := []router.QueryOption{
  1401  		router.QueryStrategy(n.router.Options().Advertise),
  1402  	}
  1403  
  1404  	routes, err := n.router.Table().Query(q...)
  1405  	if err != nil && err != router.ErrRouteNotFound {
  1406  		return nil, err
  1407  	}
  1408  
  1409  	// encode the routes to protobuf
  1410  	pbRoutes := make([]*pbRtr.Route, 0, len(routes))
  1411  	for _, route := range routes {
  1412  		// generate new route proto
  1413  		pbRoute := pbUtil.RouteToProto(route)
  1414  		// mask the route before outbounding
  1415  		n.maskRoute(pbRoute)
  1416  		// add to list of routes
  1417  		pbRoutes = append(pbRoutes, pbRoute)
  1418  	}
  1419  
  1420  	return pbRoutes, nil
  1421  }
  1422  
  1423  func (n *network) sendConnect() {
  1424  	// send connect message to NetworkChannel
  1425  	// NOTE: in theory we could do this as soon as
  1426  	// Dial to NetworkChannel succeeds, but instead
  1427  	// we initialize all other node resources first
  1428  	msg := &pbNet.Connect{
  1429  		Node: &pbNet.Node{
  1430  			Id:      n.node.id,
  1431  			Address: n.node.address,
  1432  		},
  1433  	}
  1434  
  1435  	if err := n.sendMsg("connect", NetworkChannel, msg); err != nil {
  1436  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1437  			logger.Debugf("Network failed to send connect message: %s", err)
  1438  		}
  1439  	}
  1440  }
  1441  
  1442  // sendTo sends a message to a specific node as a one off.
  1443  // we need this because when links die, we have no discovery info,
  1444  // and sending to an existing multicast link doesn't immediately work
  1445  func (n *network) sendTo(method, channel string, peer *node, msg proto.Message) error {
  1446  	body, err := proto.Marshal(msg)
  1447  	if err != nil {
  1448  		return err
  1449  	}
  1450  
  1451  	// Create a unicast connection to the peer but don't do the open/accept flow
  1452  	c, err := n.tunnel.Dial(channel, tunnel.DialWait(false), tunnel.DialLink(peer.link))
  1453  	if err != nil {
  1454  		if peerNode := n.GetPeerNode(peer.id); peerNode != nil {
  1455  			// update node status when error happens
  1456  			peerNode.status.err.Update(err)
  1457  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1458  				logger.Debugf("Network increment peer %v error count to: %d", peerNode, peerNode, peerNode.status.Error().Count())
  1459  			}
  1460  			if count := peerNode.status.Error().Count(); count == MaxPeerErrors {
  1461  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1462  					logger.Debugf("Network peer %v error count exceeded %d. Prunning.", peerNode, MaxPeerErrors)
  1463  				}
  1464  				n.PrunePeer(peerNode.id)
  1465  			}
  1466  		}
  1467  		return err
  1468  	}
  1469  	defer c.Close()
  1470  
  1471  	id := peer.id
  1472  
  1473  	if len(id) == 0 {
  1474  		id = peer.link
  1475  	}
  1476  
  1477  	if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1478  		logger.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, id)
  1479  	}
  1480  	tmsg := &transport.Message{
  1481  		Header: map[string]string{
  1482  			"Micro-Method": method,
  1483  		},
  1484  		Body: body,
  1485  	}
  1486  
  1487  	// setting the peer header
  1488  	if len(peer.id) > 0 {
  1489  		tmsg.Header["Micro-Peer"] = peer.id
  1490  	}
  1491  
  1492  	if err := c.Send(tmsg); err != nil {
  1493  		// TODO: Lookup peer in our graph
  1494  		if peerNode := n.GetPeerNode(peer.id); peerNode != nil {
  1495  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1496  				logger.Debugf("Network found peer %s: %v", peer.id, peerNode)
  1497  			}
  1498  			// update node status when error happens
  1499  			peerNode.status.err.Update(err)
  1500  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1501  				logger.Debugf("Network increment node peer %p %v count to: %d", peerNode, peerNode, peerNode.status.Error().Count())
  1502  			}
  1503  			if count := peerNode.status.Error().Count(); count == MaxPeerErrors {
  1504  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1505  					logger.Debugf("Network node peer %v count exceeded %d: %d", peerNode, MaxPeerErrors, peerNode.status.Error().Count())
  1506  				}
  1507  				n.PrunePeer(peerNode.id)
  1508  			}
  1509  		}
  1510  		return err
  1511  	}
  1512  
  1513  	return nil
  1514  }
  1515  
  1516  // sendMsg sends a message to the tunnel channel
  1517  func (n *network) sendMsg(method, channel string, msg proto.Message) error {
  1518  	body, err := proto.Marshal(msg)
  1519  	if err != nil {
  1520  		return err
  1521  	}
  1522  
  1523  	// check if the channel client is initialized
  1524  	n.RLock()
  1525  	client, ok := n.tunClient[channel]
  1526  	if !ok || client == nil {
  1527  		n.RUnlock()
  1528  		return ErrClientNotFound
  1529  	}
  1530  	n.RUnlock()
  1531  
  1532  	if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1533  		logger.Debugf("Network sending %s message from: %s", method, n.options.Id)
  1534  	}
  1535  
  1536  	return client.Send(&transport.Message{
  1537  		Header: map[string]string{
  1538  			"Micro-Method": method,
  1539  		},
  1540  		Body: body,
  1541  	})
  1542  }
  1543  
  1544  // updatePeerLinks updates link for a given peer
  1545  func (n *network) updatePeerLinks(peer *node) error {
  1546  	n.Lock()
  1547  	defer n.Unlock()
  1548  
  1549  	linkId := peer.link
  1550  
  1551  	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
  1552  		logger.Tracef("Network looking up link %s in the peer links", linkId)
  1553  	}
  1554  
  1555  	// lookup the peer link
  1556  	var peerLink tunnel.Link
  1557  
  1558  	for _, link := range n.tunnel.Links() {
  1559  		if link.Id() == linkId {
  1560  			peerLink = link
  1561  			break
  1562  		}
  1563  	}
  1564  
  1565  	if peerLink == nil {
  1566  		return ErrPeerLinkNotFound
  1567  	}
  1568  
  1569  	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
  1570  		// if the peerLink is found in the returned links update peerLinks
  1571  		logger.Tracef("Network updating peer links for peer %s", peer.address)
  1572  	}
  1573  
  1574  	// lookup a link and update it if better link is available
  1575  	if link, ok := n.peerLinks[peer.address]; ok {
  1576  		// if the existing has better Length then the new, replace it
  1577  		if link.Length() < peerLink.Length() {
  1578  			n.peerLinks[peer.address] = peerLink
  1579  		}
  1580  		return nil
  1581  	}
  1582  
  1583  	// add peerLink to the peerLinks map
  1584  	n.peerLinks[peer.address] = peerLink
  1585  
  1586  	return nil
  1587  }
  1588  
  1589  // isLoopback checks if a link is a loopback to ourselves
  1590  func (n *network) isLoopback(link tunnel.Link) bool {
  1591  	// skip loopback
  1592  	if link.Loopback() {
  1593  		return true
  1594  	}
  1595  
  1596  	// our advertise address
  1597  	loopback := n.server.Options().Advertise
  1598  	// actual address
  1599  	address := n.tunnel.Address()
  1600  
  1601  	// if remote is ourselves
  1602  	switch link.Remote() {
  1603  	case loopback, address:
  1604  		return true
  1605  	}
  1606  
  1607  	return false
  1608  }
  1609  
  1610  // connect will wait for a link to be established and send the connect
  1611  // message. We're trying to ensure convergence pretty quickly. So we want
  1612  // to hear back. In the case we become completely disconnected we'll
  1613  // connect again once a new link is established
  1614  func (n *network) connect() {
  1615  	// discovered lets us know what we received a peer message back
  1616  	var discovered bool
  1617  	var attempts int
  1618  
  1619  	for {
  1620  		// connected is used to define if the link is connected
  1621  		var connected bool
  1622  
  1623  		// check the links state
  1624  		for _, link := range n.tunnel.Links() {
  1625  			// skip loopback
  1626  			if n.isLoopback(link) {
  1627  				continue
  1628  			}
  1629  
  1630  			if link.State() == "connected" {
  1631  				connected = true
  1632  				break
  1633  			}
  1634  		}
  1635  
  1636  		// if we're not connected wait
  1637  		if !connected {
  1638  			// reset discovered
  1639  			discovered = false
  1640  			// sleep for a second
  1641  			time.Sleep(time.Second)
  1642  			// now try again
  1643  			continue
  1644  		}
  1645  
  1646  		// we're connected but are we discovered?
  1647  		if !discovered {
  1648  			// recreate the clients because all the tunnel links are gone
  1649  			// so we haven't send discovery beneath
  1650  			// NOTE: when starting the tunnel for the first time we might be recreating potentially
  1651  			// well functioning tunnel clients as "discovered" will be false until the
  1652  			// n.discovered channel is read at some point later on.
  1653  			if err := n.createClients(); err != nil {
  1654  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1655  					logger.Debugf("Failed to recreate network/control clients: %v", err)
  1656  				}
  1657  				continue
  1658  			}
  1659  
  1660  			// send the connect message
  1661  			n.sendConnect()
  1662  		}
  1663  
  1664  		// check if we've been discovered
  1665  		select {
  1666  		case <-n.discovered:
  1667  			discovered = true
  1668  			attempts = 0
  1669  		case <-n.closed:
  1670  			return
  1671  		case <-time.After(time.Second + backoff.Do(attempts)):
  1672  			// we have to try again
  1673  			attempts++
  1674  		}
  1675  	}
  1676  }
  1677  
  1678  // Connect connects the network
  1679  func (n *network) Connect() error {
  1680  	n.Lock()
  1681  	defer n.Unlock()
  1682  
  1683  	// connect network tunnel
  1684  	if err := n.tunnel.Connect(); err != nil {
  1685  		return err
  1686  	}
  1687  
  1688  	// return if already connected
  1689  	if n.connected {
  1690  		// initialise the nodes
  1691  		n.initNodes(false)
  1692  		// send the connect message
  1693  		go n.sendConnect()
  1694  		return nil
  1695  	}
  1696  
  1697  	// initialise the nodes
  1698  	n.initNodes(true)
  1699  
  1700  	// set our internal node address
  1701  	// if advertise address is not set
  1702  	if len(n.options.Advertise) == 0 {
  1703  		n.server.Init(server.Advertise(n.tunnel.Address()))
  1704  	}
  1705  
  1706  	// listen on NetworkChannel
  1707  	netListener, err := n.tunnel.Listen(
  1708  		NetworkChannel,
  1709  		tunnel.ListenMode(tunnel.Multicast),
  1710  	)
  1711  	if err != nil {
  1712  		return err
  1713  	}
  1714  
  1715  	// listen on ControlChannel
  1716  	ctrlListener, err := n.tunnel.Listen(
  1717  		ControlChannel,
  1718  		tunnel.ListenMode(tunnel.Multicast),
  1719  	)
  1720  	if err != nil {
  1721  		return err
  1722  	}
  1723  
  1724  	// dial into ControlChannel to send route adverts
  1725  	ctrlClient, err := n.tunnel.Dial(
  1726  		ControlChannel,
  1727  		tunnel.DialMode(tunnel.Multicast),
  1728  	)
  1729  	if err != nil {
  1730  		return err
  1731  	}
  1732  
  1733  	n.tunClient[ControlChannel] = ctrlClient
  1734  
  1735  	// dial into NetworkChannel to send network messages
  1736  	netClient, err := n.tunnel.Dial(
  1737  		NetworkChannel,
  1738  		tunnel.DialMode(tunnel.Multicast),
  1739  	)
  1740  	if err != nil {
  1741  		return err
  1742  	}
  1743  
  1744  	n.tunClient[NetworkChannel] = netClient
  1745  
  1746  	// create closed channel
  1747  	n.closed = make(chan bool)
  1748  
  1749  	// start the router
  1750  	if err := n.options.Router.Start(); err != nil {
  1751  		return err
  1752  	}
  1753  
  1754  	// start advertising routes
  1755  	advertChan, err := n.options.Router.Advertise()
  1756  	if err != nil {
  1757  		return err
  1758  	}
  1759  
  1760  	// start the server
  1761  	if err := n.server.Start(); err != nil {
  1762  		return err
  1763  	}
  1764  
  1765  	// advertise service routes
  1766  	go n.advertise(advertChan)
  1767  	// listen to network messages
  1768  	go n.processNetChan(netListener)
  1769  	// accept and process routes
  1770  	go n.processCtrlChan(ctrlListener)
  1771  	// manage connection once links are established
  1772  	go n.connect()
  1773  	// resolve nodes, broadcast announcements and prune stale nodes
  1774  	go n.manage()
  1775  
  1776  	// we're now connected
  1777  	n.connected = true
  1778  
  1779  	return nil
  1780  }
  1781  
  1782  func (n *network) close() error {
  1783  	// stop the server
  1784  	if err := n.server.Stop(); err != nil {
  1785  		return err
  1786  	}
  1787  
  1788  	// stop the router
  1789  	if err := n.router.Stop(); err != nil {
  1790  		return err
  1791  	}
  1792  
  1793  	// close the tunnel
  1794  	if err := n.tunnel.Close(); err != nil {
  1795  		return err
  1796  	}
  1797  
  1798  	return nil
  1799  }
  1800  
  1801  // createClients is used to create new clients in the event we lose all the tunnels
  1802  func (n *network) createClients() error {
  1803  	// dial into ControlChannel to send route adverts
  1804  	ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast))
  1805  	if err != nil {
  1806  		return err
  1807  	}
  1808  
  1809  	// dial into NetworkChannel to send network messages
  1810  	netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast))
  1811  	if err != nil {
  1812  		return err
  1813  	}
  1814  
  1815  	n.Lock()
  1816  	defer n.Unlock()
  1817  
  1818  	// set the control client
  1819  	c, ok := n.tunClient[ControlChannel]
  1820  	if ok {
  1821  		c.Close()
  1822  	}
  1823  	n.tunClient[ControlChannel] = ctrlClient
  1824  
  1825  	// set the network client
  1826  	c, ok = n.tunClient[NetworkChannel]
  1827  	if ok {
  1828  		c.Close()
  1829  	}
  1830  	n.tunClient[NetworkChannel] = netClient
  1831  
  1832  	return nil
  1833  }
  1834  
  1835  // Close closes network connection
  1836  func (n *network) Close() error {
  1837  	n.Lock()
  1838  
  1839  	if !n.connected {
  1840  		n.Unlock()
  1841  		return nil
  1842  	}
  1843  
  1844  	select {
  1845  	case <-n.closed:
  1846  		n.Unlock()
  1847  		return nil
  1848  	default:
  1849  		close(n.closed)
  1850  
  1851  		// set connected to false
  1852  		n.connected = false
  1853  
  1854  		// unlock the lock otherwise we'll deadlock sending the close
  1855  		n.Unlock()
  1856  
  1857  		msg := &pbNet.Close{
  1858  			Node: &pbNet.Node{
  1859  				Id:      n.node.id,
  1860  				Address: n.node.address,
  1861  			},
  1862  		}
  1863  
  1864  		if err := n.sendMsg("close", NetworkChannel, msg); err != nil {
  1865  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1866  				logger.Debugf("Network failed to send close message: %s", err)
  1867  			}
  1868  		}
  1869  		<-time.After(time.Millisecond * 100)
  1870  	}
  1871  
  1872  	return n.close()
  1873  }
  1874  
  1875  // Client returns network client
  1876  func (n *network) Client() client.Client {
  1877  	return n.client
  1878  }
  1879  
  1880  // Server returns network server
  1881  func (n *network) Server() server.Server {
  1882  	return n.server
  1883  }