github.com/tickoalcantara12/micro/v3@v3.0.0-20221007104245-9d75b9bcbab9/service/network/mucp/mucp.go (about)

     1  // Licensed under the Apache License, Version 2.0 (the "License");
     2  // you may not use this file except in compliance with the License.
     3  // You may obtain a copy of the License at
     4  //
     5  //     https://www.apache.org/licenses/LICENSE-2.0
     6  //
     7  // Unless required by applicable law or agreed to in writing, software
     8  // distributed under the License is distributed on an "AS IS" BASIS,
     9  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    10  // See the License for the specific language governing permissions and
    11  // limitations under the License.
    12  //
    13  // Original source: github.com/tickoalcantara12/micro/v3/service/network/mucp/mucp.go
    14  
    15  package mucp
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"hash/fnv"
    21  	"io"
    22  	"math"
    23  	"math/rand"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/golang/protobuf/proto"
    28  	"github.com/tickoalcantara12/micro/v3/service/client"
    29  	cmucp "github.com/tickoalcantara12/micro/v3/service/client/mucp"
    30  	"github.com/tickoalcantara12/micro/v3/service/logger"
    31  	"github.com/tickoalcantara12/micro/v3/service/network"
    32  	pb "github.com/tickoalcantara12/micro/v3/service/network/mucp/proto"
    33  	"github.com/tickoalcantara12/micro/v3/service/network/resolver/dns"
    34  	"github.com/tickoalcantara12/micro/v3/service/network/transport"
    35  	"github.com/tickoalcantara12/micro/v3/service/network/tunnel"
    36  	bun "github.com/tickoalcantara12/micro/v3/service/network/tunnel/broker"
    37  	tun "github.com/tickoalcantara12/micro/v3/service/network/tunnel/transport"
    38  	"github.com/tickoalcantara12/micro/v3/service/proxy"
    39  	"github.com/tickoalcantara12/micro/v3/service/registry/noop"
    40  	"github.com/tickoalcantara12/micro/v3/service/router"
    41  	"github.com/tickoalcantara12/micro/v3/service/server"
    42  	smucp "github.com/tickoalcantara12/micro/v3/service/server/mucp"
    43  	"github.com/tickoalcantara12/micro/v3/util/backoff"
    44  )
    45  
var (
	// DefaultName is default network name
	DefaultName = "go.micro"
	// DefaultAddress is default network address
	DefaultAddress = ":0"
	// AnnounceTime defines the time interval at which a node periodically announces its neighbours
	AnnounceTime = 1 * time.Second
	// KeepAliveTime is the time in which we want to have sent a message to a peer
	KeepAliveTime = 30 * time.Second
	// SyncTime is the time interval at which a network node requests a full sync from the network
	SyncTime = 1 * time.Minute
	// PruneTime defines time interval to periodically check nodes that need to be pruned
	// due to their not announcing their presence within this time interval
	PruneTime = 90 * time.Second
	// MaxDepth defines max depth of peer topology
	MaxDepth uint = 3
	// NetworkChannel is the name of the tunnel channel for passing network messages
	NetworkChannel = "network"
	// ControlChannel is the name of the tunnel channel for passing control messages
	ControlChannel = "control"
	// DefaultLink is the default network link name
	DefaultLink = "network"
	// MaxConnections is the max number of network client connections
	MaxConnections = 3
	// MaxPeerErrors is the max number of peer errors before we remove it from network graph
	MaxPeerErrors = 3
	// ErrPeerExists is returned when adding a peer which already exists
	ErrPeerExists = errors.New("peer already exists")
	// ErrPeerNotFound is returned when a peer could not be found in node topology
	ErrPeerNotFound = errors.New("peer not found")
	// ErrClientNotFound is returned when client for tunnel channel could not be found
	ErrClientNotFound = errors.New("client not found")
	// ErrPeerLinkNotFound is returned when peer link could not be found in tunnel Links
	ErrPeerLinkNotFound = errors.New("peer link not found")
	// ErrPeerMaxExceeded is returned when peer has reached its max error count limit
	ErrPeerMaxExceeded = errors.New("peer max errors exceeded")
)
    83  
// mucpNetwork implements the network.Network interface over a tunnel.
type mucpNetwork struct {
	// node is the local network node; its methods (Id, Address, Peers, ...)
	// are promoted onto the network
	*node
	// options configure the network
	options network.Options
	// router is the network router
	router router.Router
	// proxy is the network proxy
	proxy proxy.Proxy
	// tunnel is the network tunnel carrying all traffic
	tunnel tunnel.Tunnel
	// server is the network server
	server server.Server
	// client is the network client
	client client.Client

	// tunClient is a map of tunnel channel clients keyed by channel name
	tunClient map[string]tunnel.Session
	// peerLinks is a map of tunnel links keyed by peer address
	peerLinks map[string]tunnel.Link

	// RWMutex guards the mutable state below (and the maps above)
	sync.RWMutex
	// connected marks the network as connected
	connected bool
	// closed is closed to signal network shutdown to background loops
	closed chan bool
	// discovered is signalled once a peer has discovered us,
	// so the connect loop can stop sending connect messages
	discovered chan bool
}
   114  
// message is a network message paired with the tunnel session it arrived on,
// so a reply can be sent back over the same session.
type message struct {
	// msg is the raw transport message
	msg *transport.Message
	// session is the tunnel session the message was received on
	session tunnel.Session
}
   122  
   123  // NewNetwork returns a new network node
   124  func NewNetwork(opts ...network.Option) network.Network {
   125  	// create default options
   126  	options := network.DefaultOptions()
   127  	// initialize network options
   128  	for _, o := range opts {
   129  		o(&options)
   130  	}
   131  
   132  	// set the address to a hashed address
   133  	hasher := fnv.New64()
   134  	hasher.Write([]byte(options.Address + options.Id))
   135  	address := fmt.Sprintf("%d", hasher.Sum64())
   136  
   137  	// set the address to advertise
   138  	var advertise string
   139  	var peerAddress string
   140  
   141  	if len(options.Advertise) > 0 {
   142  		advertise = options.Advertise
   143  		peerAddress = options.Advertise
   144  	} else {
   145  		advertise = options.Address
   146  		peerAddress = address
   147  	}
   148  
   149  	// init tunnel address to the network bind address
   150  	options.Tunnel.Init(
   151  		tunnel.Address(options.Address),
   152  	)
   153  
   154  	// init router Id to the network id
   155  	options.Router.Init(
   156  		router.Id(options.Id),
   157  		router.Address(peerAddress),
   158  	)
   159  
   160  	// create tunnel client with tunnel transport
   161  	tunTransport := tun.NewTransport(
   162  		tun.WithTunnel(options.Tunnel),
   163  	)
   164  
   165  	// create the tunnel broker
   166  	tunBroker := bun.NewBroker(
   167  		bun.WithTunnel(options.Tunnel),
   168  	)
   169  
   170  	// server is network server
   171  	// TODO: use the real registry
   172  	server := smucp.NewServer(
   173  		server.Id(options.Id),
   174  		server.Address(peerAddress),
   175  		server.Advertise(advertise),
   176  		server.Name(options.Name),
   177  		server.Transport(tunTransport),
   178  		server.Broker(tunBroker),
   179  		server.Registry(noop.NewRegistry()),
   180  	)
   181  
   182  	// client is network client
   183  	client := cmucp.NewClient(
   184  		client.Broker(tunBroker),
   185  		client.Transport(tunTransport),
   186  		client.Router(options.Router),
   187  	)
   188  
   189  	network := &mucpNetwork{
   190  		node: &node{
   191  			id:      options.Id,
   192  			address: peerAddress,
   193  			peers:   make(map[string]*node),
   194  			status:  newStatus(),
   195  		},
   196  		options:    options,
   197  		router:     options.Router,
   198  		proxy:      options.Proxy,
   199  		tunnel:     options.Tunnel,
   200  		server:     server,
   201  		client:     client,
   202  		tunClient:  make(map[string]tunnel.Session),
   203  		peerLinks:  make(map[string]tunnel.Link),
   204  		discovered: make(chan bool, 1),
   205  	}
   206  
   207  	network.node.network = network
   208  
   209  	return network
   210  }
   211  
   212  func (n *mucpNetwork) Init(opts ...network.Option) error {
   213  	n.Lock()
   214  	defer n.Unlock()
   215  
   216  	// TODO: maybe only allow reinit of certain opts
   217  	for _, o := range opts {
   218  		o(&n.options)
   219  	}
   220  
   221  	return nil
   222  }
   223  
   224  // Options returns network options
   225  func (n *mucpNetwork) Options() network.Options {
   226  	n.RLock()
   227  	defer n.RUnlock()
   228  
   229  	options := n.options
   230  
   231  	return options
   232  }
   233  
   234  // Name returns network name
   235  func (n *mucpNetwork) Name() string {
   236  	n.RLock()
   237  	defer n.RUnlock()
   238  
   239  	name := n.options.Name
   240  
   241  	return name
   242  }
   243  
   244  // acceptNetConn accepts connections from NetworkChannel
   245  func (n *mucpNetwork) acceptNetConn(l tunnel.Listener, recv chan *message) {
   246  	var i int
   247  	for {
   248  		// accept a connection
   249  		conn, err := l.Accept()
   250  		if err != nil {
   251  			sleep := backoff.Do(i)
   252  			logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep)
   253  			time.Sleep(sleep)
   254  			i++
   255  			continue
   256  		}
   257  
   258  		select {
   259  		case <-n.closed:
   260  			if err := conn.Close(); err != nil {
   261  				logger.Debugf("Network tunnel [%s] failed to close connection: %v", NetworkChannel, err)
   262  			}
   263  			return
   264  		default:
   265  			// go handle NetworkChannel connection
   266  			go n.handleNetConn(conn, recv)
   267  		}
   268  	}
   269  }
   270  
   271  // acceptCtrlConn accepts connections from ControlChannel
   272  func (n *mucpNetwork) acceptCtrlConn(l tunnel.Listener, recv chan *message) {
   273  	var i int
   274  	for {
   275  		// accept a connection
   276  		conn, err := l.Accept()
   277  		if err != nil {
   278  			sleep := backoff.Do(i)
   279  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   280  				logger.Debugf("Network tunnel [%s] accept error: %v, backing off for %v", ControlChannel, err, sleep)
   281  			}
   282  			time.Sleep(sleep)
   283  			i++
   284  			continue
   285  		}
   286  
   287  		select {
   288  		case <-n.closed:
   289  			if err := conn.Close(); err != nil {
   290  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   291  					logger.Debugf("Network tunnel [%s] failed to close connection: %v", ControlChannel, err)
   292  				}
   293  			}
   294  			return
   295  		default:
   296  			// go handle ControlChannel connection
   297  			go n.handleCtrlConn(conn, recv)
   298  		}
   299  	}
   300  }
   301  
   302  // maskRoute will mask the route so that we apply the right values
   303  func (n *mucpNetwork) maskRoute(r *pb.Route) {
   304  	hasher := fnv.New64()
   305  	// the routes service address
   306  	address := r.Address
   307  
   308  	// only hash the address if we're advertising our own local routes
   309  	// avoid hashing * based routes
   310  	if r.Router == n.Id() && r.Address != "*" {
   311  		// hash the service before advertising it
   312  		hasher.Reset()
   313  		// routes for multiple instances of a service will be collapsed here.
   314  		// TODO: once we store labels in the table this may need to change
   315  		// to include the labels in case they differ but highly unlikely
   316  		hasher.Write([]byte(r.Service + n.Address()))
   317  		address = fmt.Sprintf("%d", hasher.Sum64())
   318  	}
   319  
   320  	// calculate route metric to advertise
   321  	metric := n.getRouteMetric(r.Router, r.Gateway, r.Link)
   322  
   323  	// NOTE: we override Gateway, Link and Address here
   324  	r.Address = address
   325  	r.Gateway = n.Address()
   326  	r.Link = DefaultLink
   327  	r.Metric = metric
   328  }
   329  
// advertise consumes local router events and forwards each one, as a
// masked proto advert, to a small random sample of our peers. It runs
// until the network is closed.
func (n *mucpNetwork) advertise(eventChan <-chan *router.Event) {
	// local RNG used to pick which peers receive each advert
	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))

	for {
		select {
		// process local events and randomly fire them at other nodes
		case event := <-eventChan:
			// create a proto advert
			var pbEvents []*pb.Event

			// make a copy of the route so masking does not touch the router's table entry
			route := &pb.Route{
				Service: event.Route.Service,
				Address: event.Route.Address,
				Gateway: event.Route.Gateway,
				Network: event.Route.Network,
				Router:  event.Route.Router,
				Link:    event.Route.Link,
				Metric:  event.Route.Metric,
			}

			// override address/gateway/link/metric before advertising
			n.maskRoute(route)

			e := &pb.Event{
				Type:      pb.EventType(event.Type),
				Timestamp: event.Timestamp.UnixNano(),
				Route:     route,
			}

			pbEvents = append(pbEvents, e)

			msg := &pb.Advert{
				Id:        n.Id(),
				Type:      pb.AdvertType(event.Type),
				Timestamp: event.Timestamp.UnixNano(),
				Events:    pbEvents,
			}

			// get a list of node peers
			peers := n.Peers()

			// continue if there is no one to send to
			if len(peers) == 0 {
				continue
			}

			// advertise to max 3 peers
			max := len(peers)
			if max > 3 {
				max = 3
			}

			// NOTE: peers are sampled with replacement, so fewer than max
			// distinct peers may actually receive the advert
			for i := 0; i < max; i++ {
				if peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id()); peer != nil {
					if err := n.sendTo("advert", ControlChannel, peer, msg); err != nil {
						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
							logger.Debugf("Network failed to advertise routes to %s: %v", peer.Id(), err)
						}
					}
				}
			}
		case <-n.closed:
			return
		}
	}
}
   398  
   399  // initNodes initializes tunnel with a list of resolved nodes
   400  func (n *mucpNetwork) initNodes(startup bool) {
   401  	nodes, err := n.resolveNodes()
   402  	// NOTE: this condition never fires
   403  	// as resolveNodes() never returns error
   404  	if err != nil && !startup {
   405  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   406  			logger.Debugf("Network failed to init nodes: %v", err)
   407  		}
   408  		return
   409  	}
   410  
   411  	// strip self
   412  	var init []string
   413  
   414  	// our current address
   415  	advertised := n.server.Options().Advertise
   416  
   417  	for _, node := range nodes {
   418  		// skip self
   419  		if node == advertised {
   420  			continue
   421  		}
   422  		// add the node
   423  		init = append(init, node)
   424  	}
   425  
   426  	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   427  		// initialize the tunnel
   428  		logger.Tracef("Network initialising nodes %+v\n", init)
   429  	}
   430  
   431  	n.tunnel.Init(
   432  		tunnel.Nodes(nodes...),
   433  	)
   434  }
   435  
   436  // resolveNodes resolves network nodes to addresses
   437  func (n *mucpNetwork) resolveNodes() ([]string, error) {
   438  	nodeMap := make(map[string]bool)
   439  
   440  	// collect network node addresses
   441  	//nolint:prealloc
   442  	var nodes []string
   443  
   444  	// use the DNS resolver to expand peers
   445  	dns := &dns.Resolver{}
   446  
   447  	// append seed nodes if we have them
   448  	for _, node := range n.options.Nodes {
   449  		// resolve anything that looks like a host name
   450  		records, err := dns.Resolve(node)
   451  		if err != nil {
   452  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   453  				logger.Debugf("Failed to resolve %v %v", node, err)
   454  			}
   455  			continue
   456  		}
   457  
   458  		// add to the node map
   459  		for _, record := range records {
   460  			if _, ok := nodeMap[record.Address]; !ok {
   461  				nodes = append(nodes, record.Address)
   462  			}
   463  			nodeMap[record.Address] = true
   464  		}
   465  	}
   466  
   467  	return nodes, nil
   468  }
   469  
   470  // handleNetConn handles network announcement messages
   471  func (n *mucpNetwork) handleNetConn(s tunnel.Session, msg chan *message) {
   472  	for {
   473  		m := new(transport.Message)
   474  		if err := s.Recv(m); err != nil {
   475  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   476  				logger.Debugf("Network tunnel [%s] receive error: %v", NetworkChannel, err)
   477  			}
   478  			switch err {
   479  			case io.EOF, tunnel.ErrReadTimeout:
   480  				s.Close()
   481  				return
   482  			}
   483  			continue
   484  		}
   485  
   486  		// check if peer is set
   487  		peer := m.Header["Micro-Peer"]
   488  
   489  		// check who the message is intended for
   490  		if len(peer) > 0 && peer != n.options.Id {
   491  			continue
   492  		}
   493  
   494  		select {
   495  		case msg <- &message{
   496  			msg:     m,
   497  			session: s,
   498  		}:
   499  		case <-n.closed:
   500  			return
   501  		}
   502  	}
   503  }
   504  
   505  // handleCtrlConn handles ControlChannel connections
   506  func (n *mucpNetwork) handleCtrlConn(s tunnel.Session, msg chan *message) {
   507  	for {
   508  		m := new(transport.Message)
   509  		if err := s.Recv(m); err != nil {
   510  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   511  				logger.Debugf("Network tunnel [%s] receive error: %v", ControlChannel, err)
   512  			}
   513  			switch err {
   514  			case io.EOF, tunnel.ErrReadTimeout:
   515  				s.Close()
   516  				return
   517  			}
   518  			continue
   519  		}
   520  
   521  		// check if peer is set
   522  		peer := m.Header["Micro-Peer"]
   523  
   524  		// check who the message is intended for
   525  		if len(peer) > 0 && peer != n.options.Id {
   526  			continue
   527  		}
   528  
   529  		select {
   530  		case msg <- &message{
   531  			msg:     m,
   532  			session: s,
   533  		}:
   534  		case <-n.closed:
   535  			return
   536  		}
   537  	}
   538  }
   539  
   540  // getHopCount queries network graph and returns hop count for given router
   541  // NOTE: this should be called getHopeMetric
   542  // - Routes for local services have hop count 1
   543  // - Routes with ID of adjacent nodes have hop count 10
   544  // - Routes by peers of the advertiser have hop count 100
   545  // - Routes beyond node neighbourhood have hop count 1000
   546  func (n *mucpNetwork) getHopCount(rtr string) int {
   547  	// make sure node.peers are not modified
   548  	n.node.RLock()
   549  	defer n.node.RUnlock()
   550  
   551  	// we are the origin of the route
   552  	if rtr == n.options.Id {
   553  		return 1
   554  	}
   555  
   556  	// the route origin is our peer
   557  	if _, ok := n.node.peers[rtr]; ok {
   558  		return 10
   559  	}
   560  
   561  	// the route origin is the peer of our peer
   562  	for _, peer := range n.node.peers {
   563  		for id := range peer.peers {
   564  			if rtr == id {
   565  				return 100
   566  			}
   567  		}
   568  	}
   569  	// otherwise we are three hops away
   570  	return 1000
   571  }
   572  
// getRouteMetric calculates router metric and returns it.
// The metric is (link delay * link length * hop count) / 10e6, with
// special cases: local links are 1 (own) or 2 (other gateway), and an
// unknown gateway yields math.MaxInt64 (unreachable).
func (n *mucpNetwork) getRouteMetric(router string, gateway string, link string) int64 {
	// guard peerLinks while we look up the gateway's link
	n.RLock()
	defer n.RUnlock()

	// local links are marked as 1
	if link == "local" && gateway == "" {
		return 1
	}

	// local links from other gateways as 2
	if link == "local" && gateway != "" {
		return 2
	}

	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
		logger.Tracef("Network looking up %s link to gateway: %s", link, gateway)
	}
	// attempt to find link based on gateway address
	lnk, ok := n.peerLinks[gateway]
	if !ok {
		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
			logger.Debugf("Network failed to find a link to gateway: %s", gateway)
		}
		// no link found so infinite metric returned
		return math.MaxInt64
	}

	// gather the inputs for the metric calculation
	delay := lnk.Delay()
	hops := n.getHopCount(router)
	length := lnk.Length()

	// make sure delay is non-zero so the product is never zeroed out
	if delay == 0 {
		delay = 1
	}

	// make sure length is non-zero; fall back to a large default
	if length == 0 {
		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
			logger.Debugf("Link length is 0 %v %v", link, lnk.Length())
		}
		length = 10e9
	}

	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
		logger.Tracef("Network calculated metric %v delay %v length %v distance %v", (delay*length*int64(hops))/10e6, delay, length, hops)
	}

	return (delay * length * int64(hops)) / 10e6
}
   628  
// processCtrlChan processes messages received on ControlChannel: it accepts
// connections, unmarshals advert messages from known peers, re-weights each
// advertised route with the local metric and applies it to the local routing
// table. It runs until the network is closed.
func (n *mucpNetwork) processCtrlChan(listener tunnel.Listener) {
	defer listener.Close()

	// receive control message queue
	recv := make(chan *message, 128)

	// accept ControlChannel connections
	go n.acceptCtrlConn(listener, recv)

	for {
		select {
		case m := <-recv:
			// switch on type of message and take action
			switch m.msg.Header["Micro-Method"] {
			case "advert":
				pbAdvert := &pb.Advert{}

				if err := proto.Unmarshal(m.msg.Body, pbAdvert); err != nil {
					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
						logger.Debugf("Network fail to unmarshal advert message: %v", err)
					}
					continue
				}

				// don't process your own messages
				if pbAdvert.Id == n.Id() {
					continue
				}

				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
					logger.Debugf("Network received advert message from: %s", pbAdvert.Id)
				}

				// lookup advertising node in our peer topology
				advertNode := n.node.GetPeerNode(pbAdvert.Id)
				if advertNode == nil {
					// if we can't find the node in our topology (MaxDepth) we skip processing its adverts
					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
						logger.Debugf("Network skipping advert message from unknown peer: %s", pbAdvert.Id)
					}
					continue
				}

				for _, event := range pbAdvert.Events {
					// for backwards compatibility reasons
					if event == nil || event.Route == nil {
						continue
					}

					// we know the advertising node is not the origin of the route
					if pbAdvert.Id != event.Route.Router {
						// if the origin router is not the advertising node peer
						// we can't rule out potential routing loops so we bail here
						if peer := advertNode.GetPeerNode(event.Route.Router); peer == nil {
							if logger.V(logger.DebugLevel, logger.DefaultLogger) {
								logger.Debugf("Network skipping advert message from peer: %s", pbAdvert.Id)
							}
							continue
						}
					}

					// copy the advertised route into a router.Route
					route := router.Route{
						Service: event.Route.Service,
						Address: event.Route.Address,
						Gateway: event.Route.Gateway,
						Network: event.Route.Network,
						Router:  event.Route.Router,
						Link:    event.Route.Link,
						Metric:  event.Route.Metric,
					}

					// calculate route metric and add to the advertised metric
					// we need to make sure we do not overflow math.MaxInt64
					metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link)
					if logger.V(logger.TraceLevel, logger.DefaultLogger) {
						logger.Tracef("Network metric for router %s and gateway %s: %v", event.Route.Router, event.Route.Gateway, metric)
					}

					// check we don't overflow max int 64
					if d := route.Metric + metric; d <= 0 {
						// set to max int64 if we overflow
						route.Metric = math.MaxInt64
					} else {
						// set the combined value of metrics otherwise
						route.Metric = d
					}

					// update the local table
					if err := n.router.Table().Update(route); err != nil {
						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
							logger.Debugf("Network failed to process advert %s: %v", event.Id, err)
						}
					}
				}
			}
		case <-n.closed:
			return
		}
	}
}
   730  
   731  // processNetChan processes messages received on NetworkChannel
   732  func (n *mucpNetwork) processNetChan(listener tunnel.Listener) {
   733  	defer listener.Close()
   734  
   735  	// receive network message queue
   736  	recv := make(chan *message, 128)
   737  
   738  	// accept NetworkChannel connections
   739  	go n.acceptNetConn(listener, recv)
   740  
   741  	for {
   742  		select {
   743  		case m := <-recv:
   744  			// switch on type of message and take action
   745  			switch m.msg.Header["Micro-Method"] {
   746  			case "connect":
   747  				// mark the time the message has been received
   748  				now := time.Now()
   749  
   750  				pbConnect := &pb.Connect{}
   751  				if err := proto.Unmarshal(m.msg.Body, pbConnect); err != nil {
   752  					logger.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err)
   753  					continue
   754  				}
   755  
   756  				// don't process your own messages
   757  				if pbConnect.Node.Id == n.options.Id {
   758  					continue
   759  				}
   760  
   761  				logger.Debugf("Network received connect message from: %s", pbConnect.Node.Id)
   762  
   763  				peer := &node{
   764  					id:       pbConnect.Node.Id,
   765  					address:  pbConnect.Node.Address,
   766  					link:     m.msg.Header["Micro-Link"],
   767  					peers:    make(map[string]*node),
   768  					status:   newStatus(),
   769  					lastSeen: now,
   770  				}
   771  
   772  				// update peer links
   773  
   774  				// TODO: should we do this only if we manage to add a peer
   775  				// What should we do if the peer links failed to be updated?
   776  				if err := n.updatePeerLinks(peer); err != nil {
   777  					logger.Debugf("Network failed updating peer links: %s", err)
   778  				}
   779  
   780  				// add peer to the list of node peers
   781  				if err := n.AddPeer(peer); err == ErrPeerExists {
   782  					logger.Tracef("Network peer exists, refreshing: %s", peer.id)
   783  					// update lastSeen time for the peer
   784  					if err := n.RefreshPeer(peer.id, peer.link, now); err != nil {
   785  						logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err)
   786  					}
   787  				}
   788  
   789  				// we send the sync message because someone has sent connect
   790  				// and wants to either connect or reconnect to the network
   791  				// The faster it gets the network config (routes and peer graph)
   792  				// the faster the network converges to a stable state
   793  
   794  				go func() {
   795  					// get node peer graph to send back to the connecting node
   796  					node := PeersToProto(n.node, MaxDepth)
   797  
   798  					msg := &pb.Sync{
   799  						Peer: node,
   800  					}
   801  
   802  					// get a list of the best routes for each service in our routing table
   803  					routes, err := n.getProtoRoutes()
   804  					if err != nil {
   805  						logger.Debugf("Network node %s failed listing routes: %v", n.id, err)
   806  					}
   807  					// attached the routes to the message
   808  					msg.Routes = routes
   809  
   810  					// send sync message to the newly connected peer
   811  					if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil {
   812  						logger.Debugf("Network failed to send sync message: %v", err)
   813  					}
   814  				}()
   815  			case "peer":
   816  				// mark the time the message has been received
   817  				now := time.Now()
   818  				pbPeer := &pb.Peer{}
   819  
   820  				if err := proto.Unmarshal(m.msg.Body, pbPeer); err != nil {
   821  					logger.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err)
   822  					continue
   823  				}
   824  
   825  				// don't process your own messages
   826  				if pbPeer.Node.Id == n.options.Id {
   827  					continue
   828  				}
   829  
   830  				logger.Debugf("Network received peer message from: %s %s", pbPeer.Node.Id, pbPeer.Node.Address)
   831  
   832  				peer := &node{
   833  					id:       pbPeer.Node.Id,
   834  					address:  pbPeer.Node.Address,
   835  					link:     m.msg.Header["Micro-Link"],
   836  					peers:    make(map[string]*node),
   837  					status:   newPeerStatus(pbPeer),
   838  					lastSeen: now,
   839  				}
   840  
   841  				// update peer links
   842  
   843  				// TODO: should we do this only if we manage to add a peer
   844  				// What should we do if the peer links failed to be updated?
   845  				if err := n.updatePeerLinks(peer); err != nil {
   846  					logger.Debugf("Network failed updating peer links: %s", err)
   847  				}
   848  
   849  				// if it's a new peer i.e. we do not have it in our graph, we request full sync
   850  				if err := n.node.AddPeer(peer); err == nil {
   851  					go func() {
   852  						// marshal node graph into protobuf
   853  						node := PeersToProto(n.node, MaxDepth)
   854  
   855  						msg := &pb.Sync{
   856  							Peer: node,
   857  						}
   858  
   859  						// get a list of the best routes for each service in our routing table
   860  						routes, err := n.getProtoRoutes()
   861  						if err != nil {
   862  							logger.Debugf("Network node %s failed listing routes: %v", n.id, err)
   863  						}
   864  						// attached the routes to the message
   865  						msg.Routes = routes
   866  
   867  						// send sync message to the newly connected peer
   868  						if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil {
   869  							logger.Debugf("Network failed to send sync message: %v", err)
   870  						}
   871  					}()
   872  
   873  					continue
   874  					// if we already have the peer in our graph, skip further steps
   875  				} else if err != ErrPeerExists {
   876  					logger.Debugf("Network got error adding peer %v", err)
   877  					continue
   878  				}
   879  
   880  				logger.Tracef("Network peer exists, refreshing: %s", pbPeer.Node.Id)
   881  
   882  				// update lastSeen time for the peer
   883  				if err := n.RefreshPeer(peer.id, peer.link, now); err != nil {
   884  					logger.Debugf("Network failed refreshing peer %s: %v", pbPeer.Node.Id, err)
   885  				}
   886  
   887  				// NOTE: we don't unpack MaxDepth topology
   888  				peer = UnpackPeerTopology(pbPeer, now, MaxDepth-1)
   889  				// update the link
   890  				peer.link = m.msg.Header["Micro-Link"]
   891  
   892  				logger.Tracef("Network updating topology of node: %s", n.node.id)
   893  				if err := n.node.UpdatePeer(peer); err != nil {
   894  					logger.Debugf("Network failed to update peers: %v", err)
   895  				}
   896  
   897  				// tell the connect loop that we've been discovered
   898  				// so it stops sending connect messages out
   899  				select {
   900  				case n.discovered <- true:
   901  				default:
   902  					// don't block here
   903  				}
   904  			case "sync":
   905  				// record the timestamp of the message receipt
   906  				now := time.Now()
   907  
   908  				pbSync := &pb.Sync{}
   909  				if err := proto.Unmarshal(m.msg.Body, pbSync); err != nil {
   910  					logger.Debugf("Network tunnel [%s] sync unmarshal error: %v", NetworkChannel, err)
   911  					continue
   912  				}
   913  
   914  				// don't process your own messages
   915  				if pbSync.Peer.Node.Id == n.options.Id {
   916  					continue
   917  				}
   918  
   919  				logger.Debugf("Network received sync message from: %s", pbSync.Peer.Node.Id)
   920  
   921  				peer := &node{
   922  					id:       pbSync.Peer.Node.Id,
   923  					address:  pbSync.Peer.Node.Address,
   924  					link:     m.msg.Header["Micro-Link"],
   925  					peers:    make(map[string]*node),
   926  					status:   newPeerStatus(pbSync.Peer),
   927  					lastSeen: now,
   928  				}
   929  
   930  				// update peer links
   931  
   932  				// TODO: should we do this only if we manage to add a peer
   933  				// What should we do if the peer links failed to be updated?
   934  				if err := n.updatePeerLinks(peer); err != nil {
   935  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   936  						logger.Debugf("Network failed updating peer links: %s", err)
   937  					}
   938  				}
   939  
   940  				// add peer to the list of node peers
   941  				if err := n.node.AddPeer(peer); err == ErrPeerExists {
   942  					if logger.V(logger.TraceLevel, logger.DefaultLogger) {
   943  						logger.Tracef("Network peer exists, refreshing: %s", peer.id)
   944  					}
   945  					// update lastSeen time for the existing node
   946  					if err := n.RefreshPeer(peer.id, peer.link, now); err != nil {
   947  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   948  							logger.Debugf("Network failed refreshing peer %s: %v", peer.id, err)
   949  						}
   950  					}
   951  				}
   952  
   953  				// when we receive a sync message we update our routing table
   954  				// and send a peer message back to the network to announce our presence
   955  
   956  				// add all the routes we have received in the sync message
   957  				for _, pbRoute := range pbSync.Routes {
   958  					// unmarshal the routes received from remote peer
   959  					route := ProtoToRoute(pbRoute)
   960  					// continue if we are the originator of the route
   961  					if route.Router == n.router.Options().Id {
   962  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   963  							logger.Debugf("Network node %s skipping route addition: route already present", n.id)
   964  						}
   965  						continue
   966  					}
   967  
   968  					metric := n.getRouteMetric(route.Router, route.Gateway, route.Link)
   969  					// check we don't overflow max int 64
   970  					if d := route.Metric + metric; d <= 0 {
   971  						// set to max int64 if we overflow
   972  						route.Metric = math.MaxInt64
   973  					} else {
   974  						// set the combined value of metrics otherwise
   975  						route.Metric = d
   976  					}
   977  
   978  					q := []router.LookupOption{
   979  						router.LookupLink(route.Link),
   980  					}
   981  
   982  					routes, err := n.router.Lookup(route.Service, q...)
   983  					if err != nil && err != router.ErrRouteNotFound {
   984  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   985  							logger.Debugf("Network node %s failed listing best routes for %s: %v", n.id, route.Service, err)
   986  						}
   987  						continue
   988  					}
   989  
   990  					// we found no routes for the given service
   991  					// create the new route we have just received
   992  					if len(routes) == 0 {
   993  						if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute {
   994  							if logger.V(logger.DebugLevel, logger.DefaultLogger) {
   995  								logger.Debugf("Network node %s failed to add route: %v", n.id, err)
   996  							}
   997  						}
   998  						continue
   999  					}
  1000  
  1001  					// find the best route for the given service
  1002  					// from the routes that we would advertise
  1003  					bestRoute := routes[0]
  1004  					for _, r := range routes[0:] {
  1005  						if bestRoute.Metric > r.Metric {
  1006  							bestRoute = r
  1007  						}
  1008  					}
  1009  
  1010  					// Take the best route to given service and:
  1011  					// only add new routes if the metric is better
  1012  					// than the metric of our best route
  1013  
  1014  					if bestRoute.Metric <= route.Metric {
  1015  						continue
  1016  					}
  1017  
  1018  					// add route to the routing table
  1019  					if err := n.router.Table().Create(route); err != nil && err != router.ErrDuplicateRoute {
  1020  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1021  							logger.Debugf("Network node %s failed to add route: %v", n.id, err)
  1022  						}
  1023  					}
  1024  				}
  1025  
  1026  				// update your sync timestamp
  1027  				// NOTE: this might go away as we will be doing full table advert to random peer
  1028  				if err := n.RefreshSync(now); err != nil {
  1029  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1030  						logger.Debugf("Network failed refreshing sync time: %v", err)
  1031  					}
  1032  				}
  1033  
  1034  				go func() {
  1035  					// get node peer graph to send back to the syncing node
  1036  					msg := PeersToProto(n.node, MaxDepth)
  1037  
  1038  					// advertise yourself to the new node
  1039  					if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil {
  1040  						if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1041  							logger.Debugf("Network failed to advertise peers: %v", err)
  1042  						}
  1043  					}
  1044  				}()
  1045  			case "close":
  1046  				pbClose := &pb.Close{}
  1047  				if err := proto.Unmarshal(m.msg.Body, pbClose); err != nil {
  1048  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1049  						logger.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err)
  1050  					}
  1051  					continue
  1052  				}
  1053  
  1054  				// don't process your own messages
  1055  				if pbClose.Node.Id == n.options.Id {
  1056  					continue
  1057  				}
  1058  
  1059  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1060  					logger.Debugf("Network received close message from: %s", pbClose.Node.Id)
  1061  				}
  1062  
  1063  				peer := &node{
  1064  					id:      pbClose.Node.Id,
  1065  					address: pbClose.Node.Address,
  1066  				}
  1067  
  1068  				if err := n.DeletePeerNode(peer.id); err != nil {
  1069  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1070  						logger.Debugf("Network failed to delete node %s routes: %v", peer.id, err)
  1071  					}
  1072  				}
  1073  
  1074  				if err := n.prunePeerRoutes(peer); err != nil {
  1075  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1076  						logger.Debugf("Network failed pruning peer %s routes: %v", peer.id, err)
  1077  					}
  1078  				}
  1079  
  1080  				// NOTE: we should maybe advertise this to the network so we converge faster on closed nodes
  1081  				// as opposed to our waiting until the node eventually gets pruned; something to think about
  1082  
  1083  				// delete peer from the peerLinks
  1084  				n.Lock()
  1085  				delete(n.peerLinks, pbClose.Node.Address)
  1086  				n.Unlock()
  1087  			}
  1088  		case <-n.closed:
  1089  			return
  1090  		}
  1091  	}
  1092  }
  1093  
  1094  // pruneRoutes prunes routes return by given query
  1095  func (n *mucpNetwork) pruneRoutes(q ...router.LookupOption) error {
  1096  	routes, err := n.router.Table().Read()
  1097  	if err != nil && err != router.ErrRouteNotFound {
  1098  		return err
  1099  	}
  1100  
  1101  	// filter and delete the routes in question
  1102  	for _, route := range router.Filter(routes, router.NewLookup(q...)) {
  1103  		n.router.Table().Delete(route)
  1104  	}
  1105  
  1106  	return nil
  1107  }
  1108  
  1109  // pruneNodeRoutes prunes routes that were either originated by or routable via given node
  1110  func (n *mucpNetwork) prunePeerRoutes(peer *node) error {
  1111  	// lookup all routes originated by router
  1112  	q := []router.LookupOption{
  1113  		router.LookupRouter(peer.id),
  1114  		router.LookupLink("*"),
  1115  	}
  1116  	if err := n.pruneRoutes(q...); err != nil {
  1117  		return err
  1118  	}
  1119  
  1120  	// lookup all routes routable via gw
  1121  	q = []router.LookupOption{
  1122  		router.LookupGateway(peer.address),
  1123  		router.LookupLink("*"),
  1124  	}
  1125  	if err := n.pruneRoutes(q...); err != nil {
  1126  		return err
  1127  	}
  1128  
  1129  	return nil
  1130  }
  1131  
  1132  // manage the process of announcing to peers and prune any peer nodes that have not been
  1133  // seen for a period of time. Also removes all the routes either originated by or routable
  1134  // by the stale nodes. it also resolves nodes periodically and adds them to the tunnel
  1135  func (n *mucpNetwork) manage() {
  1136  	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
  1137  	announce := time.NewTicker(AnnounceTime)
  1138  	defer announce.Stop()
  1139  	prune := time.NewTicker(PruneTime)
  1140  	defer prune.Stop()
  1141  	netsync := time.NewTicker(SyncTime)
  1142  	defer netsync.Stop()
  1143  
  1144  	// list of links we've sent to
  1145  	links := make(map[string]time.Time)
  1146  
  1147  	for {
  1148  		select {
  1149  		case <-n.closed:
  1150  			return
  1151  		case <-announce.C:
  1152  			current := make(map[string]time.Time)
  1153  
  1154  			// build link map of current links
  1155  			for _, link := range n.tunnel.Links() {
  1156  				if n.isLoopback(link) {
  1157  					continue
  1158  				}
  1159  				// get an existing timestamp if it exists
  1160  				current[link.Id()] = links[link.Id()]
  1161  			}
  1162  
  1163  			// replace link map
  1164  			// we do this because a growing map is not
  1165  			// garbage collected
  1166  			links = current
  1167  
  1168  			n.RLock()
  1169  			var i int
  1170  			// create a list of peers to send to
  1171  			var peers []*node
  1172  
  1173  			// check peers to see if they need to be sent to
  1174  			for _, peer := range n.peers {
  1175  				if i >= 3 {
  1176  					break
  1177  				}
  1178  
  1179  				// get last sent
  1180  				lastSent := links[peer.link]
  1181  
  1182  				// check when we last sent to the peer
  1183  				// and send a peer message if we haven't
  1184  				if lastSent.IsZero() || time.Since(lastSent) > KeepAliveTime {
  1185  					link := peer.link
  1186  					id := peer.id
  1187  
  1188  					// might not exist for some weird reason
  1189  					if len(link) == 0 {
  1190  						// set the link via peer links
  1191  						l, ok := n.peerLinks[peer.address]
  1192  						if ok {
  1193  							if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1194  								logger.Debugf("Network link not found for peer %s cannot announce", peer.id)
  1195  							}
  1196  							continue
  1197  						}
  1198  						link = l.Id()
  1199  					}
  1200  
  1201  					// add to the list of peers we're going to send to
  1202  					peers = append(peers, &node{
  1203  						id:   id,
  1204  						link: link,
  1205  					})
  1206  
  1207  					// increment our count
  1208  					i++
  1209  				}
  1210  			}
  1211  
  1212  			n.RUnlock()
  1213  
  1214  			// peers to proto
  1215  			msg := PeersToProto(n.node, MaxDepth)
  1216  
  1217  			// we're only going to send to max 3 peers at any given tick
  1218  			for _, peer := range peers {
  1219  				// advertise yourself to the network
  1220  				if err := n.sendTo("peer", NetworkChannel, peer, msg); err != nil {
  1221  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1222  						logger.Debugf("Network failed to advertise peer %s: %v", peer.id, err)
  1223  					}
  1224  					continue
  1225  				}
  1226  
  1227  				// update last sent time
  1228  				links[peer.link] = time.Now()
  1229  			}
  1230  
  1231  			// now look at links we may not have sent to. this may occur
  1232  			// where a connect message was lost
  1233  			for link, lastSent := range links {
  1234  				if !lastSent.IsZero() || time.Since(lastSent) < KeepAliveTime {
  1235  					continue
  1236  				}
  1237  
  1238  				peer := &node{
  1239  					// unknown id of the peer
  1240  					link: link,
  1241  				}
  1242  
  1243  				// unknown link and peer so lets do the connect flow
  1244  				if err := n.sendTo("connect", NetworkChannel, peer, msg); err != nil {
  1245  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1246  						logger.Debugf("Network failed to connect %s: %v", peer.id, err)
  1247  					}
  1248  					continue
  1249  				}
  1250  
  1251  				links[peer.link] = time.Now()
  1252  			}
  1253  		case <-prune.C:
  1254  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1255  				logger.Debugf("Network node %s pruning stale peers", n.id)
  1256  			}
  1257  			pruned := n.PruneStalePeers(PruneTime)
  1258  
  1259  			for id, peer := range pruned {
  1260  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1261  					logger.Debugf("Network peer exceeded prune time: %s", id)
  1262  				}
  1263  				n.Lock()
  1264  				delete(n.peerLinks, peer.address)
  1265  				n.Unlock()
  1266  
  1267  				if err := n.prunePeerRoutes(peer); err != nil {
  1268  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1269  						logger.Debugf("Network failed pruning peer %s routes: %v", id, err)
  1270  					}
  1271  				}
  1272  			}
  1273  
  1274  			// get a list of all routes
  1275  			routes, err := n.options.Router.Table().Read()
  1276  			if err != nil {
  1277  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1278  					logger.Debugf("Network failed listing routes when pruning peers: %v", err)
  1279  				}
  1280  				continue
  1281  			}
  1282  
  1283  			// collect all the router IDs in the routing table
  1284  			routers := make(map[string]bool)
  1285  
  1286  			for _, route := range routes {
  1287  				// don't process routes originated by ourselves
  1288  				if route.Router == n.Id() {
  1289  					continue
  1290  				}
  1291  
  1292  				// check if its been processed
  1293  				if _, ok := routers[route.Router]; ok {
  1294  					continue
  1295  				}
  1296  
  1297  				// mark as processed
  1298  				routers[route.Router] = true
  1299  
  1300  				// if the router is in our peer graph do NOT delete routes originated by it
  1301  				if peer := n.node.GetPeerNode(route.Router); peer != nil {
  1302  					continue
  1303  				}
  1304  
  1305  				// otherwise delete all the routes originated by it
  1306  				if err := n.pruneRoutes(router.LookupRouter(route.Router)); err != nil {
  1307  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1308  						logger.Debugf("Network failed deleting routes by %s: %v", route.Router, err)
  1309  					}
  1310  				}
  1311  			}
  1312  		case <-netsync.C:
  1313  			// get a list of node peers
  1314  			peers := n.Peers()
  1315  
  1316  			// skip when there are no peers
  1317  			if len(peers) == 0 {
  1318  				continue
  1319  			}
  1320  
  1321  			// pick a random peer from the list of peers and request full sync
  1322  			peer := n.node.GetPeerNode(peers[rnd.Intn(len(peers))].Id())
  1323  			// skip if we can't find randomly selected peer
  1324  			if peer == nil {
  1325  				continue
  1326  			}
  1327  
  1328  			go func() {
  1329  				// get node peer graph to send back to the connecting node
  1330  				node := PeersToProto(n.node, MaxDepth)
  1331  
  1332  				msg := &pb.Sync{
  1333  					Peer: node,
  1334  				}
  1335  
  1336  				// get a list of the best routes for each service in our routing table
  1337  				routes, err := n.getProtoRoutes()
  1338  				if err != nil {
  1339  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1340  						logger.Debugf("Network node %s failed listing routes: %v", n.id, err)
  1341  					}
  1342  				}
  1343  				// attached the routes to the message
  1344  				msg.Routes = routes
  1345  
  1346  				// send sync message to the newly connected peer
  1347  				if err := n.sendTo("sync", NetworkChannel, peer, msg); err != nil {
  1348  					if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1349  						logger.Debugf("Network failed to send sync message: %v", err)
  1350  					}
  1351  				}
  1352  			}()
  1353  		}
  1354  	}
  1355  }
  1356  
  1357  // getAdvertProtoRoutes returns a list of routes to advertise to remote peer
  1358  // based on the advertisement strategy encoded in protobuf
  1359  // It returns error if the routes failed to be retrieved from the routing table
  1360  func (n *mucpNetwork) getProtoRoutes() ([]*pb.Route, error) {
  1361  	routes, err := n.router.Table().Read()
  1362  	if err != nil && err != router.ErrRouteNotFound {
  1363  		return nil, err
  1364  	}
  1365  
  1366  	// encode the routes to protobuf
  1367  	pbRoutes := make([]*pb.Route, 0, len(routes))
  1368  	for _, route := range routes {
  1369  		// generate new route proto
  1370  		pbRoute := RouteToProto(route)
  1371  		// mask the route before outbounding
  1372  		n.maskRoute(pbRoute)
  1373  		// add to list of routes
  1374  		pbRoutes = append(pbRoutes, pbRoute)
  1375  	}
  1376  
  1377  	return pbRoutes, nil
  1378  }
  1379  
  1380  func (n *mucpNetwork) sendConnect() {
  1381  	// send connect message to NetworkChannel
  1382  	// NOTE: in theory we could do this as soon as
  1383  	// Dial to NetworkChannel succeeds, but instead
  1384  	// we initialize all other node resources first
  1385  	msg := &pb.Connect{
  1386  		Node: &pb.Node{
  1387  			Id:      n.node.id,
  1388  			Address: n.node.address,
  1389  		},
  1390  	}
  1391  
  1392  	if err := n.sendMsg("connect", NetworkChannel, msg); err != nil {
  1393  		if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1394  			logger.Debugf("Network failed to send connect message: %s", err)
  1395  		}
  1396  	}
  1397  }
  1398  
  1399  // sendTo sends a message to a specific node as a one off.
  1400  // we need this because when links die, we have no discovery info,
  1401  // and sending to an existing multicast link doesn't immediately work
  1402  func (n *mucpNetwork) sendTo(method, channel string, peer *node, msg proto.Message) error {
  1403  	body, err := proto.Marshal(msg)
  1404  	if err != nil {
  1405  		return err
  1406  	}
  1407  
  1408  	// Create a unicast connection to the peer but don't do the open/accept flow
  1409  	c, err := n.tunnel.Dial(channel, tunnel.DialWait(false), tunnel.DialLink(peer.link))
  1410  	if err != nil {
  1411  		if peerNode := n.GetPeerNode(peer.id); peerNode != nil {
  1412  			// update node status when error happens
  1413  			peerNode.status.err.Update(err)
  1414  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1415  				logger.Debugf("Network increment peer %v error count to: %d", peerNode, peerNode, peerNode.status.Error().Count())
  1416  			}
  1417  			if count := peerNode.status.Error().Count(); count == MaxPeerErrors {
  1418  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1419  					logger.Debugf("Network peer %v error count exceeded %d. Prunning.", peerNode, MaxPeerErrors)
  1420  				}
  1421  				n.PrunePeer(peerNode.id)
  1422  			}
  1423  		}
  1424  		return err
  1425  	}
  1426  	defer c.Close()
  1427  
  1428  	id := peer.id
  1429  
  1430  	if len(id) == 0 {
  1431  		id = peer.link
  1432  	}
  1433  
  1434  	if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1435  		logger.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, id)
  1436  	}
  1437  	tmsg := &transport.Message{
  1438  		Header: map[string]string{
  1439  			"Micro-Method": method,
  1440  		},
  1441  		Body: body,
  1442  	}
  1443  
  1444  	// setting the peer header
  1445  	if len(peer.id) > 0 {
  1446  		tmsg.Header["Micro-Peer"] = peer.id
  1447  	}
  1448  
  1449  	if err := c.Send(tmsg); err != nil {
  1450  		// TODO: Lookup peer in our graph
  1451  		if peerNode := n.GetPeerNode(peer.id); peerNode != nil {
  1452  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1453  				logger.Debugf("Network found peer %s: %v", peer.id, peerNode)
  1454  			}
  1455  			// update node status when error happens
  1456  			peerNode.status.err.Update(err)
  1457  			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1458  				logger.Debugf("Network increment node peer %p %v count to: %d", peerNode, peerNode, peerNode.status.Error().Count())
  1459  			}
  1460  			if count := peerNode.status.Error().Count(); count == MaxPeerErrors {
  1461  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1462  					logger.Debugf("Network node peer %v count exceeded %d: %d", peerNode, MaxPeerErrors, peerNode.status.Error().Count())
  1463  				}
  1464  				n.PrunePeer(peerNode.id)
  1465  			}
  1466  		}
  1467  		return err
  1468  	}
  1469  
  1470  	return nil
  1471  }
  1472  
  1473  // sendMsg sends a message to the tunnel channel
  1474  func (n *mucpNetwork) sendMsg(method, channel string, msg proto.Message) error {
  1475  	body, err := proto.Marshal(msg)
  1476  	if err != nil {
  1477  		return err
  1478  	}
  1479  
  1480  	// check if the channel client is initialized
  1481  	n.RLock()
  1482  	client, ok := n.tunClient[channel]
  1483  	if !ok || client == nil {
  1484  		n.RUnlock()
  1485  		return ErrClientNotFound
  1486  	}
  1487  	n.RUnlock()
  1488  
  1489  	if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1490  		logger.Debugf("Network sending %s message from: %s", method, n.options.Id)
  1491  	}
  1492  
  1493  	return client.Send(&transport.Message{
  1494  		Header: map[string]string{
  1495  			"Micro-Method": method,
  1496  		},
  1497  		Body: body,
  1498  	})
  1499  }
  1500  
// updatePeerLinks updates link for a given peer.
// It resolves the peer's link id against the tunnel's current links and
// records the resolved link in the peerLinks map, keyed by peer address.
// Returns ErrPeerLinkNotFound when the link id is unknown to the tunnel.
func (n *mucpNetwork) updatePeerLinks(peer *node) error {
	n.Lock()
	defer n.Unlock()

	linkId := peer.link

	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
		logger.Tracef("Network looking up link %s in the peer links", linkId)
	}

	// lookup the peer link
	var peerLink tunnel.Link

	// linear scan over the tunnel's links to find the one matching the id
	for _, link := range n.tunnel.Links() {
		if link.Id() == linkId {
			peerLink = link
			break
		}
	}

	if peerLink == nil {
		return ErrPeerLinkNotFound
	}

	if logger.V(logger.TraceLevel, logger.DefaultLogger) {
		// if the peerLink is found in the returned links update peerLinks
		logger.Tracef("Network updating peer links for peer %s", peer.address)
	}

	// lookup a link and update it if better link is available
	if link, ok := n.peerLinks[peer.address]; ok {
		// if the existing has better Length then the new, replace it
		// NOTE(review): this stores the NEW link when the existing one's
		// Length is smaller; if a smaller Length indicates a better link,
		// the comparison looks inverted — confirm the intended semantics
		// of tunnel.Link.Length before changing it.
		if link.Length() < peerLink.Length() {
			n.peerLinks[peer.address] = peerLink
		}
		return nil
	}

	// add peerLink to the peerLinks map
	n.peerLinks[peer.address] = peerLink

	return nil
}
  1545  
  1546  // isLoopback checks if a link is a loopback to ourselves
  1547  func (n *mucpNetwork) isLoopback(link tunnel.Link) bool {
  1548  	// skip loopback
  1549  	if link.Loopback() {
  1550  		return true
  1551  	}
  1552  
  1553  	// our advertise address
  1554  	loopback := n.server.Options().Advertise
  1555  	// actual address
  1556  	address := n.tunnel.Address()
  1557  
  1558  	// if remote is ourselves
  1559  	switch link.Remote() {
  1560  	case loopback, address:
  1561  		return true
  1562  	}
  1563  
  1564  	return false
  1565  }
  1566  
  1567  // connect will wait for a link to be established and send the connect
  1568  // message. We're trying to ensure convergence pretty quickly. So we want
  1569  // to hear back. In the case we become completely disconnected we'll
  1570  // connect again once a new link is established
  1571  func (n *mucpNetwork) connect() {
  1572  	// discovered lets us know what we received a peer message back
  1573  	var discovered bool
  1574  	var attempts int
  1575  
  1576  	for {
  1577  		// connected is used to define if the link is connected
  1578  		var connected bool
  1579  
  1580  		// check the links state
  1581  		for _, link := range n.tunnel.Links() {
  1582  			// skip loopback
  1583  			if n.isLoopback(link) {
  1584  				continue
  1585  			}
  1586  
  1587  			if link.State() == "connected" {
  1588  				connected = true
  1589  				break
  1590  			}
  1591  		}
  1592  
  1593  		// if we're not connected wait
  1594  		if !connected {
  1595  			// reset discovered
  1596  			discovered = false
  1597  			// sleep for a second
  1598  			time.Sleep(time.Second)
  1599  			// now try again
  1600  			continue
  1601  		}
  1602  
  1603  		// we're connected but are we discovered?
  1604  		if !discovered {
  1605  			// recreate the clients because all the tunnel links are gone
  1606  			// so we haven't send discovery beneath
  1607  			// NOTE: when starting the tunnel for the first time we might be recreating potentially
  1608  			// well functioning tunnel clients as "discovered" will be false until the
  1609  			// n.discovered channel is read at some point later on.
  1610  			if err := n.createClients(); err != nil {
  1611  				if logger.V(logger.DebugLevel, logger.DefaultLogger) {
  1612  					logger.Debugf("Failed to recreate network/control clients: %v", err)
  1613  				}
  1614  				continue
  1615  			}
  1616  
  1617  			// send the connect message
  1618  			n.sendConnect()
  1619  		}
  1620  
  1621  		// check if we've been discovered
  1622  		select {
  1623  		case <-n.discovered:
  1624  			discovered = true
  1625  			attempts = 0
  1626  		case <-n.closed:
  1627  			return
  1628  		case <-time.After(time.Second + backoff.Do(attempts)):
  1629  			// we have to try again
  1630  			attempts++
  1631  		}
  1632  	}
  1633  }
  1634  
// Connect connects the network.
// It connects the tunnel, initialises the local node, creates the
// multicast listeners and clients for NetworkChannel and ControlChannel,
// starts the server and launches the background goroutines (advertise,
// network/control message processing, connect and manage loops).
// Calling it while already connected only re-initialises the nodes and
// re-announces this node; listeners and clients are not recreated.
func (n *mucpNetwork) Connect() error {
	n.Lock()
	defer n.Unlock()

	// connect network tunnel
	if err := n.tunnel.Connect(); err != nil {
		return err
	}

	// return if already connected
	if n.connected {
		// initialise the nodes
		n.initNodes(false)
		// send the connect message
		go n.sendConnect()
		return nil
	}

	// initialise the nodes
	n.initNodes(true)

	// set our internal node address
	// if advertise address is not set
	if len(n.options.Advertise) == 0 {
		n.server.Init(server.Advertise(n.tunnel.Address()))
	}

	// listen on NetworkChannel
	netListener, err := n.tunnel.Listen(
		NetworkChannel,
		tunnel.ListenMode(tunnel.Multicast),
	)
	if err != nil {
		return err
	}

	// listen on ControlChannel
	ctrlListener, err := n.tunnel.Listen(
		ControlChannel,
		tunnel.ListenMode(tunnel.Multicast),
	)
	if err != nil {
		return err
	}

	// dial into ControlChannel to send route adverts
	ctrlClient, err := n.tunnel.Dial(
		ControlChannel,
		tunnel.DialMode(tunnel.Multicast),
	)
	if err != nil {
		return err
	}

	n.tunClient[ControlChannel] = ctrlClient

	// dial into NetworkChannel to send network messages
	netClient, err := n.tunnel.Dial(
		NetworkChannel,
		tunnel.DialMode(tunnel.Multicast),
	)
	if err != nil {
		return err
	}

	n.tunClient[NetworkChannel] = netClient

	// create closed channel; closing it signals every background goroutine to stop
	n.closed = make(chan bool)

	// start advertising routes
	watcher, err := n.options.Router.Watch()
	if err != nil {
		return err
	}

	advertChan, err := watcher.Chan()
	if err != nil {
		return err
	}

	// start the server
	if err := n.server.Start(); err != nil {
		return err
	}

	// advertise service routes
	go n.advertise(advertChan)
	// listen to network messages
	go n.processNetChan(netListener)
	// accept and process routes
	go n.processCtrlChan(ctrlListener)
	// manage connection once links are established
	go n.connect()
	// resolve nodes, broadcast announcements and prune stale nodes
	go n.manage()

	// we're now connected
	n.connected = true

	return nil
}
  1738  
  1739  func (n *mucpNetwork) close() error {
  1740  	// stop the server
  1741  	if err := n.server.Stop(); err != nil {
  1742  		return err
  1743  	}
  1744  
  1745  	// close the router
  1746  	if err := n.router.Close(); err != nil {
  1747  		return err
  1748  	}
  1749  
  1750  	// close the tunnel
  1751  	if err := n.tunnel.Close(); err != nil {
  1752  		return err
  1753  	}
  1754  
  1755  	return nil
  1756  }
  1757  
  1758  // createClients is used to create new clients in the event we lose all the tunnels
  1759  func (n *mucpNetwork) createClients() error {
  1760  	// dial into ControlChannel to send route adverts
  1761  	ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast))
  1762  	if err != nil {
  1763  		return err
  1764  	}
  1765  
  1766  	// dial into NetworkChannel to send network messages
  1767  	netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast))
  1768  	if err != nil {
  1769  		return err
  1770  	}
  1771  
  1772  	n.Lock()
  1773  	defer n.Unlock()
  1774  
  1775  	// set the control client
  1776  	c, ok := n.tunClient[ControlChannel]
  1777  	if ok {
  1778  		c.Close()
  1779  	}
  1780  	n.tunClient[ControlChannel] = ctrlClient
  1781  
  1782  	// set the network client
  1783  	c, ok = n.tunClient[NetworkChannel]
  1784  	if ok {
  1785  		c.Close()
  1786  	}
  1787  	n.tunClient[NetworkChannel] = netClient
  1788  
  1789  	return nil
  1790  }
  1791  
// Close closes network connection.
// It is idempotent: it returns nil immediately when the network is not
// connected or has already been closed. Otherwise it signals all background
// goroutines via the closed channel, announces a best-effort "close"
// message to peers, then tears down the server, router and tunnel.
func (n *mucpNetwork) Close() error {
	n.Lock()

	if !n.connected {
		n.Unlock()
		return nil
	}

	select {
	case <-n.closed:
		// already closed by a concurrent/previous call
		n.Unlock()
		return nil
	default:
		close(n.closed)

		// set connected to false
		n.connected = false

		// unlock the lock otherwise we'll deadlock sending the close
		n.Unlock()

		msg := &pb.Close{
			Node: &pb.Node{
				Id:      n.node.id,
				Address: n.node.address,
			},
		}

		// best-effort announcement; peers will eventually prune us
		// even if this message is lost
		if err := n.sendMsg("close", NetworkChannel, msg); err != nil {
			if logger.V(logger.DebugLevel, logger.DefaultLogger) {
				logger.Debugf("Network failed to send close message: %s", err)
			}
		}
		// give the close message a moment to go out before teardown
		<-time.After(time.Millisecond * 100)
	}

	return n.close()
}
  1831  
// Client returns the network client used to make requests over the network.
func (n *mucpNetwork) Client() client.Client {
	return n.client
}
  1836  
// Server returns the network server that handles incoming requests.
func (n *mucpNetwork) Server() server.Server {
	return n.server
}
  1841  
  1842  // RouteToProto encodes route into protobuf and returns it
  1843  func RouteToProto(route router.Route) *pb.Route {
  1844  	return &pb.Route{
  1845  		Service: route.Service,
  1846  		Address: route.Address,
  1847  		Gateway: route.Gateway,
  1848  		Network: route.Network,
  1849  		Router:  route.Router,
  1850  		Link:    route.Link,
  1851  		Metric:  int64(route.Metric),
  1852  	}
  1853  }
  1854  
  1855  // ProtoToRoute decodes protobuf route into router route and returns it
  1856  func ProtoToRoute(route *pb.Route) router.Route {
  1857  	return router.Route{
  1858  		Service: route.Service,
  1859  		Address: route.Address,
  1860  		Gateway: route.Gateway,
  1861  		Network: route.Network,
  1862  		Router:  route.Router,
  1863  		Link:    route.Link,
  1864  		Metric:  route.Metric,
  1865  	}
  1866  }