github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/libnetwork/agent.go (about)

     1  package libnetwork
     2  
     3  //go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf  --gogo_out=import_path=github.com/docker/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
     4  
     5  import (
     6  	"encoding/json"
     7  	"fmt"
     8  	"net"
     9  	"sort"
    10  	"sync"
    11  
    12  	"github.com/docker/docker/libnetwork/cluster"
    13  	"github.com/docker/docker/libnetwork/datastore"
    14  	"github.com/docker/docker/libnetwork/discoverapi"
    15  	"github.com/docker/docker/libnetwork/driverapi"
    16  	"github.com/docker/docker/libnetwork/networkdb"
    17  	"github.com/docker/docker/libnetwork/types"
    18  	"github.com/docker/go-events"
    19  	"github.com/gogo/protobuf/proto"
    20  	"github.com/sirupsen/logrus"
    21  )
    22  
// Encryption key subsystems and keyring sizing.
//
//   - subsysGossip tags the keys that this file hands to NetworkDB (the
//     initial NetworkDB config, SetKey, SetPrimaryKey and RemoveKey).
//   - subsysIPSec tags the keys that this file hands to the network drivers
//     through DiscoverNew (EncryptionKeysConfig / EncryptionKeysUpdate).
//   - keyringSize is not referenced in this file; presumably the number of
//     keys kept per subsystem — TODO confirm against its users.
const (
	subsysGossip = "networking:gossip"
	subsysIPSec  = "networking:ipsec"
	keyringSize  = 3
)
    28  
    29  // ByTime implements sort.Interface for []*types.EncryptionKey based on
    30  // the LamportTime field.
    31  type ByTime []*types.EncryptionKey
    32  
    33  func (b ByTime) Len() int           { return len(b) }
    34  func (b ByTime) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
    35  func (b ByTime) Less(i, j int) bool { return b[i].LamportTime < b[j].LamportTime }
    36  
// agent holds the cluster state that agentInit creates for a controller: the
// NetworkDB instance, the addresses it was started with, and the cancel
// functions of the NetworkDB watches opened in this file.
//
// coreCancelFuncs holds the cancel functions of the endpoint-table and
// node-table watches created by agentInit. driverCancelFuncs is keyed by
// network ID and holds the cancel functions of the driver-table watches
// created by addDriverWatches.
//
// The embedded Mutex is taken by dataPathAddress and around every access to
// driverCancelFuncs made in this file.
type agent struct {
	networkDB         *networkdb.NetworkDB
	bindAddr          string
	advertiseAddr     string
	dataPathAddr      string
	coreCancelFuncs   []func()
	driverCancelFuncs map[string][]func()
	sync.Mutex
}
    46  
    47  func (a *agent) dataPathAddress() string {
    48  	a.Lock()
    49  	defer a.Unlock()
    50  	if a.dataPathAddr != "" {
    51  		return a.dataPathAddr
    52  	}
    53  	return a.advertiseAddr
    54  }
    55  
// libnetworkEPTable is the name of the NetworkDB table in which this file
// creates, updates, deletes and watches the marshalled EndpointRecord of each
// endpoint.
const libnetworkEPTable = "endpoint_table"
    57  
    58  func getBindAddr(ifaceName string) (string, error) {
    59  	iface, err := net.InterfaceByName(ifaceName)
    60  	if err != nil {
    61  		return "", fmt.Errorf("failed to find interface %s: %v", ifaceName, err)
    62  	}
    63  
    64  	addrs, err := iface.Addrs()
    65  	if err != nil {
    66  		return "", fmt.Errorf("failed to get interface addresses: %v", err)
    67  	}
    68  
    69  	for _, a := range addrs {
    70  		addr, ok := a.(*net.IPNet)
    71  		if !ok {
    72  			continue
    73  		}
    74  		addrIP := addr.IP
    75  
    76  		if addrIP.IsLinkLocalUnicast() {
    77  			continue
    78  		}
    79  
    80  		return addrIP.String(), nil
    81  	}
    82  
    83  	return "", fmt.Errorf("failed to get bind address")
    84  }
    85  
    86  func resolveAddr(addrOrInterface string) (string, error) {
    87  	// Try and see if this is a valid IP address
    88  	if net.ParseIP(addrOrInterface) != nil {
    89  		return addrOrInterface, nil
    90  	}
    91  
    92  	addr, err := net.ResolveIPAddr("ip", addrOrInterface)
    93  	if err != nil {
    94  		// If not a valid IP address, it should be a valid interface
    95  		return getBindAddr(addrOrInterface)
    96  	}
    97  	return addr.String(), nil
    98  }
    99  
   100  func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
   101  	drvEnc := discoverapi.DriverEncryptionUpdate{}
   102  
   103  	a := c.getAgent()
   104  	if a == nil {
   105  		logrus.Debug("Skipping key change as agent is nil")
   106  		return nil
   107  	}
   108  
   109  	// Find the deleted key. If the deleted key was the primary key,
   110  	// a new primary key should be set before removing if from keyring.
   111  	c.Lock()
   112  	added := []byte{}
   113  	deleted := []byte{}
   114  	j := len(c.keys)
   115  	for i := 0; i < j; {
   116  		same := false
   117  		for _, key := range keys {
   118  			if same = key.LamportTime == c.keys[i].LamportTime; same {
   119  				break
   120  			}
   121  		}
   122  		if !same {
   123  			cKey := c.keys[i]
   124  			if cKey.Subsystem == subsysGossip {
   125  				deleted = cKey.Key
   126  			}
   127  
   128  			if cKey.Subsystem == subsysIPSec {
   129  				drvEnc.Prune = cKey.Key
   130  				drvEnc.PruneTag = cKey.LamportTime
   131  			}
   132  			c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
   133  			c.keys[j-1] = nil
   134  			j--
   135  		}
   136  		i++
   137  	}
   138  	c.keys = c.keys[:j]
   139  
   140  	// Find the new key and add it to the key ring
   141  	for _, key := range keys {
   142  		same := false
   143  		for _, cKey := range c.keys {
   144  			if same = cKey.LamportTime == key.LamportTime; same {
   145  				break
   146  			}
   147  		}
   148  		if !same {
   149  			c.keys = append(c.keys, key)
   150  			if key.Subsystem == subsysGossip {
   151  				added = key.Key
   152  			}
   153  
   154  			if key.Subsystem == subsysIPSec {
   155  				drvEnc.Key = key.Key
   156  				drvEnc.Tag = key.LamportTime
   157  			}
   158  		}
   159  	}
   160  	c.Unlock()
   161  
   162  	if len(added) > 0 {
   163  		a.networkDB.SetKey(added)
   164  	}
   165  
   166  	key, _, err := c.getPrimaryKeyTag(subsysGossip)
   167  	if err != nil {
   168  		return err
   169  	}
   170  	a.networkDB.SetPrimaryKey(key)
   171  
   172  	key, tag, err := c.getPrimaryKeyTag(subsysIPSec)
   173  	if err != nil {
   174  		return err
   175  	}
   176  	drvEnc.Primary = key
   177  	drvEnc.PrimaryTag = tag
   178  
   179  	if len(deleted) > 0 {
   180  		a.networkDB.RemoveKey(deleted)
   181  	}
   182  
   183  	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
   184  		err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
   185  		if err != nil {
   186  			logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
   187  			// Attempt to reconfigure keys in case of a update failure
   188  			// which can arise due to a mismatch of keys
   189  			// if worker nodes get temporarily disconnected
   190  			logrus.Warnf("Reconfiguring datapath keys for  %s", name)
   191  			drvCfgEnc := discoverapi.DriverEncryptionConfig{}
   192  			drvCfgEnc.Keys, drvCfgEnc.Tags = c.getKeys(subsysIPSec)
   193  			err = driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvCfgEnc)
   194  			if err != nil {
   195  				logrus.Warnf("Failed to reset datapath keys in driver %s: %v", name, err)
   196  			}
   197  		}
   198  		return false
   199  	})
   200  
   201  	return nil
   202  }
   203  
   204  func (c *controller) agentSetup(clusterProvider cluster.Provider) error {
   205  	agent := c.getAgent()
   206  
   207  	// If the agent is already present there is no need to try to initialize it again
   208  	if agent != nil {
   209  		return nil
   210  	}
   211  
   212  	bindAddr := clusterProvider.GetLocalAddress()
   213  	advAddr := clusterProvider.GetAdvertiseAddress()
   214  	dataAddr := clusterProvider.GetDataPathAddress()
   215  	remoteList := clusterProvider.GetRemoteAddressList()
   216  	remoteAddrList := make([]string, 0, len(remoteList))
   217  	for _, remote := range remoteList {
   218  		addr, _, _ := net.SplitHostPort(remote)
   219  		remoteAddrList = append(remoteAddrList, addr)
   220  	}
   221  
   222  	listen := clusterProvider.GetListenAddress()
   223  	listenAddr, _, _ := net.SplitHostPort(listen)
   224  
   225  	logrus.Infof("Initializing Libnetwork Agent Listen-Addr=%s Local-addr=%s Adv-addr=%s Data-addr=%s Remote-addr-list=%v MTU=%d",
   226  		listenAddr, bindAddr, advAddr, dataAddr, remoteAddrList, c.Config().NetworkControlPlaneMTU)
   227  	if advAddr != "" && agent == nil {
   228  		if err := c.agentInit(listenAddr, bindAddr, advAddr, dataAddr); err != nil {
   229  			logrus.Errorf("error in agentInit: %v", err)
   230  			return err
   231  		}
   232  		c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
   233  			if capability.ConnectivityScope == datastore.GlobalScope {
   234  				c.agentDriverNotify(driver)
   235  			}
   236  			return false
   237  		})
   238  	}
   239  
   240  	if len(remoteAddrList) > 0 {
   241  		if err := c.agentJoin(remoteAddrList); err != nil {
   242  			logrus.Errorf("Error in joining gossip cluster : %v(join will be retried in background)", err)
   243  		}
   244  	}
   245  
   246  	return nil
   247  }
   248  
   249  // For a given subsystem getKeys sorts the keys by lamport time and returns
   250  // slice of keys and lamport time which can used as a unique tag for the keys
   251  func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
   252  	c.Lock()
   253  	defer c.Unlock()
   254  
   255  	sort.Sort(ByTime(c.keys))
   256  
   257  	keys := [][]byte{}
   258  	tags := []uint64{}
   259  	for _, key := range c.keys {
   260  		if key.Subsystem == subsys {
   261  			keys = append(keys, key.Key)
   262  			tags = append(tags, key.LamportTime)
   263  		}
   264  	}
   265  
   266  	keys[0], keys[1] = keys[1], keys[0]
   267  	tags[0], tags[1] = tags[1], tags[0]
   268  	return keys, tags
   269  }
   270  
   271  // getPrimaryKeyTag returns the primary key for a given subsystem from the
   272  // list of sorted key and the associated tag
   273  func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64, error) {
   274  	c.Lock()
   275  	defer c.Unlock()
   276  	sort.Sort(ByTime(c.keys))
   277  	keys := []*types.EncryptionKey{}
   278  	for _, key := range c.keys {
   279  		if key.Subsystem == subsys {
   280  			keys = append(keys, key)
   281  		}
   282  	}
   283  	return keys[1].Key, keys[1].LamportTime, nil
   284  }
   285  
   286  func (c *controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, dataPathAddr string) error {
   287  	bindAddr, err := resolveAddr(bindAddrOrInterface)
   288  	if err != nil {
   289  		return err
   290  	}
   291  
   292  	keys, _ := c.getKeys(subsysGossip)
   293  
   294  	netDBConf := networkdb.DefaultConfig()
   295  	netDBConf.BindAddr = listenAddr
   296  	netDBConf.AdvertiseAddr = advertiseAddr
   297  	netDBConf.Keys = keys
   298  	if c.Config().NetworkControlPlaneMTU != 0 {
   299  		// Consider the MTU remove the IP hdr (IPv4 or IPv6) and the TCP/UDP hdr.
   300  		// To be on the safe side let's cut 100 bytes
   301  		netDBConf.PacketBufferSize = (c.Config().NetworkControlPlaneMTU - 100)
   302  		logrus.Debugf("Control plane MTU: %d will initialize NetworkDB with: %d",
   303  			c.Config().NetworkControlPlaneMTU, netDBConf.PacketBufferSize)
   304  	}
   305  	nDB, err := networkdb.New(netDBConf)
   306  	if err != nil {
   307  		return err
   308  	}
   309  
   310  	// Register the diagnostic handlers
   311  	c.DiagnosticServer.RegisterHandler(nDB, networkdb.NetDbPaths2Func)
   312  
   313  	var cancelList []func()
   314  	ch, cancel := nDB.Watch(libnetworkEPTable, "", "")
   315  	cancelList = append(cancelList, cancel)
   316  	nodeCh, cancel := nDB.Watch(networkdb.NodeTable, "", "")
   317  	cancelList = append(cancelList, cancel)
   318  
   319  	c.Lock()
   320  	c.agent = &agent{
   321  		networkDB:         nDB,
   322  		bindAddr:          bindAddr,
   323  		advertiseAddr:     advertiseAddr,
   324  		dataPathAddr:      dataPathAddr,
   325  		coreCancelFuncs:   cancelList,
   326  		driverCancelFuncs: make(map[string][]func()),
   327  	}
   328  	c.Unlock()
   329  
   330  	go c.handleTableEvents(ch, c.handleEpTableEvent)
   331  	go c.handleTableEvents(nodeCh, c.handleNodeTableEvent)
   332  
   333  	drvEnc := discoverapi.DriverEncryptionConfig{}
   334  	keys, tags := c.getKeys(subsysIPSec)
   335  	drvEnc.Keys = keys
   336  	drvEnc.Tags = tags
   337  
   338  	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
   339  		err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
   340  		if err != nil {
   341  			logrus.Warnf("Failed to set datapath keys in driver %s: %v", name, err)
   342  		}
   343  		return false
   344  	})
   345  
   346  	c.WalkNetworks(joinCluster)
   347  
   348  	return nil
   349  }
   350  
   351  func (c *controller) agentJoin(remoteAddrList []string) error {
   352  	agent := c.getAgent()
   353  	if agent == nil {
   354  		return nil
   355  	}
   356  	return agent.networkDB.Join(remoteAddrList)
   357  }
   358  
   359  func (c *controller) agentDriverNotify(d driverapi.Driver) {
   360  	agent := c.getAgent()
   361  	if agent == nil {
   362  		return
   363  	}
   364  
   365  	if err := d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{
   366  		Address:     agent.dataPathAddress(),
   367  		BindAddress: agent.bindAddr,
   368  		Self:        true,
   369  	}); err != nil {
   370  		logrus.Warnf("Failed the node discovery in driver: %v", err)
   371  	}
   372  
   373  	drvEnc := discoverapi.DriverEncryptionConfig{}
   374  	keys, tags := c.getKeys(subsysIPSec)
   375  	drvEnc.Keys = keys
   376  	drvEnc.Tags = tags
   377  
   378  	if err := d.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc); err != nil {
   379  		logrus.Warnf("Failed to set datapath keys in driver: %v", err)
   380  	}
   381  }
   382  
   383  func (c *controller) agentClose() {
   384  	// Acquire current agent instance and reset its pointer
   385  	// then run closing functions
   386  	c.Lock()
   387  	agent := c.agent
   388  	c.agent = nil
   389  	c.Unlock()
   390  
   391  	// when the agent is closed the cluster provider should be cleaned up
   392  	c.SetClusterProvider(nil)
   393  
   394  	if agent == nil {
   395  		return
   396  	}
   397  
   398  	var cancelList []func()
   399  
   400  	agent.Lock()
   401  	for _, cancelFuncs := range agent.driverCancelFuncs {
   402  		cancelList = append(cancelList, cancelFuncs...)
   403  	}
   404  
   405  	// Add also the cancel functions for the network db
   406  	cancelList = append(cancelList, agent.coreCancelFuncs...)
   407  	agent.Unlock()
   408  
   409  	for _, cancel := range cancelList {
   410  		cancel()
   411  	}
   412  
   413  	agent.networkDB.Close()
   414  }
   415  
// Task has the backend container details.
//
// As filled in by (*network).Services: Name and EndpointIP are copied from
// the endpoint's EndpointRecord, EndpointID is the key of that record in the
// endpoint table, and Info is whatever the network driver decoded from its
// own table entry for the endpoint (nil when the driver reported none).
type Task struct {
	Name       string
	EndpointID string
	EndpointIP string
	Info       map[string]string
}
   423  
// ServiceInfo has service specific details along with the list of backend tasks.
//
// As filled in by (*network).Services: VIP and LocalLBIndex are taken from
// the first endpoint seen for the service, Tasks holds one entry per endpoint
// of the service, and Ports holds one "Target: <n>, Publish: <n>" string per
// ingress port of the first endpoint seen.
type ServiceInfo struct {
	VIP          string
	LocalLBIndex int
	Tasks        []Task
	Ports        []string
}
   431  
// epRecord is the per-endpoint scratch value built by (*network).Services:
// ep is the record decoded from the endpoint table, info is the data the
// network driver decoded for the same endpoint, and lbIndex is the value
// returned by the controller's getLBIndex for the endpoint's service.
type epRecord struct {
	ep      EndpointRecord
	info    map[string]string
	lbIndex int
}
   437  
   438  func (n *network) Services() map[string]ServiceInfo {
   439  	eps := make(map[string]epRecord)
   440  
   441  	if !n.isClusterEligible() {
   442  		return nil
   443  	}
   444  	agent := n.getController().getAgent()
   445  	if agent == nil {
   446  		return nil
   447  	}
   448  
   449  	// Walk through libnetworkEPTable and fetch the driver agnostic endpoint info
   450  	entries := agent.networkDB.GetTableByNetwork(libnetworkEPTable, n.id)
   451  	for eid, value := range entries {
   452  		var epRec EndpointRecord
   453  		nid := n.ID()
   454  		if err := proto.Unmarshal(value.Value, &epRec); err != nil {
   455  			logrus.Errorf("Unmarshal of libnetworkEPTable failed for endpoint %s in network %s, %v", eid, nid, err)
   456  			continue
   457  		}
   458  		i := n.getController().getLBIndex(epRec.ServiceID, nid, epRec.IngressPorts)
   459  		eps[eid] = epRecord{
   460  			ep:      epRec,
   461  			lbIndex: i,
   462  		}
   463  	}
   464  
   465  	// Walk through the driver's tables, have the driver decode the entries
   466  	// and return the tuple {ep ID, value}. value is a string that coveys
   467  	// relevant info about the endpoint.
   468  	d, err := n.driver(true)
   469  	if err != nil {
   470  		logrus.Errorf("Could not resolve driver for network %s/%s while fetching services: %v", n.networkType, n.ID(), err)
   471  		return nil
   472  	}
   473  	for _, table := range n.driverTables {
   474  		if table.objType != driverapi.EndpointObject {
   475  			continue
   476  		}
   477  		entries := agent.networkDB.GetTableByNetwork(table.name, n.id)
   478  		for key, value := range entries {
   479  			epID, info := d.DecodeTableEntry(table.name, key, value.Value)
   480  			if ep, ok := eps[epID]; !ok {
   481  				logrus.Errorf("Inconsistent driver and libnetwork state for endpoint %s", epID)
   482  			} else {
   483  				ep.info = info
   484  				eps[epID] = ep
   485  			}
   486  		}
   487  	}
   488  
   489  	// group the endpoints into a map keyed by the service name
   490  	sinfo := make(map[string]ServiceInfo)
   491  	for ep, epr := range eps {
   492  		var (
   493  			s  ServiceInfo
   494  			ok bool
   495  		)
   496  		if s, ok = sinfo[epr.ep.ServiceName]; !ok {
   497  			s = ServiceInfo{
   498  				VIP:          epr.ep.VirtualIP,
   499  				LocalLBIndex: epr.lbIndex,
   500  			}
   501  		}
   502  		ports := []string{}
   503  		if s.Ports == nil {
   504  			for _, port := range epr.ep.IngressPorts {
   505  				p := fmt.Sprintf("Target: %d, Publish: %d", port.TargetPort, port.PublishedPort)
   506  				ports = append(ports, p)
   507  			}
   508  			s.Ports = ports
   509  		}
   510  		s.Tasks = append(s.Tasks, Task{
   511  			Name:       epr.ep.Name,
   512  			EndpointID: ep,
   513  			EndpointIP: epr.ep.EndpointIP,
   514  			Info:       epr.info,
   515  		})
   516  		sinfo[epr.ep.ServiceName] = s
   517  	}
   518  	return sinfo
   519  }
   520  
   521  func (n *network) isClusterEligible() bool {
   522  	if n.scope != datastore.SwarmScope || !n.driverIsMultihost() {
   523  		return false
   524  	}
   525  	return n.getController().getAgent() != nil
   526  }
   527  
   528  func (n *network) joinCluster() error {
   529  	if !n.isClusterEligible() {
   530  		return nil
   531  	}
   532  
   533  	agent := n.getController().getAgent()
   534  	if agent == nil {
   535  		return nil
   536  	}
   537  
   538  	return agent.networkDB.JoinNetwork(n.ID())
   539  }
   540  
   541  func (n *network) leaveCluster() error {
   542  	if !n.isClusterEligible() {
   543  		return nil
   544  	}
   545  
   546  	agent := n.getController().getAgent()
   547  	if agent == nil {
   548  		return nil
   549  	}
   550  
   551  	return agent.networkDB.LeaveNetwork(n.ID())
   552  }
   553  
   554  func (ep *endpoint) addDriverInfoToCluster() error {
   555  	n := ep.getNetwork()
   556  	if !n.isClusterEligible() {
   557  		return nil
   558  	}
   559  	if ep.joinInfo == nil {
   560  		return nil
   561  	}
   562  
   563  	agent := n.getController().getAgent()
   564  	if agent == nil {
   565  		return nil
   566  	}
   567  
   568  	for _, te := range ep.joinInfo.driverTableEntries {
   569  		if err := agent.networkDB.CreateEntry(te.tableName, n.ID(), te.key, te.value); err != nil {
   570  			return err
   571  		}
   572  	}
   573  	return nil
   574  }
   575  
   576  func (ep *endpoint) deleteDriverInfoFromCluster() error {
   577  	n := ep.getNetwork()
   578  	if !n.isClusterEligible() {
   579  		return nil
   580  	}
   581  	if ep.joinInfo == nil {
   582  		return nil
   583  	}
   584  
   585  	agent := n.getController().getAgent()
   586  	if agent == nil {
   587  		return nil
   588  	}
   589  
   590  	for _, te := range ep.joinInfo.driverTableEntries {
   591  		if err := agent.networkDB.DeleteEntry(te.tableName, n.ID(), te.key); err != nil {
   592  			return err
   593  		}
   594  	}
   595  	return nil
   596  }
   597  
   598  func (ep *endpoint) addServiceInfoToCluster(sb *sandbox) error {
   599  	if ep.isAnonymous() && len(ep.myAliases) == 0 || ep.Iface() == nil || ep.Iface().Address() == nil {
   600  		return nil
   601  	}
   602  
   603  	n := ep.getNetwork()
   604  	if !n.isClusterEligible() {
   605  		return nil
   606  	}
   607  
   608  	sb.Service.Lock()
   609  	defer sb.Service.Unlock()
   610  	logrus.Debugf("addServiceInfoToCluster START for %s %s", ep.svcName, ep.ID())
   611  
   612  	// Check that the endpoint is still present on the sandbox before adding it to the service discovery.
   613  	// This is to handle a race between the EnableService and the sbLeave
   614  	// It is possible that the EnableService starts, fetches the list of the endpoints and
   615  	// by the time the addServiceInfoToCluster is called the endpoint got removed from the sandbox
   616  	// The risk is that the deleteServiceInfoToCluster happens before the addServiceInfoToCluster.
   617  	// This check under the Service lock of the sandbox ensure the correct behavior.
   618  	// If the addServiceInfoToCluster arrives first may find or not the endpoint and will proceed or exit
   619  	// but in any case the deleteServiceInfoToCluster will follow doing the cleanup if needed.
   620  	// In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is
   621  	// removed from the list, in this situation the delete will bail out not finding any data to cleanup
   622  	// and the add will bail out not finding the endpoint on the sandbox.
   623  	if e := sb.getEndpoint(ep.ID()); e == nil {
   624  		logrus.Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
   625  		return nil
   626  	}
   627  
   628  	c := n.getController()
   629  	agent := c.getAgent()
   630  
   631  	name := ep.Name()
   632  	if ep.isAnonymous() {
   633  		name = ep.MyAliases()[0]
   634  	}
   635  
   636  	var ingressPorts []*PortConfig
   637  	if ep.svcID != "" {
   638  		// This is a task part of a service
   639  		// Gossip ingress ports only in ingress network.
   640  		if n.ingress {
   641  			ingressPorts = ep.ingressPorts
   642  		}
   643  		if err := c.addServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil {
   644  			return err
   645  		}
   646  	} else {
   647  		// This is a container simply attached to an attachable network
   648  		if err := c.addContainerNameResolution(n.ID(), ep.ID(), name, ep.myAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil {
   649  			return err
   650  		}
   651  	}
   652  
   653  	buf, err := proto.Marshal(&EndpointRecord{
   654  		Name:            name,
   655  		ServiceName:     ep.svcName,
   656  		ServiceID:       ep.svcID,
   657  		VirtualIP:       ep.virtualIP.String(),
   658  		IngressPorts:    ingressPorts,
   659  		Aliases:         ep.svcAliases,
   660  		TaskAliases:     ep.myAliases,
   661  		EndpointIP:      ep.Iface().Address().IP.String(),
   662  		ServiceDisabled: false,
   663  	})
   664  	if err != nil {
   665  		return err
   666  	}
   667  
   668  	if agent != nil {
   669  		if err := agent.networkDB.CreateEntry(libnetworkEPTable, n.ID(), ep.ID(), buf); err != nil {
   670  			logrus.Warnf("addServiceInfoToCluster NetworkDB CreateEntry failed for %s %s err:%s", ep.id, n.id, err)
   671  			return err
   672  		}
   673  	}
   674  
   675  	logrus.Debugf("addServiceInfoToCluster END for %s %s", ep.svcName, ep.ID())
   676  
   677  	return nil
   678  }
   679  
   680  func (ep *endpoint) deleteServiceInfoFromCluster(sb *sandbox, fullRemove bool, method string) error {
   681  	if ep.isAnonymous() && len(ep.myAliases) == 0 {
   682  		return nil
   683  	}
   684  
   685  	n := ep.getNetwork()
   686  	if !n.isClusterEligible() {
   687  		return nil
   688  	}
   689  
   690  	sb.Service.Lock()
   691  	defer sb.Service.Unlock()
   692  	logrus.Debugf("deleteServiceInfoFromCluster from %s START for %s %s", method, ep.svcName, ep.ID())
   693  
   694  	// Avoid a race w/ with a container that aborts preemptively.  This would
   695  	// get caught in disableServceInNetworkDB, but we check here to make the
   696  	// nature of the condition more clear.
   697  	// See comment in addServiceInfoToCluster()
   698  	if e := sb.getEndpoint(ep.ID()); e == nil {
   699  		logrus.Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
   700  		return nil
   701  	}
   702  
   703  	c := n.getController()
   704  	agent := c.getAgent()
   705  
   706  	name := ep.Name()
   707  	if ep.isAnonymous() {
   708  		name = ep.MyAliases()[0]
   709  	}
   710  
   711  	if agent != nil {
   712  		// First update the networkDB then locally
   713  		if fullRemove {
   714  			if err := agent.networkDB.DeleteEntry(libnetworkEPTable, n.ID(), ep.ID()); err != nil {
   715  				logrus.Warnf("deleteServiceInfoFromCluster NetworkDB DeleteEntry failed for %s %s err:%s", ep.id, n.id, err)
   716  			}
   717  		} else {
   718  			disableServiceInNetworkDB(agent, n, ep)
   719  		}
   720  	}
   721  
   722  	if ep.Iface() != nil && ep.Iface().Address() != nil {
   723  		if ep.svcID != "" {
   724  			// This is a task part of a service
   725  			var ingressPorts []*PortConfig
   726  			if n.ingress {
   727  				ingressPorts = ep.ingressPorts
   728  			}
   729  			if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster", true, fullRemove); err != nil {
   730  				return err
   731  			}
   732  		} else {
   733  			// This is a container simply attached to an attachable network
   734  			if err := c.delContainerNameResolution(n.ID(), ep.ID(), name, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster"); err != nil {
   735  				return err
   736  			}
   737  		}
   738  	}
   739  
   740  	logrus.Debugf("deleteServiceInfoFromCluster from %s END for %s %s", method, ep.svcName, ep.ID())
   741  
   742  	return nil
   743  }
   744  
   745  func disableServiceInNetworkDB(a *agent, n *network, ep *endpoint) {
   746  	var epRec EndpointRecord
   747  
   748  	logrus.Debugf("disableServiceInNetworkDB for %s %s", ep.svcName, ep.ID())
   749  
   750  	// Update existing record to indicate that the service is disabled
   751  	inBuf, err := a.networkDB.GetEntry(libnetworkEPTable, n.ID(), ep.ID())
   752  	if err != nil {
   753  		logrus.Warnf("disableServiceInNetworkDB GetEntry failed for %s %s err:%s", ep.id, n.id, err)
   754  		return
   755  	}
   756  	// Should never fail
   757  	if err := proto.Unmarshal(inBuf, &epRec); err != nil {
   758  		logrus.Errorf("disableServiceInNetworkDB unmarshal failed for %s %s err:%s", ep.id, n.id, err)
   759  		return
   760  	}
   761  	epRec.ServiceDisabled = true
   762  	// Should never fail
   763  	outBuf, err := proto.Marshal(&epRec)
   764  	if err != nil {
   765  		logrus.Errorf("disableServiceInNetworkDB marshalling failed for %s %s err:%s", ep.id, n.id, err)
   766  		return
   767  	}
   768  	// Send update to the whole cluster
   769  	if err := a.networkDB.UpdateEntry(libnetworkEPTable, n.ID(), ep.ID(), outBuf); err != nil {
   770  		logrus.Warnf("disableServiceInNetworkDB UpdateEntry failed for %s %s err:%s", ep.id, n.id, err)
   771  	}
   772  }
   773  
   774  func (n *network) addDriverWatches() {
   775  	if !n.isClusterEligible() {
   776  		return
   777  	}
   778  
   779  	c := n.getController()
   780  	agent := c.getAgent()
   781  	if agent == nil {
   782  		return
   783  	}
   784  	for _, table := range n.driverTables {
   785  		ch, cancel := agent.networkDB.Watch(table.name, n.ID(), "")
   786  		agent.Lock()
   787  		agent.driverCancelFuncs[n.ID()] = append(agent.driverCancelFuncs[n.ID()], cancel)
   788  		agent.Unlock()
   789  		go c.handleTableEvents(ch, n.handleDriverTableEvent)
   790  		d, err := n.driver(false)
   791  		if err != nil {
   792  			logrus.Errorf("Could not resolve driver %s while walking driver tabl: %v", n.networkType, err)
   793  			return
   794  		}
   795  
   796  		err = agent.networkDB.WalkTable(table.name, func(nid, key string, value []byte, deleted bool) bool {
   797  			// skip the entries that are mark for deletion, this is safe because this function is
   798  			// called at initialization time so there is no state to delete
   799  			if nid == n.ID() && !deleted {
   800  				d.EventNotify(driverapi.Create, nid, table.name, key, value)
   801  			}
   802  			return false
   803  		})
   804  		if err != nil {
   805  			logrus.WithError(err).Warn("Error while walking networkdb")
   806  		}
   807  	}
   808  }
   809  
   810  func (n *network) cancelDriverWatches() {
   811  	if !n.isClusterEligible() {
   812  		return
   813  	}
   814  
   815  	agent := n.getController().getAgent()
   816  	if agent == nil {
   817  		return
   818  	}
   819  
   820  	agent.Lock()
   821  	cancelFuncs := agent.driverCancelFuncs[n.ID()]
   822  	delete(agent.driverCancelFuncs, n.ID())
   823  	agent.Unlock()
   824  
   825  	for _, cancel := range cancelFuncs {
   826  		cancel()
   827  	}
   828  }
   829  
   830  func (c *controller) handleTableEvents(ch *events.Channel, fn func(events.Event)) {
   831  	for {
   832  		select {
   833  		case ev := <-ch.C:
   834  			fn(ev)
   835  		case <-ch.Done():
   836  			return
   837  		}
   838  	}
   839  }
   840  
   841  func (n *network) handleDriverTableEvent(ev events.Event) {
   842  	d, err := n.driver(false)
   843  	if err != nil {
   844  		logrus.Errorf("Could not resolve driver %s while handling driver table event: %v", n.networkType, err)
   845  		return
   846  	}
   847  
   848  	var (
   849  		etype driverapi.EventType
   850  		tname string
   851  		key   string
   852  		value []byte
   853  	)
   854  
   855  	switch event := ev.(type) {
   856  	case networkdb.CreateEvent:
   857  		tname = event.Table
   858  		key = event.Key
   859  		value = event.Value
   860  		etype = driverapi.Create
   861  	case networkdb.DeleteEvent:
   862  		tname = event.Table
   863  		key = event.Key
   864  		value = event.Value
   865  		etype = driverapi.Delete
   866  	case networkdb.UpdateEvent:
   867  		tname = event.Table
   868  		key = event.Key
   869  		value = event.Value
   870  		etype = driverapi.Delete
   871  	}
   872  
   873  	d.EventNotify(etype, n.ID(), tname, key, value)
   874  }
   875  
   876  func (c *controller) handleNodeTableEvent(ev events.Event) {
   877  	var (
   878  		value    []byte
   879  		isAdd    bool
   880  		nodeAddr networkdb.NodeAddr
   881  	)
   882  	switch event := ev.(type) {
   883  	case networkdb.CreateEvent:
   884  		value = event.Value
   885  		isAdd = true
   886  	case networkdb.DeleteEvent:
   887  		value = event.Value
   888  	case networkdb.UpdateEvent:
   889  		logrus.Errorf("Unexpected update node table event = %#v", event)
   890  	}
   891  
   892  	err := json.Unmarshal(value, &nodeAddr)
   893  	if err != nil {
   894  		logrus.Errorf("Error unmarshalling node table event %v", err)
   895  		return
   896  	}
   897  	c.processNodeDiscovery([]net.IP{nodeAddr.Addr}, isAdd)
   898  }
   899  
   900  func (c *controller) handleEpTableEvent(ev events.Event) {
   901  	var (
   902  		nid   string
   903  		eid   string
   904  		value []byte
   905  		epRec EndpointRecord
   906  	)
   907  
   908  	switch event := ev.(type) {
   909  	case networkdb.CreateEvent:
   910  		nid = event.NetworkID
   911  		eid = event.Key
   912  		value = event.Value
   913  	case networkdb.DeleteEvent:
   914  		nid = event.NetworkID
   915  		eid = event.Key
   916  		value = event.Value
   917  	case networkdb.UpdateEvent:
   918  		nid = event.NetworkID
   919  		eid = event.Key
   920  		value = event.Value
   921  	default:
   922  		logrus.Errorf("Unexpected update service table event = %#v", event)
   923  		return
   924  	}
   925  
   926  	err := proto.Unmarshal(value, &epRec)
   927  	if err != nil {
   928  		logrus.Errorf("Failed to unmarshal service table value: %v", err)
   929  		return
   930  	}
   931  
   932  	containerName := epRec.Name
   933  	svcName := epRec.ServiceName
   934  	svcID := epRec.ServiceID
   935  	vip := net.ParseIP(epRec.VirtualIP)
   936  	ip := net.ParseIP(epRec.EndpointIP)
   937  	ingressPorts := epRec.IngressPorts
   938  	serviceAliases := epRec.Aliases
   939  	taskAliases := epRec.TaskAliases
   940  
   941  	if containerName == "" || ip == nil {
   942  		logrus.Errorf("Invalid endpoint name/ip received while handling service table event %s", value)
   943  		return
   944  	}
   945  
   946  	switch ev.(type) {
   947  	case networkdb.CreateEvent:
   948  		logrus.Debugf("handleEpTableEvent ADD %s R:%v", eid, epRec)
   949  		if svcID != "" {
   950  			// This is a remote task part of a service
   951  			if err := c.addServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent"); err != nil {
   952  				logrus.Errorf("failed adding service binding for %s epRec:%v err:%v", eid, epRec, err)
   953  				return
   954  			}
   955  		} else {
   956  			// This is a remote container simply attached to an attachable network
   957  			if err := c.addContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil {
   958  				logrus.Errorf("failed adding container name resolution for %s epRec:%v err:%v", eid, epRec, err)
   959  			}
   960  		}
   961  
   962  	case networkdb.DeleteEvent:
   963  		logrus.Debugf("handleEpTableEvent DEL %s R:%v", eid, epRec)
   964  		if svcID != "" {
   965  			// This is a remote task part of a service
   966  			if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, true); err != nil {
   967  				logrus.Errorf("failed removing service binding for %s epRec:%v err:%v", eid, epRec, err)
   968  				return
   969  			}
   970  		} else {
   971  			// This is a remote container simply attached to an attachable network
   972  			if err := c.delContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil {
   973  				logrus.Errorf("failed removing container name resolution for %s epRec:%v err:%v", eid, epRec, err)
   974  			}
   975  		}
   976  	case networkdb.UpdateEvent:
   977  		logrus.Debugf("handleEpTableEvent UPD %s R:%v", eid, epRec)
   978  		// We currently should only get these to inform us that an endpoint
   979  		// is disabled.  Report if otherwise.
   980  		if svcID == "" || !epRec.ServiceDisabled {
   981  			logrus.Errorf("Unexpected update table event for %s epRec:%v", eid, epRec)
   982  			return
   983  		}
   984  		// This is a remote task that is part of a service that is now disabled
   985  		if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, false); err != nil {
   986  			logrus.Errorf("failed disabling service binding for %s epRec:%v err:%v", eid, epRec, err)
   987  			return
   988  		}
   989  	}
   990  }