github.com/tonistiigi/docker@v0.10.1-0.20240229224939-974013b0dc6a/libnetwork/agent.go (about) 1 package libnetwork 2 3 //go:generate protoc -I=. -I=../vendor/ --gogofaster_out=import_path=github.com/docker/docker/libnetwork:. agent.proto 4 5 import ( 6 "context" 7 "encoding/json" 8 "fmt" 9 "net" 10 "sort" 11 "sync" 12 13 "github.com/containerd/log" 14 "github.com/docker/docker/libnetwork/cluster" 15 "github.com/docker/docker/libnetwork/discoverapi" 16 "github.com/docker/docker/libnetwork/driverapi" 17 "github.com/docker/docker/libnetwork/networkdb" 18 "github.com/docker/docker/libnetwork/scope" 19 "github.com/docker/docker/libnetwork/types" 20 "github.com/docker/go-events" 21 "github.com/gogo/protobuf/proto" 22 ) 23 24 const ( 25 subsysGossip = "networking:gossip" 26 subsysIPSec = "networking:ipsec" 27 keyringSize = 3 28 ) 29 30 // ByTime implements sort.Interface for []*types.EncryptionKey based on 31 // the LamportTime field. 32 type ByTime []*types.EncryptionKey 33 34 func (b ByTime) Len() int { return len(b) } 35 func (b ByTime) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 36 func (b ByTime) Less(i, j int) bool { return b[i].LamportTime < b[j].LamportTime } 37 38 type nwAgent struct { 39 networkDB *networkdb.NetworkDB 40 bindAddr net.IP 41 advertiseAddr string 42 dataPathAddr string 43 coreCancelFuncs []func() 44 driverCancelFuncs map[string][]func() 45 mu sync.Mutex 46 } 47 48 func (a *nwAgent) dataPathAddress() string { 49 a.mu.Lock() 50 defer a.mu.Unlock() 51 if a.dataPathAddr != "" { 52 return a.dataPathAddr 53 } 54 return a.advertiseAddr 55 } 56 57 const libnetworkEPTable = "endpoint_table" 58 59 func getBindAddr(ifaceName string) (net.IP, error) { 60 iface, err := net.InterfaceByName(ifaceName) 61 if err != nil { 62 return nil, fmt.Errorf("failed to find interface %s: %v", ifaceName, err) 63 } 64 65 addrs, err := iface.Addrs() 66 if err != nil { 67 return nil, fmt.Errorf("failed to get interface addresses: %v", err) 68 } 69 70 for _, a := range addrs { 71 addr, ok := a.(*net.IPNet) 72 if !ok { 73 continue 74 } 75 addrIP := addr.IP 76 77 if addrIP.IsLinkLocalUnicast() { 78 continue 79 } 80 81 return addrIP, nil 82 } 83 84 return nil, fmt.Errorf("failed to get bind address") 85 } 86 87 // resolveAddr resolves the given address, which can be one of, and 88 // parsed in the following order or priority: 89 // 90 // - a well-formed IP-address 91 // - a hostname 92 // - an interface-name 93 func resolveAddr(addrOrInterface string) (net.IP, error) { 94 // Try and see if this is a valid IP address 95 if ip := net.ParseIP(addrOrInterface); ip != nil { 96 return ip, nil 97 } 98 99 // If not a valid IP address, it could be a hostname. 100 addr, err := net.ResolveIPAddr("ip", addrOrInterface) 101 if err != nil { 102 // If hostname lookup failed, try to look for an interface with the given name. 103 return getBindAddr(addrOrInterface) 104 } 105 return addr.IP, nil 106 } 107 108 func (c *Controller) handleKeyChange(keys []*types.EncryptionKey) error { 109 drvEnc := discoverapi.DriverEncryptionUpdate{} 110 111 agent := c.getAgent() 112 if agent == nil { 113 log.G(context.TODO()).Debug("Skipping key change as agent is nil") 114 return nil 115 } 116 117 // Find the deleted key. If the deleted key was the primary key, 118 // a new primary key should be set before removing if from keyring. 119 c.mu.Lock() 120 added := []byte{} 121 deleted := []byte{} 122 j := len(c.keys) 123 for i := 0; i < j; { 124 same := false 125 for _, key := range keys { 126 if same = key.LamportTime == c.keys[i].LamportTime; same { 127 break 128 } 129 } 130 if !same { 131 cKey := c.keys[i] 132 if cKey.Subsystem == subsysGossip { 133 deleted = cKey.Key 134 } 135 136 if cKey.Subsystem == subsysIPSec { 137 drvEnc.Prune = cKey.Key 138 drvEnc.PruneTag = cKey.LamportTime 139 } 140 c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i] 141 c.keys[j-1] = nil 142 j-- 143 } 144 i++ 145 } 146 c.keys = c.keys[:j] 147 148 // Find the new key and add it to the key ring 149 for _, key := range keys { 150 same := false 151 for _, cKey := range c.keys { 152 if same = cKey.LamportTime == key.LamportTime; same { 153 break 154 } 155 } 156 if !same { 157 c.keys = append(c.keys, key) 158 if key.Subsystem == subsysGossip { 159 added = key.Key 160 } 161 162 if key.Subsystem == subsysIPSec { 163 drvEnc.Key = key.Key 164 drvEnc.Tag = key.LamportTime 165 } 166 } 167 } 168 c.mu.Unlock() 169 170 if len(added) > 0 { 171 agent.networkDB.SetKey(added) 172 } 173 174 key, _, err := c.getPrimaryKeyTag(subsysGossip) 175 if err != nil { 176 return err 177 } 178 agent.networkDB.SetPrimaryKey(key) 179 180 key, tag, err := c.getPrimaryKeyTag(subsysIPSec) 181 if err != nil { 182 return err 183 } 184 drvEnc.Primary = key 185 drvEnc.PrimaryTag = tag 186 187 if len(deleted) > 0 { 188 agent.networkDB.RemoveKey(deleted) 189 } 190 191 c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool { 192 dr, ok := driver.(discoverapi.Discover) 193 if !ok { 194 return false 195 } 196 if err := dr.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc); err != nil { 197 log.G(context.TODO()).Warnf("Failed to update datapath keys in driver %s: %v", name, err) 198 // Attempt to reconfigure keys in case of a update failure 199 // which can arise due to a mismatch of keys 200 // if worker nodes get temporarily disconnected 201 log.G(context.TODO()).Warnf("Reconfiguring datapath keys for %s", name) 202 drvCfgEnc := discoverapi.DriverEncryptionConfig{} 203 drvCfgEnc.Keys, drvCfgEnc.Tags = c.getKeys(subsysIPSec) 204 err = dr.DiscoverNew(discoverapi.EncryptionKeysConfig, drvCfgEnc) 205 if err != nil { 206 log.G(context.TODO()).Warnf("Failed to reset datapath keys in driver %s: %v", name, err) 207 } 208 } 209 return false 210 }) 211 212 return nil 213 } 214 215 func (c *Controller) agentSetup(clusterProvider cluster.Provider) error { 216 agent := c.getAgent() 217 if agent != nil { 218 // agent is already present, so there is no need initialize it again. 219 return nil 220 } 221 222 bindAddr := clusterProvider.GetLocalAddress() 223 advAddr := clusterProvider.GetAdvertiseAddress() 224 dataAddr := clusterProvider.GetDataPathAddress() 225 remoteList := clusterProvider.GetRemoteAddressList() 226 remoteAddrList := make([]string, 0, len(remoteList)) 227 for _, remote := range remoteList { 228 addr, _, _ := net.SplitHostPort(remote) 229 remoteAddrList = append(remoteAddrList, addr) 230 } 231 232 listen := clusterProvider.GetListenAddress() 233 listenAddr, _, _ := net.SplitHostPort(listen) 234 235 log.G(context.TODO()).WithFields(log.Fields{ 236 "listen-addr": listenAddr, 237 "local-addr": bindAddr, 238 "advertise-addr": advAddr, 239 "data-path-addr": dataAddr, 240 "remote-addr-list": remoteAddrList, 241 "network-control-plane-mtu": c.Config().NetworkControlPlaneMTU, 242 }).Info("Initializing Libnetwork Agent") 243 if advAddr != "" { 244 if err := c.agentInit(listenAddr, bindAddr, advAddr, dataAddr); err != nil { 245 log.G(context.TODO()).WithError(err).Errorf("Error in agentInit") 246 return err 247 } 248 c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool { 249 if capability.ConnectivityScope == scope.Global { 250 if d, ok := driver.(discoverapi.Discover); ok { 251 c.agentDriverNotify(d) 252 } 253 } 254 return false 255 }) 256 } 257 258 if len(remoteAddrList) > 0 { 259 if err := c.agentJoin(remoteAddrList); err != nil { 260 log.G(context.TODO()).WithError(err).Error("Error in joining gossip cluster: join will be retried in background") 261 } 262 } 263 264 return nil 265 } 266 267 // For a given subsystem getKeys sorts the keys by lamport time and returns 268 // slice of keys and lamport time which can used as a unique tag for the keys 269 func (c *Controller) getKeys(subsystem string) (keys [][]byte, tags []uint64) { 270 c.mu.Lock() 271 defer c.mu.Unlock() 272 273 sort.Sort(ByTime(c.keys)) 274 275 keys = make([][]byte, 0, len(c.keys)) 276 tags = make([]uint64, 0, len(c.keys)) 277 for _, key := range c.keys { 278 if key.Subsystem == subsystem { 279 keys = append(keys, key.Key) 280 tags = append(tags, key.LamportTime) 281 } 282 } 283 284 if len(keys) > 1 { 285 // TODO(thaJeztah): why are we swapping order here? This code was added in https://github.com/moby/libnetwork/commit/e83d68b7d1fd9c479120914024242238f791b4dc 286 keys[0], keys[1] = keys[1], keys[0] 287 tags[0], tags[1] = tags[1], tags[0] 288 } 289 return keys, tags 290 } 291 292 // getPrimaryKeyTag returns the primary key for a given subsystem from the 293 // list of sorted key and the associated tag 294 func (c *Controller) getPrimaryKeyTag(subsystem string) (key []byte, lamportTime uint64, _ error) { 295 c.mu.Lock() 296 defer c.mu.Unlock() 297 sort.Sort(ByTime(c.keys)) 298 keys := make([]*types.EncryptionKey, 0, len(c.keys)) 299 for _, k := range c.keys { 300 if k.Subsystem == subsystem { 301 keys = append(keys, k) 302 } 303 } 304 if len(keys) < 2 { 305 return nil, 0, fmt.Errorf("no primary key found for %s subsystem: %d keys found on controller, expected at least 2", subsystem, len(keys)) 306 } 307 return keys[1].Key, keys[1].LamportTime, nil 308 } 309 310 func (c *Controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, dataPathAddr string) error { 311 bindAddr, err := resolveAddr(bindAddrOrInterface) 312 if err != nil { 313 return err 314 } 315 316 keys, _ := c.getKeys(subsysGossip) 317 318 netDBConf := networkdb.DefaultConfig() 319 netDBConf.BindAddr = listenAddr 320 netDBConf.AdvertiseAddr = advertiseAddr 321 netDBConf.Keys = keys 322 if c.Config().NetworkControlPlaneMTU != 0 { 323 // Consider the MTU remove the IP hdr (IPv4 or IPv6) and the TCP/UDP hdr. 324 // To be on the safe side let's cut 100 bytes 325 netDBConf.PacketBufferSize = (c.Config().NetworkControlPlaneMTU - 100) 326 log.G(context.TODO()).Debugf("Control plane MTU: %d will initialize NetworkDB with: %d", 327 c.Config().NetworkControlPlaneMTU, netDBConf.PacketBufferSize) 328 } 329 nDB, err := networkdb.New(netDBConf) 330 if err != nil { 331 return err 332 } 333 334 // Register the diagnostic handlers 335 nDB.RegisterDiagnosticHandlers(c.DiagnosticServer) 336 337 var cancelList []func() 338 ch, cancel := nDB.Watch(libnetworkEPTable, "") 339 cancelList = append(cancelList, cancel) 340 nodeCh, cancel := nDB.Watch(networkdb.NodeTable, "") 341 cancelList = append(cancelList, cancel) 342 343 c.mu.Lock() 344 c.agent = &nwAgent{ 345 networkDB: nDB, 346 bindAddr: bindAddr, 347 advertiseAddr: advertiseAddr, 348 dataPathAddr: dataPathAddr, 349 coreCancelFuncs: cancelList, 350 driverCancelFuncs: make(map[string][]func()), 351 } 352 c.mu.Unlock() 353 354 go c.handleTableEvents(ch, c.handleEpTableEvent) 355 go c.handleTableEvents(nodeCh, c.handleNodeTableEvent) 356 357 keys, tags := c.getKeys(subsysIPSec) 358 c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool { 359 if dr, ok := driver.(discoverapi.Discover); ok { 360 if err := dr.DiscoverNew(discoverapi.EncryptionKeysConfig, discoverapi.DriverEncryptionConfig{ 361 Keys: keys, 362 Tags: tags, 363 }); err != nil { 364 log.G(context.TODO()).Warnf("Failed to set datapath keys in driver %s: %v", name, err) 365 } 366 } 367 return false 368 }) 369 370 c.WalkNetworks(joinCluster) 371 372 return nil 373 } 374 375 func (c *Controller) agentJoin(remoteAddrList []string) error { 376 agent := c.getAgent() 377 if agent == nil { 378 return nil 379 } 380 return agent.networkDB.Join(remoteAddrList) 381 } 382 383 func (c *Controller) agentDriverNotify(d discoverapi.Discover) { 384 agent := c.getAgent() 385 if agent == nil { 386 return 387 } 388 389 if err := d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{ 390 Address: agent.dataPathAddress(), 391 BindAddress: agent.bindAddr.String(), 392 Self: true, 393 }); err != nil { 394 log.G(context.TODO()).Warnf("Failed the node discovery in driver: %v", err) 395 } 396 397 keys, tags := c.getKeys(subsysIPSec) 398 if err := d.DiscoverNew(discoverapi.EncryptionKeysConfig, discoverapi.DriverEncryptionConfig{ 399 Keys: keys, 400 Tags: tags, 401 }); err != nil { 402 log.G(context.TODO()).Warnf("Failed to set datapath keys in driver: %v", err) 403 } 404 } 405 406 func (c *Controller) agentClose() { 407 // Acquire current agent instance and reset its pointer 408 // then run closing functions 409 c.mu.Lock() 410 agent := c.agent 411 c.agent = nil 412 c.mu.Unlock() 413 414 // when the agent is closed the cluster provider should be cleaned up 415 c.SetClusterProvider(nil) 416 417 if agent == nil { 418 return 419 } 420 421 var cancelList []func() 422 423 agent.mu.Lock() 424 for _, cancelFuncs := range agent.driverCancelFuncs { 425 cancelList = append(cancelList, cancelFuncs...) 426 } 427 428 // Add also the cancel functions for the network db 429 cancelList = append(cancelList, agent.coreCancelFuncs...) 430 agent.mu.Unlock() 431 432 for _, cancel := range cancelList { 433 cancel() 434 } 435 436 agent.networkDB.Close() 437 } 438 439 // Task has the backend container details 440 type Task struct { 441 Name string 442 EndpointID string 443 EndpointIP string 444 Info map[string]string 445 } 446 447 // ServiceInfo has service specific details along with the list of backend tasks 448 type ServiceInfo struct { 449 VIP string 450 LocalLBIndex int 451 Tasks []Task 452 Ports []string 453 } 454 455 type epRecord struct { 456 ep EndpointRecord 457 info map[string]string 458 lbIndex int 459 } 460 461 // Services returns a map of services keyed by the service name with the details 462 // of all the tasks that belong to the service. Applicable only in swarm mode. 463 func (n *Network) Services() map[string]ServiceInfo { 464 agent, ok := n.clusterAgent() 465 if !ok { 466 return nil 467 } 468 nwID := n.ID() 469 d, err := n.driver(true) 470 if err != nil { 471 log.G(context.TODO()).Errorf("Could not resolve driver for network %s/%s while fetching services: %v", n.networkType, nwID, err) 472 return nil 473 } 474 475 // Walk through libnetworkEPTable and fetch the driver agnostic endpoint info 476 eps := make(map[string]epRecord) 477 c := n.getController() 478 for eid, value := range agent.networkDB.GetTableByNetwork(libnetworkEPTable, nwID) { 479 var epRec EndpointRecord 480 if err := proto.Unmarshal(value.Value, &epRec); err != nil { 481 log.G(context.TODO()).Errorf("Unmarshal of libnetworkEPTable failed for endpoint %s in network %s, %v", eid, nwID, err) 482 continue 483 } 484 eps[eid] = epRecord{ 485 ep: epRec, 486 lbIndex: c.getLBIndex(epRec.ServiceID, nwID, epRec.IngressPorts), 487 } 488 } 489 490 // Walk through the driver's tables, have the driver decode the entries 491 // and return the tuple {ep ID, value}. value is a string that coveys 492 // relevant info about the endpoint. 493 for _, table := range n.driverTables { 494 if table.objType != driverapi.EndpointObject { 495 continue 496 } 497 for key, value := range agent.networkDB.GetTableByNetwork(table.name, nwID) { 498 epID, info := d.DecodeTableEntry(table.name, key, value.Value) 499 if ep, ok := eps[epID]; !ok { 500 log.G(context.TODO()).Errorf("Inconsistent driver and libnetwork state for endpoint %s", epID) 501 } else { 502 ep.info = info 503 eps[epID] = ep 504 } 505 } 506 } 507 508 // group the endpoints into a map keyed by the service name 509 sinfo := make(map[string]ServiceInfo) 510 for ep, epr := range eps { 511 s, ok := sinfo[epr.ep.ServiceName] 512 if !ok { 513 s = ServiceInfo{ 514 VIP: epr.ep.VirtualIP, 515 LocalLBIndex: epr.lbIndex, 516 } 517 } 518 if s.Ports == nil { 519 ports := make([]string, 0, len(epr.ep.IngressPorts)) 520 for _, port := range epr.ep.IngressPorts { 521 ports = append(ports, fmt.Sprintf("Target: %d, Publish: %d", port.TargetPort, port.PublishedPort)) 522 } 523 s.Ports = ports 524 } 525 s.Tasks = append(s.Tasks, Task{ 526 Name: epr.ep.Name, 527 EndpointID: ep, 528 EndpointIP: epr.ep.EndpointIP, 529 Info: epr.info, 530 }) 531 sinfo[epr.ep.ServiceName] = s 532 } 533 return sinfo 534 } 535 536 // clusterAgent returns the cluster agent if the network is a swarm-scoped, 537 // multi-host network. 538 func (n *Network) clusterAgent() (agent *nwAgent, ok bool) { 539 if n.scope != scope.Swarm || !n.driverIsMultihost() { 540 return nil, false 541 } 542 a := n.getController().getAgent() 543 return a, a != nil 544 } 545 546 func (n *Network) joinCluster() error { 547 agent, ok := n.clusterAgent() 548 if !ok { 549 return nil 550 } 551 return agent.networkDB.JoinNetwork(n.ID()) 552 } 553 554 func (n *Network) leaveCluster() error { 555 agent, ok := n.clusterAgent() 556 if !ok { 557 return nil 558 } 559 return agent.networkDB.LeaveNetwork(n.ID()) 560 } 561 562 func (ep *Endpoint) addDriverInfoToCluster() error { 563 if ep.joinInfo == nil || len(ep.joinInfo.driverTableEntries) == 0 { 564 return nil 565 } 566 n := ep.getNetwork() 567 agent, ok := n.clusterAgent() 568 if !ok { 569 return nil 570 } 571 572 nwID := n.ID() 573 for _, te := range ep.joinInfo.driverTableEntries { 574 if err := agent.networkDB.CreateEntry(te.tableName, nwID, te.key, te.value); err != nil { 575 return err 576 } 577 } 578 return nil 579 } 580 581 func (ep *Endpoint) deleteDriverInfoFromCluster() error { 582 if ep.joinInfo == nil || len(ep.joinInfo.driverTableEntries) == 0 { 583 return nil 584 } 585 n := ep.getNetwork() 586 agent, ok := n.clusterAgent() 587 if !ok { 588 return nil 589 } 590 591 nwID := n.ID() 592 for _, te := range ep.joinInfo.driverTableEntries { 593 if err := agent.networkDB.DeleteEntry(te.tableName, nwID, te.key); err != nil { 594 return err 595 } 596 } 597 return nil 598 } 599 600 func (ep *Endpoint) addServiceInfoToCluster(sb *Sandbox) error { 601 if len(ep.dnsNames) == 0 || ep.Iface() == nil || ep.Iface().Address() == nil { 602 return nil 603 } 604 605 n := ep.getNetwork() 606 agent, ok := n.clusterAgent() 607 if !ok { 608 return nil 609 } 610 611 sb.service.Lock() 612 defer sb.service.Unlock() 613 log.G(context.TODO()).Debugf("addServiceInfoToCluster START for %s %s", ep.svcName, ep.ID()) 614 615 // Check that the endpoint is still present on the sandbox before adding it to the service discovery. 616 // This is to handle a race between the EnableService and the sbLeave 617 // It is possible that the EnableService starts, fetches the list of the endpoints and 618 // by the time the addServiceInfoToCluster is called the endpoint got removed from the sandbox 619 // The risk is that the deleteServiceInfoToCluster happens before the addServiceInfoToCluster. 620 // This check under the Service lock of the sandbox ensure the correct behavior. 621 // If the addServiceInfoToCluster arrives first may find or not the endpoint and will proceed or exit 622 // but in any case the deleteServiceInfoToCluster will follow doing the cleanup if needed. 623 // In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is 624 // removed from the list, in this situation the delete will bail out not finding any data to cleanup 625 // and the add will bail out not finding the endpoint on the sandbox. 626 if err := sb.GetEndpoint(ep.ID()); err == nil { 627 log.G(context.TODO()).Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID()) 628 return nil 629 } 630 631 dnsNames := ep.getDNSNames() 632 primaryDNSName, dnsAliases := dnsNames[0], dnsNames[1:] 633 634 var ingressPorts []*PortConfig 635 if ep.svcID != "" { 636 // This is a task part of a service 637 // Gossip ingress ports only in ingress network. 638 if n.ingress { 639 ingressPorts = ep.ingressPorts 640 } 641 if err := n.getController().addServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), primaryDNSName, ep.virtualIP, ingressPorts, ep.svcAliases, dnsAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil { 642 return err 643 } 644 } else { 645 // This is a container simply attached to an attachable network 646 if err := n.getController().addContainerNameResolution(n.ID(), ep.ID(), primaryDNSName, dnsAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil { 647 return err 648 } 649 } 650 651 buf, err := proto.Marshal(&EndpointRecord{ 652 Name: primaryDNSName, 653 ServiceName: ep.svcName, 654 ServiceID: ep.svcID, 655 VirtualIP: ep.virtualIP.String(), 656 IngressPorts: ingressPorts, 657 Aliases: ep.svcAliases, 658 TaskAliases: dnsAliases, 659 EndpointIP: ep.Iface().Address().IP.String(), 660 ServiceDisabled: false, 661 }) 662 if err != nil { 663 return err 664 } 665 666 if err := agent.networkDB.CreateEntry(libnetworkEPTable, n.ID(), ep.ID(), buf); err != nil { 667 log.G(context.TODO()).Warnf("addServiceInfoToCluster NetworkDB CreateEntry failed for %s %s err:%s", ep.id, n.id, err) 668 return err 669 } 670 671 log.G(context.TODO()).Debugf("addServiceInfoToCluster END for %s %s", ep.svcName, ep.ID()) 672 673 return nil 674 } 675 676 func (ep *Endpoint) deleteServiceInfoFromCluster(sb *Sandbox, fullRemove bool, method string) error { 677 if len(ep.dnsNames) == 0 { 678 return nil 679 } 680 681 n := ep.getNetwork() 682 agent, ok := n.clusterAgent() 683 if !ok { 684 return nil 685 } 686 687 sb.service.Lock() 688 defer sb.service.Unlock() 689 log.G(context.TODO()).Debugf("deleteServiceInfoFromCluster from %s START for %s %s", method, ep.svcName, ep.ID()) 690 691 // Avoid a race w/ with a container that aborts preemptively. This would 692 // get caught in disableServceInNetworkDB, but we check here to make the 693 // nature of the condition more clear. 694 // See comment in addServiceInfoToCluster() 695 if err := sb.GetEndpoint(ep.ID()); err == nil { 696 log.G(context.TODO()).Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID()) 697 return nil 698 } 699 700 dnsNames := ep.getDNSNames() 701 primaryDNSName, dnsAliases := dnsNames[0], dnsNames[1:] 702 703 // First update the networkDB then locally 704 if fullRemove { 705 if err := agent.networkDB.DeleteEntry(libnetworkEPTable, n.ID(), ep.ID()); err != nil { 706 log.G(context.TODO()).Warnf("deleteServiceInfoFromCluster NetworkDB DeleteEntry failed for %s %s err:%s", ep.id, n.id, err) 707 } 708 } else { 709 disableServiceInNetworkDB(agent, n, ep) 710 } 711 712 if ep.Iface() != nil && ep.Iface().Address() != nil { 713 if ep.svcID != "" { 714 // This is a task part of a service 715 var ingressPorts []*PortConfig 716 if n.ingress { 717 ingressPorts = ep.ingressPorts 718 } 719 if err := n.getController().rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), primaryDNSName, ep.virtualIP, ingressPorts, ep.svcAliases, dnsAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster", true, fullRemove); err != nil { 720 return err 721 } 722 } else { 723 // This is a container simply attached to an attachable network 724 if err := n.getController().delContainerNameResolution(n.ID(), ep.ID(), primaryDNSName, dnsAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster"); err != nil { 725 return err 726 } 727 } 728 } 729 730 log.G(context.TODO()).Debugf("deleteServiceInfoFromCluster from %s END for %s %s", method, ep.svcName, ep.ID()) 731 732 return nil 733 } 734 735 func disableServiceInNetworkDB(a *nwAgent, n *Network, ep *Endpoint) { 736 var epRec EndpointRecord 737 738 log.G(context.TODO()).Debugf("disableServiceInNetworkDB for %s %s", ep.svcName, ep.ID()) 739 740 // Update existing record to indicate that the service is disabled 741 inBuf, err := a.networkDB.GetEntry(libnetworkEPTable, n.ID(), ep.ID()) 742 if err != nil { 743 log.G(context.TODO()).Warnf("disableServiceInNetworkDB GetEntry failed for %s %s err:%s", ep.id, n.id, err) 744 return 745 } 746 // Should never fail 747 if err := proto.Unmarshal(inBuf, &epRec); err != nil { 748 log.G(context.TODO()).Errorf("disableServiceInNetworkDB unmarshal failed for %s %s err:%s", ep.id, n.id, err) 749 return 750 } 751 epRec.ServiceDisabled = true 752 // Should never fail 753 outBuf, err := proto.Marshal(&epRec) 754 if err != nil { 755 log.G(context.TODO()).Errorf("disableServiceInNetworkDB marshalling failed for %s %s err:%s", ep.id, n.id, err) 756 return 757 } 758 // Send update to the whole cluster 759 if err := a.networkDB.UpdateEntry(libnetworkEPTable, n.ID(), ep.ID(), outBuf); err != nil { 760 log.G(context.TODO()).Warnf("disableServiceInNetworkDB UpdateEntry failed for %s %s err:%s", ep.id, n.id, err) 761 } 762 } 763 764 func (n *Network) addDriverWatches() { 765 if len(n.driverTables) == 0 { 766 return 767 } 768 agent, ok := n.clusterAgent() 769 if !ok { 770 return 771 } 772 773 c := n.getController() 774 for _, table := range n.driverTables { 775 ch, cancel := agent.networkDB.Watch(table.name, n.ID()) 776 agent.mu.Lock() 777 agent.driverCancelFuncs[n.ID()] = append(agent.driverCancelFuncs[n.ID()], cancel) 778 agent.mu.Unlock() 779 go c.handleTableEvents(ch, n.handleDriverTableEvent) 780 d, err := n.driver(false) 781 if err != nil { 782 log.G(context.TODO()).Errorf("Could not resolve driver %s while walking driver tabl: %v", n.networkType, err) 783 return 784 } 785 786 err = agent.networkDB.WalkTable(table.name, func(nid, key string, value []byte, deleted bool) bool { 787 // skip the entries that are mark for deletion, this is safe because this function is 788 // called at initialization time so there is no state to delete 789 if nid == n.ID() && !deleted { 790 d.EventNotify(driverapi.Create, nid, table.name, key, value) 791 } 792 return false 793 }) 794 if err != nil { 795 log.G(context.TODO()).WithError(err).Warn("Error while walking networkdb") 796 } 797 } 798 } 799 800 func (n *Network) cancelDriverWatches() { 801 agent, ok := n.clusterAgent() 802 if !ok { 803 return 804 } 805 806 agent.mu.Lock() 807 cancelFuncs := agent.driverCancelFuncs[n.ID()] 808 delete(agent.driverCancelFuncs, n.ID()) 809 agent.mu.Unlock() 810 811 for _, cancel := range cancelFuncs { 812 cancel() 813 } 814 } 815 816 func (c *Controller) handleTableEvents(ch *events.Channel, fn func(events.Event)) { 817 for { 818 select { 819 case ev := <-ch.C: 820 fn(ev) 821 case <-ch.Done(): 822 return 823 } 824 } 825 } 826 827 func (n *Network) handleDriverTableEvent(ev events.Event) { 828 d, err := n.driver(false) 829 if err != nil { 830 log.G(context.TODO()).Errorf("Could not resolve driver %s while handling driver table event: %v", n.networkType, err) 831 return 832 } 833 834 var ( 835 etype driverapi.EventType 836 tname string 837 key string 838 value []byte 839 ) 840 841 switch event := ev.(type) { 842 case networkdb.CreateEvent: 843 tname = event.Table 844 key = event.Key 845 value = event.Value 846 etype = driverapi.Create 847 case networkdb.DeleteEvent: 848 tname = event.Table 849 key = event.Key 850 value = event.Value 851 etype = driverapi.Delete 852 case networkdb.UpdateEvent: 853 tname = event.Table 854 key = event.Key 855 value = event.Value 856 etype = driverapi.Delete 857 } 858 859 d.EventNotify(etype, n.ID(), tname, key, value) 860 } 861 862 func (c *Controller) handleNodeTableEvent(ev events.Event) { 863 var ( 864 value []byte 865 isAdd bool 866 nodeAddr networkdb.NodeAddr 867 ) 868 switch event := ev.(type) { 869 case networkdb.CreateEvent: 870 value = event.Value 871 isAdd = true 872 case networkdb.DeleteEvent: 873 value = event.Value 874 case networkdb.UpdateEvent: 875 log.G(context.TODO()).Errorf("Unexpected update node table event = %#v", event) 876 } 877 878 err := json.Unmarshal(value, &nodeAddr) 879 if err != nil { 880 log.G(context.TODO()).Errorf("Error unmarshalling node table event %v", err) 881 return 882 } 883 c.processNodeDiscovery([]net.IP{nodeAddr.Addr}, isAdd) 884 } 885 886 func (c *Controller) handleEpTableEvent(ev events.Event) { 887 var ( 888 nid string 889 eid string 890 value []byte 891 epRec EndpointRecord 892 ) 893 894 switch event := ev.(type) { 895 case networkdb.CreateEvent: 896 nid = event.NetworkID 897 eid = event.Key 898 value = event.Value 899 case networkdb.DeleteEvent: 900 nid = event.NetworkID 901 eid = event.Key 902 value = event.Value 903 case networkdb.UpdateEvent: 904 nid = event.NetworkID 905 eid = event.Key 906 value = event.Value 907 default: 908 log.G(context.TODO()).Errorf("Unexpected update service table event = %#v", event) 909 return 910 } 911 912 err := proto.Unmarshal(value, &epRec) 913 if err != nil { 914 log.G(context.TODO()).Errorf("Failed to unmarshal service table value: %v", err) 915 return 916 } 917 918 containerName := epRec.Name 919 svcName := epRec.ServiceName 920 svcID := epRec.ServiceID 921 vip := net.ParseIP(epRec.VirtualIP) 922 ip := net.ParseIP(epRec.EndpointIP) 923 ingressPorts := epRec.IngressPorts 924 serviceAliases := epRec.Aliases 925 taskAliases := epRec.TaskAliases 926 927 if containerName == "" || ip == nil { 928 log.G(context.TODO()).Errorf("Invalid endpoint name/ip received while handling service table event %s", value) 929 return 930 } 931 932 switch ev.(type) { 933 case networkdb.CreateEvent: 934 log.G(context.TODO()).Debugf("handleEpTableEvent ADD %s R:%v", eid, epRec) 935 if svcID != "" { 936 // This is a remote task part of a service 937 if err := c.addServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent"); err != nil { 938 log.G(context.TODO()).Errorf("failed adding service binding for %s epRec:%v err:%v", eid, epRec, err) 939 return 940 } 941 } else { 942 // This is a remote container simply attached to an attachable network 943 if err := c.addContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil { 944 log.G(context.TODO()).Errorf("failed adding container name resolution for %s epRec:%v err:%v", eid, epRec, err) 945 } 946 } 947 948 case networkdb.DeleteEvent: 949 log.G(context.TODO()).Debugf("handleEpTableEvent DEL %s R:%v", eid, epRec) 950 if svcID != "" { 951 // This is a remote task part of a service 952 if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, true); err != nil { 953 log.G(context.TODO()).Errorf("failed removing service binding for %s epRec:%v err:%v", eid, epRec, err) 954 return 955 } 956 } else { 957 // This is a remote container simply attached to an attachable network 958 if err := c.delContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil { 959 log.G(context.TODO()).Errorf("failed removing container name resolution for %s epRec:%v err:%v", eid, epRec, err) 960 } 961 } 962 case networkdb.UpdateEvent: 963 log.G(context.TODO()).Debugf("handleEpTableEvent UPD %s R:%v", eid, epRec) 964 // We currently should only get these to inform us that an endpoint 965 // is disabled. Report if otherwise. 966 if svcID == "" || !epRec.ServiceDisabled { 967 log.G(context.TODO()).Errorf("Unexpected update table event for %s epRec:%v", eid, epRec) 968 return 969 } 970 // This is a remote task that is part of a service that is now disabled 971 if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, false); err != nil { 972 log.G(context.TODO()).Errorf("failed disabling service binding for %s epRec:%v err:%v", eid, epRec, err) 973 return 974 } 975 } 976 }