github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/libnetwork/drivers/overlay/ov_network.go (about) 1 package overlay 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "net" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "runtime" 12 "strconv" 13 "strings" 14 "sync" 15 16 "github.com/docker/docker/pkg/reexec" 17 "github.com/docker/libnetwork/datastore" 18 "github.com/docker/libnetwork/driverapi" 19 "github.com/docker/libnetwork/netlabel" 20 "github.com/docker/libnetwork/netutils" 21 "github.com/docker/libnetwork/ns" 22 "github.com/docker/libnetwork/osl" 23 "github.com/docker/libnetwork/resolvconf" 24 "github.com/docker/libnetwork/types" 25 "github.com/hashicorp/go-multierror" 26 "github.com/sirupsen/logrus" 27 "github.com/vishvananda/netlink" 28 "github.com/vishvananda/netlink/nl" 29 "github.com/vishvananda/netns" 30 "golang.org/x/sys/unix" 31 ) 32 33 var ( 34 hostMode bool 35 networkOnce sync.Once 36 networkMu sync.Mutex 37 vniTbl = make(map[uint32]string) 38 ) 39 40 type networkTable map[string]*network 41 42 type subnet struct { 43 sboxInit bool 44 vxlanName string 45 brName string 46 vni uint32 47 initErr error 48 subnetIP *net.IPNet 49 gwIP *net.IPNet 50 } 51 52 type subnetJSON struct { 53 SubnetIP string 54 GwIP string 55 Vni uint32 56 } 57 58 type network struct { 59 id string 60 dbIndex uint64 61 dbExists bool 62 sbox osl.Sandbox 63 nlSocket *nl.NetlinkSocket 64 endpoints endpointTable 65 driver *driver 66 joinCnt int 67 sboxInit bool 68 initEpoch int 69 initErr error 70 subnets []*subnet 71 secure bool 72 mtu int 73 sync.Mutex 74 } 75 76 func init() { 77 reexec.Register("set-default-vlan", setDefaultVlan) 78 } 79 80 func setDefaultVlan() { 81 if len(os.Args) < 3 { 82 logrus.Error("insufficient number of arguments") 83 os.Exit(1) 84 } 85 86 runtime.LockOSThread() 87 defer runtime.UnlockOSThread() 88 89 nsPath := os.Args[1] 90 ns, err := netns.GetFromPath(nsPath) 91 if err != nil { 92 logrus.Errorf("overlay namespace get failed, %v", err) 93 os.Exit(1) 94 } 95 if err = netns.Set(ns); err != nil { 96 logrus.Errorf("setting into overlay namespace failed, %v", err) 97 os.Exit(1) 98 } 99 100 // make sure the sysfs mount doesn't propagate back 101 if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { 102 logrus.Errorf("unshare failed, %v", err) 103 os.Exit(1) 104 } 105 106 flag := unix.MS_PRIVATE | unix.MS_REC 107 if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil { 108 logrus.Errorf("root mount failed, %v", err) 109 os.Exit(1) 110 } 111 112 if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil { 113 logrus.Errorf("mounting sysfs failed, %v", err) 114 os.Exit(1) 115 } 116 117 brName := os.Args[2] 118 path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid") 119 data := []byte{'0', '\n'} 120 121 if err = ioutil.WriteFile(path, data, 0644); err != nil { 122 logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err) 123 os.Exit(1) 124 } 125 os.Exit(0) 126 } 127 128 func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { 129 return nil, types.NotImplementedErrorf("not implemented") 130 } 131 132 func (d *driver) NetworkFree(id string) error { 133 return types.NotImplementedErrorf("not implemented") 134 } 135 136 func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { 137 if id == "" { 138 return fmt.Errorf("invalid network id") 139 } 140 if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" { 141 return types.BadRequestErrorf("ipv4 pool is empty") 142 } 143 144 // Since we perform lazy configuration make sure we try 145 // configuring the driver when we enter CreateNetwork 146 if err := d.configure(); err != nil { 147 return err 148 } 149 150 n := &network{ 151 id: id, 152 driver: d, 153 endpoints: endpointTable{}, 154 subnets: []*subnet{}, 155 } 156 157 vnis := make([]uint32, 0, len(ipV4Data)) 158 if gval, ok := option[netlabel.GenericData]; ok { 159 optMap := gval.(map[string]string) 160 if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok { 161 logrus.Debugf("overlay: Received vxlan IDs: %s", val) 162 vniStrings := strings.Split(val, ",") 163 for _, vniStr := range vniStrings { 164 vni, err := strconv.Atoi(vniStr) 165 if err != nil { 166 return fmt.Errorf("invalid vxlan id value %q passed", vniStr) 167 } 168 169 vnis = append(vnis, uint32(vni)) 170 } 171 } 172 if _, ok := optMap[secureOption]; ok { 173 n.secure = true 174 } 175 if val, ok := optMap[netlabel.DriverMTU]; ok { 176 var err error 177 if n.mtu, err = strconv.Atoi(val); err != nil { 178 return fmt.Errorf("failed to parse %v: %v", val, err) 179 } 180 if n.mtu < 0 { 181 return fmt.Errorf("invalid MTU value: %v", n.mtu) 182 } 183 } 184 } 185 186 // If we are getting vnis from libnetwork, either we get for 187 // all subnets or none. 188 if len(vnis) != 0 && len(vnis) < len(ipV4Data) { 189 return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis)) 190 } 191 192 for i, ipd := range ipV4Data { 193 s := &subnet{ 194 subnetIP: ipd.Pool, 195 gwIP: ipd.Gateway, 196 } 197 198 if len(vnis) != 0 { 199 s.vni = vnis[i] 200 } 201 202 n.subnets = append(n.subnets, s) 203 } 204 205 d.Lock() 206 defer d.Unlock() 207 if d.networks[n.id] != nil { 208 return fmt.Errorf("attempt to create overlay network %v that already exists", n.id) 209 } 210 211 if err := n.writeToStore(); err != nil { 212 return fmt.Errorf("failed to update data store for network %v: %v", n.id, err) 213 } 214 215 // Make sure no rule is on the way from any stale secure network 216 if !n.secure { 217 for _, vni := range vnis { 218 programMangle(vni, false) 219 programInput(vni, false) 220 } 221 } 222 223 if nInfo != nil { 224 if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil { 225 // XXX Undo writeToStore? No method to so. Why? 226 return err 227 } 228 } 229 230 d.networks[id] = n 231 232 return nil 233 } 234 235 func (d *driver) DeleteNetwork(nid string) error { 236 if nid == "" { 237 return fmt.Errorf("invalid network id") 238 } 239 240 // Make sure driver resources are initialized before proceeding 241 if err := d.configure(); err != nil { 242 return err 243 } 244 245 d.Lock() 246 // Only perform a peer flush operation (if required) AFTER unlocking 247 // the driver lock to avoid deadlocking w/ the peerDB. 248 var doPeerFlush bool 249 defer func() { 250 d.Unlock() 251 if doPeerFlush { 252 d.peerFlush(nid) 253 } 254 }() 255 256 // This is similar to d.network(), but we need to keep holding the lock 257 // until we are done removing this network. 258 n, ok := d.networks[nid] 259 if !ok { 260 n = d.restoreNetworkFromStore(nid) 261 } 262 if n == nil { 263 return fmt.Errorf("could not find network with id %s", nid) 264 } 265 266 for _, ep := range n.endpoints { 267 if ep.ifName != "" { 268 if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil { 269 if err := ns.NlHandle().LinkDel(link); err != nil { 270 logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id) 271 } 272 } 273 } 274 275 if err := d.deleteEndpointFromStore(ep); err != nil { 276 logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err) 277 } 278 } 279 280 doPeerFlush = true 281 delete(d.networks, nid) 282 283 vnis, err := n.releaseVxlanID() 284 if err != nil { 285 return err 286 } 287 288 if n.secure { 289 for _, vni := range vnis { 290 programMangle(vni, false) 291 programInput(vni, false) 292 } 293 } 294 295 return nil 296 } 297 298 func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error { 299 return nil 300 } 301 302 func (d *driver) RevokeExternalConnectivity(nid, eid string) error { 303 return nil 304 } 305 306 func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error { 307 // If there is a race between two go routines here only one will win 308 // the other will wait. 309 networkOnce.Do(networkOnceInit) 310 311 n.Lock() 312 // If non-restore initialization occurred and was successful then 313 // tell the peerDB to initialize the sandbox with all the peers 314 // previously received from networkdb. But only do this after 315 // unlocking the network. Otherwise we could deadlock with 316 // on the peerDB channel while peerDB is waiting for the network lock. 317 var doInitPeerDB bool 318 defer func() { 319 n.Unlock() 320 if doInitPeerDB { 321 n.driver.initSandboxPeerDB(n.id) 322 } 323 }() 324 325 if !n.sboxInit { 326 n.initErr = n.initSandbox(restore) 327 doInitPeerDB = n.initErr == nil && !restore 328 // If there was an error, we cannot recover it 329 n.sboxInit = true 330 } 331 332 if n.initErr != nil { 333 return fmt.Errorf("network sandbox join failed: %v", n.initErr) 334 } 335 336 subnetErr := s.initErr 337 if !s.sboxInit { 338 subnetErr = n.initSubnetSandbox(s, restore) 339 // We can recover from these errors, but not on restore 340 if restore || subnetErr == nil { 341 s.initErr = subnetErr 342 s.sboxInit = true 343 } 344 } 345 if subnetErr != nil { 346 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr) 347 } 348 349 if incJoinCount { 350 n.joinCnt++ 351 } 352 353 return nil 354 } 355 356 func (n *network) leaveSandbox() { 357 n.Lock() 358 defer n.Unlock() 359 n.joinCnt-- 360 if n.joinCnt != 0 { 361 return 362 } 363 364 n.destroySandbox() 365 366 n.sboxInit = false 367 n.initErr = nil 368 for _, s := range n.subnets { 369 s.sboxInit = false 370 s.initErr = nil 371 } 372 } 373 374 // to be called while holding network lock 375 func (n *network) destroySandbox() { 376 if n.sbox != nil { 377 for _, iface := range n.sbox.Info().Interfaces() { 378 if err := iface.Remove(); err != nil { 379 logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err) 380 } 381 } 382 383 for _, s := range n.subnets { 384 if hostMode { 385 if err := removeFilters(n.id[:12], s.brName); err != nil { 386 logrus.Warnf("Could not remove overlay filters: %v", err) 387 } 388 } 389 390 if s.vxlanName != "" { 391 err := deleteInterface(s.vxlanName) 392 if err != nil { 393 logrus.Warnf("could not cleanup sandbox properly: %v", err) 394 } 395 } 396 } 397 398 if hostMode { 399 if err := removeNetworkChain(n.id[:12]); err != nil { 400 logrus.Warnf("could not remove network chain: %v", err) 401 } 402 } 403 404 // Close the netlink socket, this will also release the watchMiss goroutine that is using it 405 if n.nlSocket != nil { 406 n.nlSocket.Close() 407 n.nlSocket = nil 408 } 409 410 n.sbox.Destroy() 411 n.sbox = nil 412 } 413 } 414 415 func populateVNITbl() { 416 filepath.Walk(filepath.Dir(osl.GenerateKey("walk")), 417 func(path string, info os.FileInfo, err error) error { 418 _, fname := filepath.Split(path) 419 420 if len(strings.Split(fname, "-")) <= 1 { 421 return nil 422 } 423 424 ns, err := netns.GetFromPath(path) 425 if err != nil { 426 logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err) 427 return nil 428 } 429 defer ns.Close() 430 431 nlh, err := netlink.NewHandleAt(ns, unix.NETLINK_ROUTE) 432 if err != nil { 433 logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err) 434 return nil 435 } 436 defer nlh.Delete() 437 438 err = nlh.SetSocketTimeout(soTimeout) 439 if err != nil { 440 logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err) 441 } 442 443 links, err := nlh.LinkList() 444 if err != nil { 445 logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err) 446 return nil 447 } 448 449 for _, l := range links { 450 if l.Type() == "vxlan" { 451 vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path 452 } 453 } 454 455 return nil 456 }) 457 } 458 459 func networkOnceInit() { 460 populateVNITbl() 461 462 if os.Getenv("_OVERLAY_HOST_MODE") != "" { 463 hostMode = true 464 return 465 } 466 467 err := createVxlan("testvxlan", 1, 0) 468 if err != nil { 469 logrus.Errorf("Failed to create testvxlan interface: %v", err) 470 return 471 } 472 473 defer deleteInterface("testvxlan") 474 475 path := "/proc/self/ns/net" 476 hNs, err := netns.GetFromPath(path) 477 if err != nil { 478 logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err) 479 return 480 } 481 defer hNs.Close() 482 483 nlh := ns.NlHandle() 484 485 iface, err := nlh.LinkByName("testvxlan") 486 if err != nil { 487 logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err) 488 return 489 } 490 491 // If we are not able to move the vxlan interface to a namespace 492 // then fallback to host mode 493 if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil { 494 hostMode = true 495 } 496 } 497 498 func (n *network) generateVxlanName(s *subnet) string { 499 id := n.id 500 if len(n.id) > 5 { 501 id = n.id[:5] 502 } 503 504 return fmt.Sprintf("vx-%06x-%v", s.vni, id) 505 } 506 507 func (n *network) generateBridgeName(s *subnet) string { 508 id := n.id 509 if len(n.id) > 5 { 510 id = n.id[:5] 511 } 512 513 return n.getBridgeNamePrefix(s) + "-" + id 514 } 515 516 func (n *network) getBridgeNamePrefix(s *subnet) string { 517 return fmt.Sprintf("ov-%06x", s.vni) 518 } 519 520 func checkOverlap(nw *net.IPNet) error { 521 var nameservers []string 522 523 if rc, err := resolvconf.Get(); err == nil { 524 nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) 525 } 526 527 if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil { 528 return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err) 529 } 530 531 if err := netutils.CheckRouteOverlaps(nw); err != nil { 532 return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err) 533 } 534 535 return nil 536 } 537 538 func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error { 539 sbox := n.sbox 540 541 // restore overlay osl sandbox 542 Ifaces := make(map[string][]osl.IfaceOption) 543 brIfaceOption := make([]osl.IfaceOption, 2) 544 brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP)) 545 brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true)) 546 Ifaces[brName+"+br"] = brIfaceOption 547 548 err := sbox.Restore(Ifaces, nil, nil, nil) 549 if err != nil { 550 return err 551 } 552 553 Ifaces = make(map[string][]osl.IfaceOption) 554 vxlanIfaceOption := make([]osl.IfaceOption, 1) 555 vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName)) 556 Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption 557 return sbox.Restore(Ifaces, nil, nil, nil) 558 } 559 560 func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error { 561 562 if hostMode { 563 // Try to delete stale bridge interface if it exists 564 if err := deleteInterface(brName); err != nil { 565 deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s) 566 } 567 // Try to delete the vxlan interface by vni if already present 568 deleteVxlanByVNI("", s.vni) 569 570 if err := checkOverlap(s.subnetIP); err != nil { 571 return err 572 } 573 } 574 575 if !hostMode { 576 // Try to find this subnet's vni is being used in some 577 // other namespace by looking at vniTbl that we just 578 // populated in the once init. If a hit is found then 579 // it must a stale namespace from previous 580 // life. Destroy it completely and reclaim resourced. 581 networkMu.Lock() 582 path, ok := vniTbl[s.vni] 583 networkMu.Unlock() 584 585 if ok { 586 deleteVxlanByVNI(path, s.vni) 587 if err := unix.Unmount(path, unix.MNT_FORCE); err != nil { 588 logrus.Errorf("unmount of %s failed: %v", path, err) 589 } 590 os.Remove(path) 591 592 networkMu.Lock() 593 delete(vniTbl, s.vni) 594 networkMu.Unlock() 595 } 596 } 597 598 // create a bridge and vxlan device for this subnet and move it to the sandbox 599 sbox := n.sbox 600 601 if err := sbox.AddInterface(brName, "br", 602 sbox.InterfaceOptions().Address(s.gwIP), 603 sbox.InterfaceOptions().Bridge(true)); err != nil { 604 return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) 605 } 606 607 err := createVxlan(vxlanName, s.vni, n.maxMTU()) 608 if err != nil { 609 return err 610 } 611 612 if err := sbox.AddInterface(vxlanName, "vxlan", 613 sbox.InterfaceOptions().Master(brName)); err != nil { 614 // If adding vxlan device to the overlay namespace fails, remove the bridge interface we 615 // already added to the namespace. This allows the caller to try the setup again. 616 for _, iface := range sbox.Info().Interfaces() { 617 if iface.SrcName() == brName { 618 if ierr := iface.Remove(); ierr != nil { 619 logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr) 620 } 621 } 622 } 623 624 // Also, delete the vxlan interface. Since a global vni id is associated 625 // with the vxlan interface, an orphaned vxlan interface will result in 626 // failure of vxlan device creation if the vni is assigned to some other 627 // network. 628 if deleteErr := deleteInterface(vxlanName); deleteErr != nil { 629 logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err) 630 } 631 return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err) 632 } 633 634 if !hostMode { 635 var name string 636 for _, i := range sbox.Info().Interfaces() { 637 if i.Bridge() { 638 name = i.DstName() 639 } 640 } 641 cmd := &exec.Cmd{ 642 Path: reexec.Self(), 643 Args: []string{"set-default-vlan", sbox.Key(), name}, 644 Stdout: os.Stdout, 645 Stderr: os.Stderr, 646 } 647 if err := cmd.Run(); err != nil { 648 // not a fatal error 649 logrus.Errorf("reexec to set bridge default vlan failed %v", err) 650 } 651 } 652 653 if hostMode { 654 if err := addFilters(n.id[:12], brName); err != nil { 655 return err 656 } 657 } 658 659 return nil 660 } 661 662 // Must be called with the network lock 663 func (n *network) initSubnetSandbox(s *subnet, restore bool) error { 664 brName := n.generateBridgeName(s) 665 vxlanName := n.generateVxlanName(s) 666 667 // Program iptables rules for mandatory encryption of the secure 668 // network, or clean up leftover rules for a stale secure network which 669 // was previously assigned the same VNI. 670 if err := programMangle(s.vni, n.secure); err != nil { 671 return err 672 } 673 if err := programInput(s.vni, n.secure); err != nil { 674 if n.secure { 675 return multierror.Append(err, programMangle(s.vni, false)) 676 } 677 } 678 679 if restore { 680 if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil { 681 return err 682 } 683 } else { 684 if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil { 685 return err 686 } 687 } 688 689 s.vxlanName = vxlanName 690 s.brName = brName 691 692 return nil 693 } 694 695 func (n *network) cleanupStaleSandboxes() { 696 filepath.Walk(filepath.Dir(osl.GenerateKey("walk")), 697 func(path string, info os.FileInfo, err error) error { 698 _, fname := filepath.Split(path) 699 700 pList := strings.Split(fname, "-") 701 if len(pList) <= 1 { 702 return nil 703 } 704 705 pattern := pList[1] 706 if strings.Contains(n.id, pattern) { 707 // Delete all vnis 708 deleteVxlanByVNI(path, 0) 709 unix.Unmount(path, unix.MNT_DETACH) 710 os.Remove(path) 711 712 // Now that we have destroyed this 713 // sandbox, remove all references to 714 // it in vniTbl so that we don't 715 // inadvertently destroy the sandbox 716 // created in this life. 717 networkMu.Lock() 718 for vni, tblPath := range vniTbl { 719 if tblPath == path { 720 delete(vniTbl, vni) 721 } 722 } 723 networkMu.Unlock() 724 } 725 726 return nil 727 }) 728 } 729 730 func (n *network) initSandbox(restore bool) error { 731 n.initEpoch++ 732 733 if !restore { 734 if hostMode { 735 if err := addNetworkChain(n.id[:12]); err != nil { 736 return err 737 } 738 } 739 740 // If there are any stale sandboxes related to this network 741 // from previous daemon life clean it up here 742 n.cleanupStaleSandboxes() 743 } 744 745 // In the restore case network sandbox already exist; but we don't know 746 // what epoch number it was created with. It has to be retrieved by 747 // searching the net namespaces. 748 var key string 749 if restore { 750 key = osl.GenerateKey("-" + n.id) 751 } else { 752 key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id) 753 } 754 755 sbox, err := osl.NewSandbox(key, !hostMode, restore) 756 if err != nil { 757 return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err) 758 } 759 760 // this is needed to let the peerAdd configure the sandbox 761 n.sbox = sbox 762 763 // If we are in swarm mode, we don't need anymore the watchMiss routine. 764 // This will save 1 thread and 1 netlink socket per network 765 if !n.driver.isSerfAlive() { 766 return nil 767 } 768 769 var nlSock *nl.NetlinkSocket 770 sbox.InvokeFunc(func() { 771 nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) 772 if err != nil { 773 return 774 } 775 // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed 776 tv := unix.NsecToTimeval(soTimeout.Nanoseconds()) 777 err = nlSock.SetReceiveTimeout(&tv) 778 }) 779 n.nlSocket = nlSock 780 781 if err == nil { 782 go n.watchMiss(nlSock, key) 783 } else { 784 logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v", 785 n.id, sbox.Key(), err) 786 } 787 788 return nil 789 } 790 791 func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) { 792 // With the new version of the netlink library the deserialize function makes 793 // requests about the interface of the netlink message. This can succeed only 794 // if this go routine is in the target namespace. For this reason following we 795 // lock the thread on that namespace 796 runtime.LockOSThread() 797 defer runtime.UnlockOSThread() 798 newNs, err := netns.GetFromPath(nsPath) 799 if err != nil { 800 logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath) 801 return 802 } 803 defer newNs.Close() 804 if err = netns.Set(newNs); err != nil { 805 logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath) 806 return 807 } 808 for { 809 msgs, _, err := nlSock.Receive() 810 if err != nil { 811 n.Lock() 812 nlFd := nlSock.GetFd() 813 n.Unlock() 814 if nlFd == -1 { 815 // The netlink socket got closed, simply exit to not leak this goroutine 816 return 817 } 818 // When the receive timeout expires the receive will return EAGAIN 819 if err == unix.EAGAIN { 820 // we continue here to avoid spam for timeouts 821 continue 822 } 823 logrus.Errorf("Failed to receive from netlink: %v ", err) 824 continue 825 } 826 827 for _, msg := range msgs { 828 if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH { 829 continue 830 } 831 832 neigh, err := netlink.NeighDeserialize(msg.Data) 833 if err != nil { 834 logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err) 835 continue 836 } 837 838 var ( 839 ip net.IP 840 mac net.HardwareAddr 841 l2Miss, l3Miss bool 842 ) 843 if neigh.IP.To4() != nil { 844 ip = neigh.IP 845 l3Miss = true 846 } else if neigh.HardwareAddr != nil { 847 mac = []byte(neigh.HardwareAddr) 848 ip = net.IP(mac[2:]) 849 l2Miss = true 850 } else { 851 continue 852 } 853 854 // Not any of the network's subnets. Ignore. 855 if !n.contains(ip) { 856 continue 857 } 858 859 if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { 860 continue 861 } 862 863 logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) 864 mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) 865 if err != nil { 866 logrus.Errorf("could not resolve peer %q: %v", ip, err) 867 continue 868 } 869 n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) 870 } 871 } 872 } 873 874 // Restore a network from the store to the driver if it is present. 875 // Must be called with the driver locked! 876 func (d *driver) restoreNetworkFromStore(nid string) *network { 877 n := d.getNetworkFromStore(nid) 878 if n != nil { 879 n.driver = d 880 n.endpoints = endpointTable{} 881 d.networks[nid] = n 882 } 883 return n 884 } 885 886 func (d *driver) network(nid string) *network { 887 d.Lock() 888 n, ok := d.networks[nid] 889 if !ok { 890 n = d.restoreNetworkFromStore(nid) 891 } 892 d.Unlock() 893 894 return n 895 } 896 897 func (d *driver) getNetworkFromStore(nid string) *network { 898 if d.store == nil { 899 return nil 900 } 901 902 n := &network{id: nid} 903 if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 904 return nil 905 } 906 907 return n 908 } 909 910 func (n *network) sandbox() osl.Sandbox { 911 n.Lock() 912 defer n.Unlock() 913 return n.sbox 914 } 915 916 func (n *network) vxlanID(s *subnet) uint32 { 917 n.Lock() 918 defer n.Unlock() 919 return s.vni 920 } 921 922 func (n *network) setVxlanID(s *subnet, vni uint32) { 923 n.Lock() 924 s.vni = vni 925 n.Unlock() 926 } 927 928 func (n *network) Key() []string { 929 return []string{"overlay", "network", n.id} 930 } 931 932 func (n *network) KeyPrefix() []string { 933 return []string{"overlay", "network"} 934 } 935 936 func (n *network) Value() []byte { 937 m := map[string]interface{}{} 938 939 netJSON := []*subnetJSON{} 940 941 for _, s := range n.subnets { 942 sj := &subnetJSON{ 943 SubnetIP: s.subnetIP.String(), 944 GwIP: s.gwIP.String(), 945 Vni: s.vni, 946 } 947 netJSON = append(netJSON, sj) 948 } 949 950 m["secure"] = n.secure 951 m["subnets"] = netJSON 952 m["mtu"] = n.mtu 953 b, err := json.Marshal(m) 954 if err != nil { 955 return []byte{} 956 } 957 958 return b 959 } 960 961 func (n *network) Index() uint64 { 962 return n.dbIndex 963 } 964 965 func (n *network) SetIndex(index uint64) { 966 n.dbIndex = index 967 n.dbExists = true 968 } 969 970 func (n *network) Exists() bool { 971 return n.dbExists 972 } 973 974 func (n *network) Skip() bool { 975 return false 976 } 977 978 func (n *network) SetValue(value []byte) error { 979 var ( 980 m map[string]interface{} 981 newNet bool 982 isMap = true 983 netJSON = []*subnetJSON{} 984 ) 985 986 if err := json.Unmarshal(value, &m); err != nil { 987 err := json.Unmarshal(value, &netJSON) 988 if err != nil { 989 return err 990 } 991 isMap = false 992 } 993 994 if len(n.subnets) == 0 { 995 newNet = true 996 } 997 998 if isMap { 999 if val, ok := m["secure"]; ok { 1000 n.secure = val.(bool) 1001 } 1002 if val, ok := m["mtu"]; ok { 1003 n.mtu = int(val.(float64)) 1004 } 1005 bytes, err := json.Marshal(m["subnets"]) 1006 if err != nil { 1007 return err 1008 } 1009 if err := json.Unmarshal(bytes, &netJSON); err != nil { 1010 return err 1011 } 1012 } 1013 1014 for _, sj := range netJSON { 1015 subnetIPstr := sj.SubnetIP 1016 gwIPstr := sj.GwIP 1017 vni := sj.Vni 1018 1019 subnetIP, _ := types.ParseCIDR(subnetIPstr) 1020 gwIP, _ := types.ParseCIDR(gwIPstr) 1021 1022 if newNet { 1023 s := &subnet{ 1024 subnetIP: subnetIP, 1025 gwIP: gwIP, 1026 vni: vni, 1027 } 1028 n.subnets = append(n.subnets, s) 1029 } else { 1030 sNet := n.getMatchingSubnet(subnetIP) 1031 if sNet != nil { 1032 sNet.vni = vni 1033 } 1034 } 1035 } 1036 return nil 1037 } 1038 1039 func (n *network) DataScope() string { 1040 return datastore.GlobalScope 1041 } 1042 1043 func (n *network) writeToStore() error { 1044 if n.driver.store == nil { 1045 return nil 1046 } 1047 1048 return n.driver.store.PutObjectAtomic(n) 1049 } 1050 1051 func (n *network) releaseVxlanID() ([]uint32, error) { 1052 n.Lock() 1053 nSubnets := len(n.subnets) 1054 n.Unlock() 1055 if nSubnets == 0 { 1056 return nil, nil 1057 } 1058 1059 if n.driver.store != nil { 1060 if err := n.driver.store.DeleteObjectAtomic(n); err != nil { 1061 if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { 1062 // In both the above cases we can safely assume that the key has been removed by some other 1063 // instance and so simply get out of here 1064 return nil, nil 1065 } 1066 1067 return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err) 1068 } 1069 } 1070 var vnis []uint32 1071 n.Lock() 1072 for _, s := range n.subnets { 1073 if n.driver.vxlanIdm != nil { 1074 vnis = append(vnis, s.vni) 1075 } 1076 s.vni = 0 1077 } 1078 n.Unlock() 1079 1080 for _, vni := range vnis { 1081 n.driver.vxlanIdm.Release(uint64(vni)) 1082 } 1083 1084 return vnis, nil 1085 } 1086 1087 func (n *network) obtainVxlanID(s *subnet) error { 1088 //return if the subnet already has a vxlan id assigned 1089 if n.vxlanID(s) != 0 { 1090 return nil 1091 } 1092 1093 if n.driver.store == nil { 1094 return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id") 1095 } 1096 1097 for { 1098 if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 1099 return fmt.Errorf("getting network %q from datastore failed %v", n.id, err) 1100 } 1101 1102 if n.vxlanID(s) == 0 { 1103 vxlanID, err := n.driver.vxlanIdm.GetID(true) 1104 if err != nil { 1105 return fmt.Errorf("failed to allocate vxlan id: %v", err) 1106 } 1107 1108 n.setVxlanID(s, uint32(vxlanID)) 1109 if err := n.writeToStore(); err != nil { 1110 n.driver.vxlanIdm.Release(uint64(n.vxlanID(s))) 1111 n.setVxlanID(s, 0) 1112 if err == datastore.ErrKeyModified { 1113 continue 1114 } 1115 return fmt.Errorf("network %q failed to update data store: %v", n.id, err) 1116 } 1117 return nil 1118 } 1119 return nil 1120 } 1121 } 1122 1123 // contains return true if the passed ip belongs to one the network's 1124 // subnets 1125 func (n *network) contains(ip net.IP) bool { 1126 for _, s := range n.subnets { 1127 if s.subnetIP.Contains(ip) { 1128 return true 1129 } 1130 } 1131 1132 return false 1133 } 1134 1135 // getSubnetforIP returns the subnet to which the given IP belongs 1136 func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { 1137 for _, s := range n.subnets { 1138 // first check if the mask lengths are the same 1139 i, _ := s.subnetIP.Mask.Size() 1140 j, _ := ip.Mask.Size() 1141 if i != j { 1142 continue 1143 } 1144 if s.subnetIP.Contains(ip.IP) { 1145 return s 1146 } 1147 } 1148 return nil 1149 } 1150 1151 // getMatchingSubnet return the network's subnet that matches the input 1152 func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet { 1153 if ip == nil { 1154 return nil 1155 } 1156 for _, s := range n.subnets { 1157 // first check if the mask lengths are the same 1158 i, _ := s.subnetIP.Mask.Size() 1159 j, _ := ip.Mask.Size() 1160 if i != j { 1161 continue 1162 } 1163 if s.subnetIP.IP.Equal(ip.IP) { 1164 return s 1165 } 1166 } 1167 return nil 1168 }