github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/libnetwork/drivers/overlay/ov_network.go (about) 1 //go:build linux 2 // +build linux 3 4 package overlay 5 6 import ( 7 "encoding/json" 8 "fmt" 9 "net" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "runtime" 14 "strconv" 15 "strings" 16 "sync" 17 18 "github.com/docker/docker/libnetwork/datastore" 19 "github.com/docker/docker/libnetwork/driverapi" 20 "github.com/docker/docker/libnetwork/netlabel" 21 "github.com/docker/docker/libnetwork/netutils" 22 "github.com/docker/docker/libnetwork/ns" 23 "github.com/docker/docker/libnetwork/osl" 24 "github.com/docker/docker/libnetwork/resolvconf" 25 "github.com/docker/docker/libnetwork/types" 26 "github.com/docker/docker/pkg/reexec" 27 "github.com/sirupsen/logrus" 28 "github.com/vishvananda/netlink" 29 "github.com/vishvananda/netlink/nl" 30 "github.com/vishvananda/netns" 31 "golang.org/x/sys/unix" 32 ) 33 34 var ( 35 hostMode bool 36 networkOnce sync.Once 37 networkMu sync.Mutex 38 vniTbl = make(map[uint32]string) 39 ) 40 41 type networkTable map[string]*network 42 43 type subnet struct { 44 sboxInit bool 45 vxlanName string 46 brName string 47 vni uint32 48 initErr error 49 subnetIP *net.IPNet 50 gwIP *net.IPNet 51 } 52 53 type subnetJSON struct { 54 SubnetIP string 55 GwIP string 56 Vni uint32 57 } 58 59 type network struct { 60 id string 61 dbIndex uint64 62 dbExists bool 63 sbox osl.Sandbox 64 nlSocket *nl.NetlinkSocket 65 endpoints endpointTable 66 driver *driver 67 joinCnt int 68 sboxInit bool 69 initEpoch int 70 initErr error 71 subnets []*subnet 72 secure bool 73 mtu int 74 sync.Mutex 75 } 76 77 func init() { 78 reexec.Register("set-default-vlan", setDefaultVlan) 79 80 // Lock main() to the initial thread to exclude the goroutines executing 81 // func (*network).watchMiss() from being scheduled onto that thread. 82 // Changes to the network namespace of the initial thread alter 83 // /proc/self/ns/net, which would break any code which (incorrectly) 84 // assumes that that file is a handle to the network namespace for the 85 // thread it is currently executing on. 86 runtime.LockOSThread() 87 } 88 89 func setDefaultVlan() { 90 if len(os.Args) < 3 { 91 logrus.Error("insufficient number of arguments") 92 os.Exit(1) 93 } 94 95 runtime.LockOSThread() 96 defer runtime.UnlockOSThread() 97 98 nsPath := os.Args[1] 99 ns, err := netns.GetFromPath(nsPath) 100 if err != nil { 101 logrus.Errorf("overlay namespace get failed, %v", err) 102 os.Exit(1) 103 } 104 if err = netns.Set(ns); err != nil { 105 logrus.Errorf("setting into overlay namespace failed, %v", err) 106 os.Exit(1) 107 } 108 109 // make sure the sysfs mount doesn't propagate back 110 if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { 111 logrus.Errorf("unshare failed, %v", err) 112 os.Exit(1) 113 } 114 115 flag := unix.MS_PRIVATE | unix.MS_REC 116 if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil { 117 logrus.Errorf("root mount failed, %v", err) 118 os.Exit(1) 119 } 120 121 if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil { 122 logrus.Errorf("mounting sysfs failed, %v", err) 123 os.Exit(1) 124 } 125 126 brName := os.Args[2] 127 path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid") 128 data := []byte{'0', '\n'} 129 130 if err = os.WriteFile(path, data, 0644); err != nil { 131 logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err) 132 os.Exit(1) 133 } 134 os.Exit(0) 135 } 136 137 func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { 138 return nil, types.NotImplementedErrorf("not implemented") 139 } 140 141 func (d *driver) NetworkFree(id string) error { 142 return types.NotImplementedErrorf("not implemented") 143 } 144 145 func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { 146 if id == "" { 147 return fmt.Errorf("invalid network id") 148 } 149 if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" { 150 return types.BadRequestErrorf("ipv4 pool is empty") 151 } 152 153 // Since we perform lazy configuration make sure we try 154 // configuring the driver when we enter CreateNetwork 155 if err := d.configure(); err != nil { 156 return err 157 } 158 159 n := &network{ 160 id: id, 161 driver: d, 162 endpoints: endpointTable{}, 163 subnets: []*subnet{}, 164 } 165 166 vnis := make([]uint32, 0, len(ipV4Data)) 167 if gval, ok := option[netlabel.GenericData]; ok { 168 optMap := gval.(map[string]string) 169 if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok { 170 logrus.Debugf("overlay: Received vxlan IDs: %s", val) 171 vniStrings := strings.Split(val, ",") 172 for _, vniStr := range vniStrings { 173 vni, err := strconv.Atoi(vniStr) 174 if err != nil { 175 return fmt.Errorf("invalid vxlan id value %q passed", vniStr) 176 } 177 178 vnis = append(vnis, uint32(vni)) 179 } 180 } 181 if _, ok := optMap[secureOption]; ok { 182 n.secure = true 183 } 184 if val, ok := optMap[netlabel.DriverMTU]; ok { 185 var err error 186 if n.mtu, err = strconv.Atoi(val); err != nil { 187 return fmt.Errorf("failed to parse %v: %v", val, err) 188 } 189 if n.mtu < 0 { 190 return fmt.Errorf("invalid MTU value: %v", n.mtu) 191 } 192 } 193 } 194 195 // If we are getting vnis from libnetwork, either we get for 196 // all subnets or none. 197 if len(vnis) != 0 && len(vnis) < len(ipV4Data) { 198 return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis)) 199 } 200 201 for i, ipd := range ipV4Data { 202 s := &subnet{ 203 subnetIP: ipd.Pool, 204 gwIP: ipd.Gateway, 205 } 206 207 if len(vnis) != 0 { 208 s.vni = vnis[i] 209 } 210 211 n.subnets = append(n.subnets, s) 212 } 213 214 d.Lock() 215 defer d.Unlock() 216 if d.networks[n.id] != nil { 217 return fmt.Errorf("attempt to create overlay network %v that already exists", n.id) 218 } 219 220 if err := n.writeToStore(); err != nil { 221 return fmt.Errorf("failed to update data store for network %v: %v", n.id, err) 222 } 223 224 // Make sure no rule is on the way from any stale secure network 225 if !n.secure { 226 for _, vni := range vnis { 227 programMangle(vni, false) 228 programInput(vni, false) 229 } 230 } 231 232 if nInfo != nil { 233 if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil { 234 // XXX Undo writeToStore? No method to so. Why? 235 return err 236 } 237 } 238 239 d.networks[id] = n 240 241 return nil 242 } 243 244 func (d *driver) DeleteNetwork(nid string) error { 245 if nid == "" { 246 return fmt.Errorf("invalid network id") 247 } 248 249 // Make sure driver resources are initialized before proceeding 250 if err := d.configure(); err != nil { 251 return err 252 } 253 254 d.Lock() 255 // Only perform a peer flush operation (if required) AFTER unlocking 256 // the driver lock to avoid deadlocking w/ the peerDB. 257 var doPeerFlush bool 258 defer func() { 259 d.Unlock() 260 if doPeerFlush { 261 d.peerFlush(nid) 262 } 263 }() 264 265 // This is similar to d.network(), but we need to keep holding the lock 266 // until we are done removing this network. 267 n, ok := d.networks[nid] 268 if !ok { 269 n = d.restoreNetworkFromStore(nid) 270 } 271 if n == nil { 272 return fmt.Errorf("could not find network with id %s", nid) 273 } 274 275 for _, ep := range n.endpoints { 276 if ep.ifName != "" { 277 if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil { 278 if err := ns.NlHandle().LinkDel(link); err != nil { 279 logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id) 280 } 281 } 282 } 283 284 if err := d.deleteEndpointFromStore(ep); err != nil { 285 logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err) 286 } 287 } 288 289 doPeerFlush = true 290 delete(d.networks, nid) 291 292 vnis, err := n.releaseVxlanID() 293 if err != nil { 294 return err 295 } 296 297 if n.secure { 298 for _, vni := range vnis { 299 programMangle(vni, false) 300 programInput(vni, false) 301 } 302 } 303 304 return nil 305 } 306 307 func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error { 308 return nil 309 } 310 311 func (d *driver) RevokeExternalConnectivity(nid, eid string) error { 312 return nil 313 } 314 315 func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error { 316 // If there is a race between two go routines here only one will win 317 // the other will wait. 318 networkOnce.Do(networkOnceInit) 319 320 n.Lock() 321 // If non-restore initialization occurred and was successful then 322 // tell the peerDB to initialize the sandbox with all the peers 323 // previously received from networkdb. But only do this after 324 // unlocking the network. Otherwise we could deadlock with 325 // on the peerDB channel while peerDB is waiting for the network lock. 326 var doInitPeerDB bool 327 defer func() { 328 n.Unlock() 329 if doInitPeerDB { 330 go n.driver.initSandboxPeerDB(n.id) 331 } 332 }() 333 334 if !n.sboxInit { 335 n.initErr = n.initSandbox(restore) 336 doInitPeerDB = n.initErr == nil && !restore 337 // If there was an error, we cannot recover it 338 n.sboxInit = true 339 } 340 341 if n.initErr != nil { 342 return fmt.Errorf("network sandbox join failed: %v", n.initErr) 343 } 344 345 subnetErr := s.initErr 346 if !s.sboxInit { 347 subnetErr = n.initSubnetSandbox(s, restore) 348 // We can recover from these errors, but not on restore 349 if restore || subnetErr == nil { 350 s.initErr = subnetErr 351 s.sboxInit = true 352 } 353 } 354 if subnetErr != nil { 355 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr) 356 } 357 358 if incJoinCount { 359 n.joinCnt++ 360 } 361 362 return nil 363 } 364 365 func (n *network) leaveSandbox() { 366 n.Lock() 367 defer n.Unlock() 368 n.joinCnt-- 369 if n.joinCnt != 0 { 370 return 371 } 372 373 n.destroySandbox() 374 375 n.sboxInit = false 376 n.initErr = nil 377 for _, s := range n.subnets { 378 s.sboxInit = false 379 s.initErr = nil 380 } 381 } 382 383 // to be called while holding network lock 384 func (n *network) destroySandbox() { 385 if n.sbox != nil { 386 for _, iface := range n.sbox.Info().Interfaces() { 387 if err := iface.Remove(); err != nil { 388 logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err) 389 } 390 } 391 392 for _, s := range n.subnets { 393 if hostMode { 394 if err := removeFilters(n.id[:12], s.brName); err != nil { 395 logrus.Warnf("Could not remove overlay filters: %v", err) 396 } 397 } 398 399 if s.vxlanName != "" { 400 err := deleteInterface(s.vxlanName) 401 if err != nil { 402 logrus.Warnf("could not cleanup sandbox properly: %v", err) 403 } 404 } 405 } 406 407 if hostMode { 408 if err := removeNetworkChain(n.id[:12]); err != nil { 409 logrus.Warnf("could not remove network chain: %v", err) 410 } 411 } 412 413 // Close the netlink socket, this will also release the watchMiss goroutine that is using it 414 if n.nlSocket != nil { 415 n.nlSocket.Close() 416 n.nlSocket = nil 417 } 418 419 n.sbox.Destroy() 420 n.sbox = nil 421 } 422 } 423 424 func populateVNITbl() { 425 filepath.WalkDir(filepath.Dir(osl.GenerateKey("walk")), 426 // NOTE(cpuguy83): The linter picked up on the fact that this walk function was not using this error argument 427 // That seems wrong... however I'm not familiar with this code or if that error matters 428 func(path string, _ os.DirEntry, _ error) error { 429 _, fname := filepath.Split(path) 430 431 if len(strings.Split(fname, "-")) <= 1 { 432 return nil 433 } 434 435 n, err := netns.GetFromPath(path) 436 if err != nil { 437 logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err) 438 return nil 439 } 440 defer n.Close() 441 442 nlh, err := netlink.NewHandleAt(n, unix.NETLINK_ROUTE) 443 if err != nil { 444 logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err) 445 return nil 446 } 447 defer nlh.Close() 448 449 err = nlh.SetSocketTimeout(soTimeout) 450 if err != nil { 451 logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err) 452 } 453 454 links, err := nlh.LinkList() 455 if err != nil { 456 logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err) 457 return nil 458 } 459 460 for _, l := range links { 461 if l.Type() == "vxlan" { 462 vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path 463 } 464 } 465 466 return nil 467 }) 468 } 469 470 func networkOnceInit() { 471 populateVNITbl() 472 473 if os.Getenv("_OVERLAY_HOST_MODE") != "" { 474 hostMode = true 475 return 476 } 477 478 err := createVxlan("testvxlan", 1, 0) 479 if err != nil { 480 logrus.Errorf("Failed to create testvxlan interface: %v", err) 481 return 482 } 483 484 defer deleteInterface("testvxlan") 485 486 path := "/proc/self/ns/net" 487 hNs, err := netns.GetFromPath(path) 488 if err != nil { 489 logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err) 490 return 491 } 492 defer hNs.Close() 493 494 nlh := ns.NlHandle() 495 496 iface, err := nlh.LinkByName("testvxlan") 497 if err != nil { 498 logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err) 499 return 500 } 501 502 // If we are not able to move the vxlan interface to a namespace 503 // then fallback to host mode 504 if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil { 505 hostMode = true 506 } 507 } 508 509 func (n *network) generateVxlanName(s *subnet) string { 510 id := n.id 511 if len(n.id) > 5 { 512 id = n.id[:5] 513 } 514 515 return fmt.Sprintf("vx-%06x-%v", s.vni, id) 516 } 517 518 func (n *network) generateBridgeName(s *subnet) string { 519 id := n.id 520 if len(n.id) > 5 { 521 id = n.id[:5] 522 } 523 524 return n.getBridgeNamePrefix(s) + "-" + id 525 } 526 527 func (n *network) getBridgeNamePrefix(s *subnet) string { 528 return fmt.Sprintf("ov-%06x", s.vni) 529 } 530 531 func checkOverlap(nw *net.IPNet) error { 532 var nameservers []string 533 534 if rc, err := resolvconf.Get(); err == nil { 535 nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) 536 } 537 538 if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil { 539 return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err) 540 } 541 542 if err := netutils.CheckRouteOverlaps(nw); err != nil { 543 return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err) 544 } 545 546 return nil 547 } 548 549 func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error { 550 // restore overlay osl sandbox 551 ifaces := map[string][]osl.IfaceOption{ 552 brName + "+br": { 553 n.sbox.InterfaceOptions().Address(s.gwIP), 554 n.sbox.InterfaceOptions().Bridge(true), 555 }, 556 } 557 if err := n.sbox.Restore(ifaces, nil, nil, nil); err != nil { 558 return err 559 } 560 561 ifaces = map[string][]osl.IfaceOption{ 562 vxlanName + "+vxlan": { 563 n.sbox.InterfaceOptions().Master(brName), 564 }, 565 } 566 return n.sbox.Restore(ifaces, nil, nil, nil) 567 } 568 569 func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error { 570 if hostMode { 571 // Try to delete stale bridge interface if it exists 572 if err := deleteInterface(brName); err != nil { 573 deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s) 574 } 575 // Try to delete the vxlan interface by vni if already present 576 deleteVxlanByVNI("", s.vni) 577 578 if err := checkOverlap(s.subnetIP); err != nil { 579 return err 580 } 581 } 582 583 if !hostMode { 584 // Try to find this subnet's vni is being used in some 585 // other namespace by looking at vniTbl that we just 586 // populated in the once init. If a hit is found then 587 // it must a stale namespace from previous 588 // life. Destroy it completely and reclaim resourced. 589 networkMu.Lock() 590 path, ok := vniTbl[s.vni] 591 networkMu.Unlock() 592 593 if ok { 594 deleteVxlanByVNI(path, s.vni) 595 if err := unix.Unmount(path, unix.MNT_FORCE); err != nil { 596 logrus.Errorf("unmount of %s failed: %v", path, err) 597 } 598 os.Remove(path) 599 600 networkMu.Lock() 601 delete(vniTbl, s.vni) 602 networkMu.Unlock() 603 } 604 } 605 606 // create a bridge and vxlan device for this subnet and move it to the sandbox 607 sbox := n.sbox 608 609 if err := sbox.AddInterface(brName, "br", 610 sbox.InterfaceOptions().Address(s.gwIP), 611 sbox.InterfaceOptions().Bridge(true)); err != nil { 612 return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) 613 } 614 615 err := createVxlan(vxlanName, s.vni, n.maxMTU()) 616 if err != nil { 617 return err 618 } 619 620 if err := sbox.AddInterface(vxlanName, "vxlan", 621 sbox.InterfaceOptions().Master(brName)); err != nil { 622 // If adding vxlan device to the overlay namespace fails, remove the bridge interface we 623 // already added to the namespace. This allows the caller to try the setup again. 624 for _, iface := range sbox.Info().Interfaces() { 625 if iface.SrcName() == brName { 626 if ierr := iface.Remove(); ierr != nil { 627 logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr) 628 } 629 } 630 } 631 632 // Also, delete the vxlan interface. Since a global vni id is associated 633 // with the vxlan interface, an orphaned vxlan interface will result in 634 // failure of vxlan device creation if the vni is assigned to some other 635 // network. 636 if deleteErr := deleteInterface(vxlanName); deleteErr != nil { 637 logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err) 638 } 639 return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err) 640 } 641 642 if !hostMode { 643 var name string 644 for _, i := range sbox.Info().Interfaces() { 645 if i.Bridge() { 646 name = i.DstName() 647 } 648 } 649 cmd := &exec.Cmd{ 650 Path: reexec.Self(), 651 Args: []string{"set-default-vlan", sbox.Key(), name}, 652 Stdout: os.Stdout, 653 Stderr: os.Stderr, 654 } 655 if err := cmd.Run(); err != nil { 656 // not a fatal error 657 logrus.Errorf("reexec to set bridge default vlan failed %v", err) 658 } 659 } 660 661 if hostMode { 662 if err := addFilters(n.id[:12], brName); err != nil { 663 return err 664 } 665 } 666 667 return nil 668 } 669 670 // Must be called with the network lock 671 func (n *network) initSubnetSandbox(s *subnet, restore bool) error { 672 brName := n.generateBridgeName(s) 673 vxlanName := n.generateVxlanName(s) 674 675 if restore { 676 if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil { 677 return err 678 } 679 } else { 680 if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil { 681 return err 682 } 683 } 684 685 s.vxlanName = vxlanName 686 s.brName = brName 687 688 return nil 689 } 690 691 func (n *network) cleanupStaleSandboxes() { 692 filepath.WalkDir(filepath.Dir(osl.GenerateKey("walk")), 693 func(path string, _ os.DirEntry, _ error) error { 694 _, fname := filepath.Split(path) 695 696 pList := strings.Split(fname, "-") 697 if len(pList) <= 1 { 698 return nil 699 } 700 701 pattern := pList[1] 702 if strings.Contains(n.id, pattern) { 703 // Delete all vnis 704 deleteVxlanByVNI(path, 0) 705 unix.Unmount(path, unix.MNT_DETACH) 706 os.Remove(path) 707 708 // Now that we have destroyed this 709 // sandbox, remove all references to 710 // it in vniTbl so that we don't 711 // inadvertently destroy the sandbox 712 // created in this life. 713 networkMu.Lock() 714 for vni, tblPath := range vniTbl { 715 if tblPath == path { 716 delete(vniTbl, vni) 717 } 718 } 719 networkMu.Unlock() 720 } 721 722 return nil 723 }) 724 } 725 726 func (n *network) initSandbox(restore bool) error { 727 n.initEpoch++ 728 729 if !restore { 730 if hostMode { 731 if err := addNetworkChain(n.id[:12]); err != nil { 732 return err 733 } 734 } 735 736 // If there are any stale sandboxes related to this network 737 // from previous daemon life clean it up here 738 n.cleanupStaleSandboxes() 739 } 740 741 // In the restore case network sandbox already exist; but we don't know 742 // what epoch number it was created with. It has to be retrieved by 743 // searching the net namespaces. 744 var key string 745 if restore { 746 key = osl.GenerateKey("-" + n.id) 747 } else { 748 key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id) 749 } 750 751 sbox, err := osl.NewSandbox(key, !hostMode, restore) 752 if err != nil { 753 return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err) 754 } 755 756 // this is needed to let the peerAdd configure the sandbox 757 n.sbox = sbox 758 759 // If we are in swarm mode, we don't need anymore the watchMiss routine. 760 // This will save 1 thread and 1 netlink socket per network 761 if !n.driver.isSerfAlive() { 762 return nil 763 } 764 765 var nlSock *nl.NetlinkSocket 766 sbox.InvokeFunc(func() { 767 nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) 768 if err != nil { 769 return 770 } 771 // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed 772 tv := unix.NsecToTimeval(soTimeout.Nanoseconds()) 773 err = nlSock.SetReceiveTimeout(&tv) 774 }) 775 n.nlSocket = nlSock 776 777 if err == nil { 778 go n.watchMiss(nlSock, key) 779 } else { 780 logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v", 781 n.id, sbox.Key(), err) 782 } 783 784 return nil 785 } 786 787 func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) { 788 // With the new version of the netlink library the deserialize function makes 789 // requests about the interface of the netlink message. This can succeed only 790 // if this go routine is in the target namespace. 791 origNs, err := netns.Get() 792 if err != nil { 793 logrus.WithError(err).Error("failed to get the initial network namespace") 794 return 795 } 796 defer origNs.Close() 797 newNs, err := netns.GetFromPath(nsPath) 798 if err != nil { 799 logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath) 800 return 801 } 802 defer newNs.Close() 803 804 runtime.LockOSThread() 805 if err = netns.Set(newNs); err != nil { 806 logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath) 807 runtime.UnlockOSThread() 808 return 809 } 810 defer func() { 811 if err := netns.Set(origNs); err != nil { 812 logrus.WithError(err).Error("failed to restore the thread's initial network namespace") 813 // The error is only fatal for the current thread. Keep this 814 // goroutine locked to the thread to make the runtime replace it 815 // with a clean thread once this goroutine terminates. 816 } else { 817 runtime.UnlockOSThread() 818 } 819 }() 820 for { 821 msgs, _, err := nlSock.Receive() 822 if err != nil { 823 n.Lock() 824 nlFd := nlSock.GetFd() 825 n.Unlock() 826 if nlFd == -1 { 827 // The netlink socket got closed, simply exit to not leak this goroutine 828 return 829 } 830 // When the receive timeout expires the receive will return EAGAIN 831 if err == unix.EAGAIN { 832 // we continue here to avoid spam for timeouts 833 continue 834 } 835 logrus.Errorf("Failed to receive from netlink: %v ", err) 836 continue 837 } 838 839 for _, msg := range msgs { 840 if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH { 841 continue 842 } 843 844 neigh, err := netlink.NeighDeserialize(msg.Data) 845 if err != nil { 846 logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err) 847 continue 848 } 849 850 var ( 851 ip net.IP 852 mac net.HardwareAddr 853 l2Miss, l3Miss bool 854 ) 855 if neigh.IP.To4() != nil { 856 ip = neigh.IP 857 l3Miss = true 858 } else if neigh.HardwareAddr != nil { 859 mac = []byte(neigh.HardwareAddr) 860 ip = net.IP(mac[2:]) 861 l2Miss = true 862 } else { 863 continue 864 } 865 866 // Not any of the network's subnets. Ignore. 867 if !n.contains(ip) { 868 continue 869 } 870 871 if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { 872 continue 873 } 874 875 logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) 876 mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) 877 if err != nil { 878 logrus.Errorf("could not resolve peer %q: %v", ip, err) 879 continue 880 } 881 n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) 882 } 883 } 884 } 885 886 // Restore a network from the store to the driver if it is present. 887 // Must be called with the driver locked! 888 func (d *driver) restoreNetworkFromStore(nid string) *network { 889 n := d.getNetworkFromStore(nid) 890 if n != nil { 891 n.driver = d 892 n.endpoints = endpointTable{} 893 d.networks[nid] = n 894 } 895 return n 896 } 897 898 func (d *driver) network(nid string) *network { 899 d.Lock() 900 n, ok := d.networks[nid] 901 if !ok { 902 n = d.restoreNetworkFromStore(nid) 903 } 904 d.Unlock() 905 906 return n 907 } 908 909 func (d *driver) getNetworkFromStore(nid string) *network { 910 if d.store == nil { 911 return nil 912 } 913 914 n := &network{id: nid} 915 if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 916 return nil 917 } 918 919 return n 920 } 921 922 func (n *network) sandbox() osl.Sandbox { 923 n.Lock() 924 defer n.Unlock() 925 return n.sbox 926 } 927 928 func (n *network) vxlanID(s *subnet) uint32 { 929 n.Lock() 930 defer n.Unlock() 931 return s.vni 932 } 933 934 func (n *network) setVxlanID(s *subnet, vni uint32) { 935 n.Lock() 936 s.vni = vni 937 n.Unlock() 938 } 939 940 func (n *network) Key() []string { 941 return []string{"overlay", "network", n.id} 942 } 943 944 func (n *network) KeyPrefix() []string { 945 return []string{"overlay", "network"} 946 } 947 948 func (n *network) Value() []byte { 949 m := map[string]interface{}{} 950 951 netJSON := []*subnetJSON{} 952 953 for _, s := range n.subnets { 954 sj := &subnetJSON{ 955 SubnetIP: s.subnetIP.String(), 956 GwIP: s.gwIP.String(), 957 Vni: s.vni, 958 } 959 netJSON = append(netJSON, sj) 960 } 961 962 m["secure"] = n.secure 963 m["subnets"] = netJSON 964 m["mtu"] = n.mtu 965 b, err := json.Marshal(m) 966 if err != nil { 967 return []byte{} 968 } 969 970 return b 971 } 972 973 func (n *network) Index() uint64 { 974 return n.dbIndex 975 } 976 977 func (n *network) SetIndex(index uint64) { 978 n.dbIndex = index 979 n.dbExists = true 980 } 981 982 func (n *network) Exists() bool { 983 return n.dbExists 984 } 985 986 func (n *network) Skip() bool { 987 return false 988 } 989 990 func (n *network) SetValue(value []byte) error { 991 var ( 992 m map[string]interface{} 993 newNet bool 994 isMap = true 995 netJSON = []*subnetJSON{} 996 ) 997 998 if err := json.Unmarshal(value, &m); err != nil { 999 err := json.Unmarshal(value, &netJSON) 1000 if err != nil { 1001 return err 1002 } 1003 isMap = false 1004 } 1005 1006 if len(n.subnets) == 0 { 1007 newNet = true 1008 } 1009 1010 if isMap { 1011 if val, ok := m["secure"]; ok { 1012 n.secure = val.(bool) 1013 } 1014 if val, ok := m["mtu"]; ok { 1015 n.mtu = int(val.(float64)) 1016 } 1017 bytes, err := json.Marshal(m["subnets"]) 1018 if err != nil { 1019 return err 1020 } 1021 if err := json.Unmarshal(bytes, &netJSON); err != nil { 1022 return err 1023 } 1024 } 1025 1026 for _, sj := range netJSON { 1027 subnetIPstr := sj.SubnetIP 1028 gwIPstr := sj.GwIP 1029 vni := sj.Vni 1030 1031 subnetIP, _ := types.ParseCIDR(subnetIPstr) 1032 gwIP, _ := types.ParseCIDR(gwIPstr) 1033 1034 if newNet { 1035 s := &subnet{ 1036 subnetIP: subnetIP, 1037 gwIP: gwIP, 1038 vni: vni, 1039 } 1040 n.subnets = append(n.subnets, s) 1041 } else { 1042 sNet := n.getMatchingSubnet(subnetIP) 1043 if sNet != nil { 1044 sNet.vni = vni 1045 } 1046 } 1047 } 1048 return nil 1049 } 1050 1051 func (n *network) DataScope() string { 1052 return datastore.GlobalScope 1053 } 1054 1055 func (n *network) writeToStore() error { 1056 if n.driver.store == nil { 1057 return nil 1058 } 1059 1060 return n.driver.store.PutObjectAtomic(n) 1061 } 1062 1063 func (n *network) releaseVxlanID() ([]uint32, error) { 1064 n.Lock() 1065 nSubnets := len(n.subnets) 1066 n.Unlock() 1067 if nSubnets == 0 { 1068 return nil, nil 1069 } 1070 1071 if n.driver.store != nil { 1072 if err := n.driver.store.DeleteObjectAtomic(n); err != nil { 1073 if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { 1074 // In both the above cases we can safely assume that the key has been removed by some other 1075 // instance and so simply get out of here 1076 return nil, nil 1077 } 1078 1079 return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err) 1080 } 1081 } 1082 var vnis []uint32 1083 n.Lock() 1084 for _, s := range n.subnets { 1085 if n.driver.vxlanIdm != nil { 1086 vnis = append(vnis, s.vni) 1087 } 1088 s.vni = 0 1089 } 1090 n.Unlock() 1091 1092 for _, vni := range vnis { 1093 n.driver.vxlanIdm.Release(uint64(vni)) 1094 } 1095 1096 return vnis, nil 1097 } 1098 1099 func (n *network) obtainVxlanID(s *subnet) error { 1100 // return if the subnet already has a vxlan id assigned 1101 if n.vxlanID(s) != 0 { 1102 return nil 1103 } 1104 1105 if n.driver.store == nil { 1106 return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id") 1107 } 1108 1109 for { 1110 if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 1111 return fmt.Errorf("getting network %q from datastore failed %v", n.id, err) 1112 } 1113 1114 if n.vxlanID(s) == 0 { 1115 vxlanID, err := n.driver.vxlanIdm.GetID(true) 1116 if err != nil { 1117 return fmt.Errorf("failed to allocate vxlan id: %v", err) 1118 } 1119 1120 n.setVxlanID(s, uint32(vxlanID)) 1121 if err := n.writeToStore(); err != nil { 1122 n.driver.vxlanIdm.Release(uint64(n.vxlanID(s))) 1123 n.setVxlanID(s, 0) 1124 if err == datastore.ErrKeyModified { 1125 continue 1126 } 1127 return fmt.Errorf("network %q failed to update data store: %v", n.id, err) 1128 } 1129 return nil 1130 } 1131 return nil 1132 } 1133 } 1134 1135 // contains return true if the passed ip belongs to one the network's 1136 // subnets 1137 func (n *network) contains(ip net.IP) bool { 1138 for _, s := range n.subnets { 1139 if s.subnetIP.Contains(ip) { 1140 return true 1141 } 1142 } 1143 1144 return false 1145 } 1146 1147 // getSubnetforIP returns the subnet to which the given IP belongs 1148 func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { 1149 for _, s := range n.subnets { 1150 // first check if the mask lengths are the same 1151 i, _ := s.subnetIP.Mask.Size() 1152 j, _ := ip.Mask.Size() 1153 if i != j { 1154 continue 1155 } 1156 if s.subnetIP.Contains(ip.IP) { 1157 return s 1158 } 1159 } 1160 return nil 1161 } 1162 1163 // getMatchingSubnet return the network's subnet that matches the input 1164 func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet { 1165 if ip == nil { 1166 return nil 1167 } 1168 for _, s := range n.subnets { 1169 // first check if the mask lengths are the same 1170 i, _ := s.subnetIP.Mask.Size() 1171 j, _ := ip.Mask.Size() 1172 if i != j { 1173 continue 1174 } 1175 if s.subnetIP.IP.Equal(ip.IP) { 1176 return s 1177 } 1178 } 1179 return nil 1180 }