github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/libnetwork/drivers/overlay/ov_network.go (about) 1 //go:build linux 2 // +build linux 3 4 package overlay 5 6 import ( 7 "encoding/json" 8 "fmt" 9 "net" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "runtime" 14 "strconv" 15 "strings" 16 "sync" 17 18 "github.com/docker/docker/libnetwork/datastore" 19 "github.com/docker/docker/libnetwork/driverapi" 20 "github.com/docker/docker/libnetwork/netlabel" 21 "github.com/docker/docker/libnetwork/netutils" 22 "github.com/docker/docker/libnetwork/ns" 23 "github.com/docker/docker/libnetwork/osl" 24 "github.com/docker/docker/libnetwork/resolvconf" 25 "github.com/docker/docker/libnetwork/types" 26 "github.com/docker/docker/pkg/reexec" 27 "github.com/sirupsen/logrus" 28 "github.com/vishvananda/netlink" 29 "github.com/vishvananda/netlink/nl" 30 "github.com/vishvananda/netns" 31 "golang.org/x/sys/unix" 32 ) 33 34 var ( 35 hostMode bool 36 networkOnce sync.Once 37 networkMu sync.Mutex 38 vniTbl = make(map[uint32]string) 39 ) 40 41 type networkTable map[string]*network 42 43 type subnet struct { 44 sboxInit bool 45 vxlanName string 46 brName string 47 vni uint32 48 initErr error 49 subnetIP *net.IPNet 50 gwIP *net.IPNet 51 } 52 53 type subnetJSON struct { 54 SubnetIP string 55 GwIP string 56 Vni uint32 57 } 58 59 type network struct { 60 id string 61 dbIndex uint64 62 dbExists bool 63 sbox osl.Sandbox 64 nlSocket *nl.NetlinkSocket 65 endpoints endpointTable 66 driver *driver 67 joinCnt int 68 sboxInit bool 69 initEpoch int 70 initErr error 71 subnets []*subnet 72 secure bool 73 mtu int 74 sync.Mutex 75 } 76 77 func init() { 78 reexec.Register("set-default-vlan", setDefaultVlan) 79 } 80 81 func setDefaultVlan() { 82 if len(os.Args) < 3 { 83 logrus.Error("insufficient number of arguments") 84 os.Exit(1) 85 } 86 87 runtime.LockOSThread() 88 defer runtime.UnlockOSThread() 89 90 nsPath := os.Args[1] 91 ns, err := netns.GetFromPath(nsPath) 92 if err != nil { 93 logrus.Errorf("overlay namespace get failed, %v", err) 94 os.Exit(1) 95 } 96 if err = netns.Set(ns); err != nil { 97 logrus.Errorf("setting into overlay namespace failed, %v", err) 98 os.Exit(1) 99 } 100 101 // make sure the sysfs mount doesn't propagate back 102 if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { 103 logrus.Errorf("unshare failed, %v", err) 104 os.Exit(1) 105 } 106 107 flag := unix.MS_PRIVATE | unix.MS_REC 108 if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil { 109 logrus.Errorf("root mount failed, %v", err) 110 os.Exit(1) 111 } 112 113 if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil { 114 logrus.Errorf("mounting sysfs failed, %v", err) 115 os.Exit(1) 116 } 117 118 brName := os.Args[2] 119 path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid") 120 data := []byte{'0', '\n'} 121 122 if err = os.WriteFile(path, data, 0644); err != nil { 123 logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err) 124 os.Exit(1) 125 } 126 os.Exit(0) 127 } 128 129 func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { 130 return nil, types.NotImplementedErrorf("not implemented") 131 } 132 133 func (d *driver) NetworkFree(id string) error { 134 return types.NotImplementedErrorf("not implemented") 135 } 136 137 func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { 138 if id == "" { 139 return fmt.Errorf("invalid network id") 140 } 141 if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" { 142 return types.BadRequestErrorf("ipv4 pool is empty") 143 } 144 145 // Since we perform lazy configuration make sure we try 146 // configuring the driver when we enter CreateNetwork 147 if err := d.configure(); err != nil { 148 return err 149 } 150 151 n := &network{ 152 id: id, 153 driver: d, 154 endpoints: endpointTable{}, 155 subnets: []*subnet{}, 156 } 157 158 vnis := make([]uint32, 0, len(ipV4Data)) 159 if gval, ok := option[netlabel.GenericData]; ok { 160 optMap := gval.(map[string]string) 161 if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok { 162 logrus.Debugf("overlay: Received vxlan IDs: %s", val) 163 vniStrings := strings.Split(val, ",") 164 for _, vniStr := range vniStrings { 165 vni, err := strconv.Atoi(vniStr) 166 if err != nil { 167 return fmt.Errorf("invalid vxlan id value %q passed", vniStr) 168 } 169 170 vnis = append(vnis, uint32(vni)) 171 } 172 } 173 if _, ok := optMap[secureOption]; ok { 174 n.secure = true 175 } 176 if val, ok := optMap[netlabel.DriverMTU]; ok { 177 var err error 178 if n.mtu, err = strconv.Atoi(val); err != nil { 179 return fmt.Errorf("failed to parse %v: %v", val, err) 180 } 181 if n.mtu < 0 { 182 return fmt.Errorf("invalid MTU value: %v", n.mtu) 183 } 184 } 185 } 186 187 // If we are getting vnis from libnetwork, either we get for 188 // all subnets or none. 189 if len(vnis) != 0 && len(vnis) < len(ipV4Data) { 190 return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis)) 191 } 192 193 for i, ipd := range ipV4Data { 194 s := &subnet{ 195 subnetIP: ipd.Pool, 196 gwIP: ipd.Gateway, 197 } 198 199 if len(vnis) != 0 { 200 s.vni = vnis[i] 201 } 202 203 n.subnets = append(n.subnets, s) 204 } 205 206 d.Lock() 207 defer d.Unlock() 208 if d.networks[n.id] != nil { 209 return fmt.Errorf("attempt to create overlay network %v that already exists", n.id) 210 } 211 212 if err := n.writeToStore(); err != nil { 213 return fmt.Errorf("failed to update data store for network %v: %v", n.id, err) 214 } 215 216 // Make sure no rule is on the way from any stale secure network 217 if !n.secure { 218 for _, vni := range vnis { 219 programMangle(vni, false) 220 programInput(vni, false) 221 } 222 } 223 224 if nInfo != nil { 225 if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil { 226 // XXX Undo writeToStore? No method to so. Why? 227 return err 228 } 229 } 230 231 d.networks[id] = n 232 233 return nil 234 } 235 236 func (d *driver) DeleteNetwork(nid string) error { 237 if nid == "" { 238 return fmt.Errorf("invalid network id") 239 } 240 241 // Make sure driver resources are initialized before proceeding 242 if err := d.configure(); err != nil { 243 return err 244 } 245 246 d.Lock() 247 // Only perform a peer flush operation (if required) AFTER unlocking 248 // the driver lock to avoid deadlocking w/ the peerDB. 249 var doPeerFlush bool 250 defer func() { 251 d.Unlock() 252 if doPeerFlush { 253 d.peerFlush(nid) 254 } 255 }() 256 257 // This is similar to d.network(), but we need to keep holding the lock 258 // until we are done removing this network. 259 n, ok := d.networks[nid] 260 if !ok { 261 n = d.restoreNetworkFromStore(nid) 262 } 263 if n == nil { 264 return fmt.Errorf("could not find network with id %s", nid) 265 } 266 267 for _, ep := range n.endpoints { 268 if ep.ifName != "" { 269 if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil { 270 if err := ns.NlHandle().LinkDel(link); err != nil { 271 logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id) 272 } 273 } 274 } 275 276 if err := d.deleteEndpointFromStore(ep); err != nil { 277 logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err) 278 } 279 } 280 281 doPeerFlush = true 282 delete(d.networks, nid) 283 284 vnis, err := n.releaseVxlanID() 285 if err != nil { 286 return err 287 } 288 289 if n.secure { 290 for _, vni := range vnis { 291 programMangle(vni, false) 292 programInput(vni, false) 293 } 294 } 295 296 return nil 297 } 298 299 func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error { 300 return nil 301 } 302 303 func (d *driver) RevokeExternalConnectivity(nid, eid string) error { 304 return nil 305 } 306 307 func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error { 308 // If there is a race between two go routines here only one will win 309 // the other will wait. 310 networkOnce.Do(networkOnceInit) 311 312 n.Lock() 313 // If non-restore initialization occurred and was successful then 314 // tell the peerDB to initialize the sandbox with all the peers 315 // previously received from networkdb. But only do this after 316 // unlocking the network. Otherwise we could deadlock with 317 // on the peerDB channel while peerDB is waiting for the network lock. 318 var doInitPeerDB bool 319 defer func() { 320 n.Unlock() 321 if doInitPeerDB { 322 n.driver.initSandboxPeerDB(n.id) 323 } 324 }() 325 326 if !n.sboxInit { 327 n.initErr = n.initSandbox(restore) 328 doInitPeerDB = n.initErr == nil && !restore 329 // If there was an error, we cannot recover it 330 n.sboxInit = true 331 } 332 333 if n.initErr != nil { 334 return fmt.Errorf("network sandbox join failed: %v", n.initErr) 335 } 336 337 subnetErr := s.initErr 338 if !s.sboxInit { 339 subnetErr = n.initSubnetSandbox(s, restore) 340 // We can recover from these errors, but not on restore 341 if restore || subnetErr == nil { 342 s.initErr = subnetErr 343 s.sboxInit = true 344 } 345 } 346 if subnetErr != nil { 347 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr) 348 } 349 350 if incJoinCount { 351 n.joinCnt++ 352 } 353 354 return nil 355 } 356 357 func (n *network) leaveSandbox() { 358 n.Lock() 359 defer n.Unlock() 360 n.joinCnt-- 361 if n.joinCnt != 0 { 362 return 363 } 364 365 n.destroySandbox() 366 367 n.sboxInit = false 368 n.initErr = nil 369 for _, s := range n.subnets { 370 s.sboxInit = false 371 s.initErr = nil 372 } 373 } 374 375 // to be called while holding network lock 376 func (n *network) destroySandbox() { 377 if n.sbox != nil { 378 for _, iface := range n.sbox.Info().Interfaces() { 379 if err := iface.Remove(); err != nil { 380 logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err) 381 } 382 } 383 384 for _, s := range n.subnets { 385 if hostMode { 386 if err := removeFilters(n.id[:12], s.brName); err != nil { 387 logrus.Warnf("Could not remove overlay filters: %v", err) 388 } 389 } 390 391 if s.vxlanName != "" { 392 err := deleteInterface(s.vxlanName) 393 if err != nil { 394 logrus.Warnf("could not cleanup sandbox properly: %v", err) 395 } 396 } 397 } 398 399 if hostMode { 400 if err := removeNetworkChain(n.id[:12]); err != nil { 401 logrus.Warnf("could not remove network chain: %v", err) 402 } 403 } 404 405 // Close the netlink socket, this will also release the watchMiss goroutine that is using it 406 if n.nlSocket != nil { 407 n.nlSocket.Close() 408 n.nlSocket = nil 409 } 410 411 n.sbox.Destroy() 412 n.sbox = nil 413 } 414 } 415 416 func populateVNITbl() { 417 filepath.Walk(filepath.Dir(osl.GenerateKey("walk")), 418 // NOTE(cpuguy83): The linter picked up on the fact that this walk function was not using this error argument 419 // That seems wrong... however I'm not familiar with this code or if that error matters 420 func(path string, info os.FileInfo, _ error) error { 421 _, fname := filepath.Split(path) 422 423 if len(strings.Split(fname, "-")) <= 1 { 424 return nil 425 } 426 427 ns, err := netns.GetFromPath(path) 428 if err != nil { 429 logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err) 430 return nil 431 } 432 defer ns.Close() 433 434 nlh, err := netlink.NewHandleAt(ns, unix.NETLINK_ROUTE) 435 if err != nil { 436 logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err) 437 return nil 438 } 439 defer nlh.Delete() 440 441 err = nlh.SetSocketTimeout(soTimeout) 442 if err != nil { 443 logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err) 444 } 445 446 links, err := nlh.LinkList() 447 if err != nil { 448 logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err) 449 return nil 450 } 451 452 for _, l := range links { 453 if l.Type() == "vxlan" { 454 vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path 455 } 456 } 457 458 return nil 459 }) 460 } 461 462 func networkOnceInit() { 463 populateVNITbl() 464 465 if os.Getenv("_OVERLAY_HOST_MODE") != "" { 466 hostMode = true 467 return 468 } 469 470 err := createVxlan("testvxlan", 1, 0) 471 if err != nil { 472 logrus.Errorf("Failed to create testvxlan interface: %v", err) 473 return 474 } 475 476 defer deleteInterface("testvxlan") 477 478 path := "/proc/self/ns/net" 479 hNs, err := netns.GetFromPath(path) 480 if err != nil { 481 logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err) 482 return 483 } 484 defer hNs.Close() 485 486 nlh := ns.NlHandle() 487 488 iface, err := nlh.LinkByName("testvxlan") 489 if err != nil { 490 logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err) 491 return 492 } 493 494 // If we are not able to move the vxlan interface to a namespace 495 // then fallback to host mode 496 if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil { 497 hostMode = true 498 } 499 } 500 501 func (n *network) generateVxlanName(s *subnet) string { 502 id := n.id 503 if len(n.id) > 5 { 504 id = n.id[:5] 505 } 506 507 return fmt.Sprintf("vx-%06x-%v", s.vni, id) 508 } 509 510 func (n *network) generateBridgeName(s *subnet) string { 511 id := n.id 512 if len(n.id) > 5 { 513 id = n.id[:5] 514 } 515 516 return n.getBridgeNamePrefix(s) + "-" + id 517 } 518 519 func (n *network) getBridgeNamePrefix(s *subnet) string { 520 return fmt.Sprintf("ov-%06x", s.vni) 521 } 522 523 func checkOverlap(nw *net.IPNet) error { 524 var nameservers []string 525 526 if rc, err := resolvconf.Get(); err == nil { 527 nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) 528 } 529 530 if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil { 531 return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err) 532 } 533 534 if err := netutils.CheckRouteOverlaps(nw); err != nil { 535 return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err) 536 } 537 538 return nil 539 } 540 541 func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error { 542 sbox := n.sbox 543 544 // restore overlay osl sandbox 545 Ifaces := make(map[string][]osl.IfaceOption) 546 brIfaceOption := make([]osl.IfaceOption, 2) 547 brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP)) 548 brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true)) 549 Ifaces[brName+"+br"] = brIfaceOption 550 551 err := sbox.Restore(Ifaces, nil, nil, nil) 552 if err != nil { 553 return err 554 } 555 556 Ifaces = make(map[string][]osl.IfaceOption) 557 vxlanIfaceOption := make([]osl.IfaceOption, 1) 558 vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName)) 559 Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption 560 return sbox.Restore(Ifaces, nil, nil, nil) 561 } 562 563 func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error { 564 565 if hostMode { 566 // Try to delete stale bridge interface if it exists 567 if err := deleteInterface(brName); err != nil { 568 deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s) 569 } 570 // Try to delete the vxlan interface by vni if already present 571 deleteVxlanByVNI("", s.vni) 572 573 if err := checkOverlap(s.subnetIP); err != nil { 574 return err 575 } 576 } 577 578 if !hostMode { 579 // Try to find this subnet's vni is being used in some 580 // other namespace by looking at vniTbl that we just 581 // populated in the once init. If a hit is found then 582 // it must a stale namespace from previous 583 // life. Destroy it completely and reclaim resourced. 584 networkMu.Lock() 585 path, ok := vniTbl[s.vni] 586 networkMu.Unlock() 587 588 if ok { 589 deleteVxlanByVNI(path, s.vni) 590 if err := unix.Unmount(path, unix.MNT_FORCE); err != nil { 591 logrus.Errorf("unmount of %s failed: %v", path, err) 592 } 593 os.Remove(path) 594 595 networkMu.Lock() 596 delete(vniTbl, s.vni) 597 networkMu.Unlock() 598 } 599 } 600 601 // create a bridge and vxlan device for this subnet and move it to the sandbox 602 sbox := n.sbox 603 604 if err := sbox.AddInterface(brName, "br", 605 sbox.InterfaceOptions().Address(s.gwIP), 606 sbox.InterfaceOptions().Bridge(true)); err != nil { 607 return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) 608 } 609 610 err := createVxlan(vxlanName, s.vni, n.maxMTU()) 611 if err != nil { 612 return err 613 } 614 615 if err := sbox.AddInterface(vxlanName, "vxlan", 616 sbox.InterfaceOptions().Master(brName)); err != nil { 617 // If adding vxlan device to the overlay namespace fails, remove the bridge interface we 618 // already added to the namespace. This allows the caller to try the setup again. 619 for _, iface := range sbox.Info().Interfaces() { 620 if iface.SrcName() == brName { 621 if ierr := iface.Remove(); ierr != nil { 622 logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr) 623 } 624 } 625 } 626 627 // Also, delete the vxlan interface. Since a global vni id is associated 628 // with the vxlan interface, an orphaned vxlan interface will result in 629 // failure of vxlan device creation if the vni is assigned to some other 630 // network. 631 if deleteErr := deleteInterface(vxlanName); deleteErr != nil { 632 logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err) 633 } 634 return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err) 635 } 636 637 if !hostMode { 638 var name string 639 for _, i := range sbox.Info().Interfaces() { 640 if i.Bridge() { 641 name = i.DstName() 642 } 643 } 644 cmd := &exec.Cmd{ 645 Path: reexec.Self(), 646 Args: []string{"set-default-vlan", sbox.Key(), name}, 647 Stdout: os.Stdout, 648 Stderr: os.Stderr, 649 } 650 if err := cmd.Run(); err != nil { 651 // not a fatal error 652 logrus.Errorf("reexec to set bridge default vlan failed %v", err) 653 } 654 } 655 656 if hostMode { 657 if err := addFilters(n.id[:12], brName); err != nil { 658 return err 659 } 660 } 661 662 return nil 663 } 664 665 // Must be called with the network lock 666 func (n *network) initSubnetSandbox(s *subnet, restore bool) error { 667 brName := n.generateBridgeName(s) 668 vxlanName := n.generateVxlanName(s) 669 670 if restore { 671 if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil { 672 return err 673 } 674 } else { 675 if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil { 676 return err 677 } 678 } 679 680 s.vxlanName = vxlanName 681 s.brName = brName 682 683 return nil 684 } 685 686 func (n *network) cleanupStaleSandboxes() { 687 filepath.Walk(filepath.Dir(osl.GenerateKey("walk")), 688 func(path string, info os.FileInfo, err error) error { 689 _, fname := filepath.Split(path) 690 691 pList := strings.Split(fname, "-") 692 if len(pList) <= 1 { 693 return nil 694 } 695 696 pattern := pList[1] 697 if strings.Contains(n.id, pattern) { 698 // Delete all vnis 699 deleteVxlanByVNI(path, 0) 700 unix.Unmount(path, unix.MNT_DETACH) 701 os.Remove(path) 702 703 // Now that we have destroyed this 704 // sandbox, remove all references to 705 // it in vniTbl so that we don't 706 // inadvertently destroy the sandbox 707 // created in this life. 708 networkMu.Lock() 709 for vni, tblPath := range vniTbl { 710 if tblPath == path { 711 delete(vniTbl, vni) 712 } 713 } 714 networkMu.Unlock() 715 } 716 717 return nil 718 }) 719 } 720 721 func (n *network) initSandbox(restore bool) error { 722 n.initEpoch++ 723 724 if !restore { 725 if hostMode { 726 if err := addNetworkChain(n.id[:12]); err != nil { 727 return err 728 } 729 } 730 731 // If there are any stale sandboxes related to this network 732 // from previous daemon life clean it up here 733 n.cleanupStaleSandboxes() 734 } 735 736 // In the restore case network sandbox already exist; but we don't know 737 // what epoch number it was created with. It has to be retrieved by 738 // searching the net namespaces. 739 var key string 740 if restore { 741 key = osl.GenerateKey("-" + n.id) 742 } else { 743 key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id) 744 } 745 746 sbox, err := osl.NewSandbox(key, !hostMode, restore) 747 if err != nil { 748 return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err) 749 } 750 751 // this is needed to let the peerAdd configure the sandbox 752 n.sbox = sbox 753 754 // If we are in swarm mode, we don't need anymore the watchMiss routine. 755 // This will save 1 thread and 1 netlink socket per network 756 if !n.driver.isSerfAlive() { 757 return nil 758 } 759 760 var nlSock *nl.NetlinkSocket 761 sbox.InvokeFunc(func() { 762 nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) 763 if err != nil { 764 return 765 } 766 // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed 767 tv := unix.NsecToTimeval(soTimeout.Nanoseconds()) 768 err = nlSock.SetReceiveTimeout(&tv) 769 }) 770 n.nlSocket = nlSock 771 772 if err == nil { 773 go n.watchMiss(nlSock, key) 774 } else { 775 logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v", 776 n.id, sbox.Key(), err) 777 } 778 779 return nil 780 } 781 782 func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) { 783 // With the new version of the netlink library the deserialize function makes 784 // requests about the interface of the netlink message. This can succeed only 785 // if this go routine is in the target namespace. For this reason following we 786 // lock the thread on that namespace 787 runtime.LockOSThread() 788 defer runtime.UnlockOSThread() 789 newNs, err := netns.GetFromPath(nsPath) 790 if err != nil { 791 logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath) 792 return 793 } 794 defer newNs.Close() 795 if err = netns.Set(newNs); err != nil { 796 logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath) 797 return 798 } 799 for { 800 msgs, _, err := nlSock.Receive() 801 if err != nil { 802 n.Lock() 803 nlFd := nlSock.GetFd() 804 n.Unlock() 805 if nlFd == -1 { 806 // The netlink socket got closed, simply exit to not leak this goroutine 807 return 808 } 809 // When the receive timeout expires the receive will return EAGAIN 810 if err == unix.EAGAIN { 811 // we continue here to avoid spam for timeouts 812 continue 813 } 814 logrus.Errorf("Failed to receive from netlink: %v ", err) 815 continue 816 } 817 818 for _, msg := range msgs { 819 if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH { 820 continue 821 } 822 823 neigh, err := netlink.NeighDeserialize(msg.Data) 824 if err != nil { 825 logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err) 826 continue 827 } 828 829 var ( 830 ip net.IP 831 mac net.HardwareAddr 832 l2Miss, l3Miss bool 833 ) 834 if neigh.IP.To4() != nil { 835 ip = neigh.IP 836 l3Miss = true 837 } else if neigh.HardwareAddr != nil { 838 mac = []byte(neigh.HardwareAddr) 839 ip = net.IP(mac[2:]) 840 l2Miss = true 841 } else { 842 continue 843 } 844 845 // Not any of the network's subnets. Ignore. 846 if !n.contains(ip) { 847 continue 848 } 849 850 if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { 851 continue 852 } 853 854 logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) 855 mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) 856 if err != nil { 857 logrus.Errorf("could not resolve peer %q: %v", ip, err) 858 continue 859 } 860 n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) 861 } 862 } 863 } 864 865 // Restore a network from the store to the driver if it is present. 866 // Must be called with the driver locked! 867 func (d *driver) restoreNetworkFromStore(nid string) *network { 868 n := d.getNetworkFromStore(nid) 869 if n != nil { 870 n.driver = d 871 n.endpoints = endpointTable{} 872 d.networks[nid] = n 873 } 874 return n 875 } 876 877 func (d *driver) network(nid string) *network { 878 d.Lock() 879 n, ok := d.networks[nid] 880 if !ok { 881 n = d.restoreNetworkFromStore(nid) 882 } 883 d.Unlock() 884 885 return n 886 } 887 888 func (d *driver) getNetworkFromStore(nid string) *network { 889 if d.store == nil { 890 return nil 891 } 892 893 n := &network{id: nid} 894 if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 895 return nil 896 } 897 898 return n 899 } 900 901 func (n *network) sandbox() osl.Sandbox { 902 n.Lock() 903 defer n.Unlock() 904 return n.sbox 905 } 906 907 func (n *network) vxlanID(s *subnet) uint32 { 908 n.Lock() 909 defer n.Unlock() 910 return s.vni 911 } 912 913 func (n *network) setVxlanID(s *subnet, vni uint32) { 914 n.Lock() 915 s.vni = vni 916 n.Unlock() 917 } 918 919 func (n *network) Key() []string { 920 return []string{"overlay", "network", n.id} 921 } 922 923 func (n *network) KeyPrefix() []string { 924 return []string{"overlay", "network"} 925 } 926 927 func (n *network) Value() []byte { 928 m := map[string]interface{}{} 929 930 netJSON := []*subnetJSON{} 931 932 for _, s := range n.subnets { 933 sj := &subnetJSON{ 934 SubnetIP: s.subnetIP.String(), 935 GwIP: s.gwIP.String(), 936 Vni: s.vni, 937 } 938 netJSON = append(netJSON, sj) 939 } 940 941 m["secure"] = n.secure 942 m["subnets"] = netJSON 943 m["mtu"] = n.mtu 944 b, err := json.Marshal(m) 945 if err != nil { 946 return []byte{} 947 } 948 949 return b 950 } 951 952 func (n *network) Index() uint64 { 953 return n.dbIndex 954 } 955 956 func (n *network) SetIndex(index uint64) { 957 n.dbIndex = index 958 n.dbExists = true 959 } 960 961 func (n *network) Exists() bool { 962 return n.dbExists 963 } 964 965 func (n *network) Skip() bool { 966 return false 967 } 968 969 func (n *network) SetValue(value []byte) error { 970 var ( 971 m map[string]interface{} 972 newNet bool 973 isMap = true 974 netJSON = []*subnetJSON{} 975 ) 976 977 if err := json.Unmarshal(value, &m); err != nil { 978 err := json.Unmarshal(value, &netJSON) 979 if err != nil { 980 return err 981 } 982 isMap = false 983 } 984 985 if len(n.subnets) == 0 { 986 newNet = true 987 } 988 989 if isMap { 990 if val, ok := m["secure"]; ok { 991 n.secure = val.(bool) 992 } 993 if val, ok := m["mtu"]; ok { 994 n.mtu = int(val.(float64)) 995 } 996 bytes, err := json.Marshal(m["subnets"]) 997 if err != nil { 998 return err 999 } 1000 if err := json.Unmarshal(bytes, &netJSON); err != nil { 1001 return err 1002 } 1003 } 1004 1005 for _, sj := range netJSON { 1006 subnetIPstr := sj.SubnetIP 1007 gwIPstr := sj.GwIP 1008 vni := sj.Vni 1009 1010 subnetIP, _ := types.ParseCIDR(subnetIPstr) 1011 gwIP, _ := types.ParseCIDR(gwIPstr) 1012 1013 if newNet { 1014 s := &subnet{ 1015 subnetIP: subnetIP, 1016 gwIP: gwIP, 1017 vni: vni, 1018 } 1019 n.subnets = append(n.subnets, s) 1020 } else { 1021 sNet := n.getMatchingSubnet(subnetIP) 1022 if sNet != nil { 1023 sNet.vni = vni 1024 } 1025 } 1026 } 1027 return nil 1028 } 1029 1030 func (n *network) DataScope() string { 1031 return datastore.GlobalScope 1032 } 1033 1034 func (n *network) writeToStore() error { 1035 if n.driver.store == nil { 1036 return nil 1037 } 1038 1039 return n.driver.store.PutObjectAtomic(n) 1040 } 1041 1042 func (n *network) releaseVxlanID() ([]uint32, error) { 1043 n.Lock() 1044 nSubnets := len(n.subnets) 1045 n.Unlock() 1046 if nSubnets == 0 { 1047 return nil, nil 1048 } 1049 1050 if n.driver.store != nil { 1051 if err := n.driver.store.DeleteObjectAtomic(n); err != nil { 1052 if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { 1053 // In both the above cases we can safely assume that the key has been removed by some other 1054 // instance and so simply get out of here 1055 return nil, nil 1056 } 1057 1058 return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err) 1059 } 1060 } 1061 var vnis []uint32 1062 n.Lock() 1063 for _, s := range n.subnets { 1064 if n.driver.vxlanIdm != nil { 1065 vnis = append(vnis, s.vni) 1066 } 1067 s.vni = 0 1068 } 1069 n.Unlock() 1070 1071 for _, vni := range vnis { 1072 n.driver.vxlanIdm.Release(uint64(vni)) 1073 } 1074 1075 return vnis, nil 1076 } 1077 1078 func (n *network) obtainVxlanID(s *subnet) error { 1079 //return if the subnet already has a vxlan id assigned 1080 if n.vxlanID(s) != 0 { 1081 return nil 1082 } 1083 1084 if n.driver.store == nil { 1085 return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id") 1086 } 1087 1088 for { 1089 if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 1090 return fmt.Errorf("getting network %q from datastore failed %v", n.id, err) 1091 } 1092 1093 if n.vxlanID(s) == 0 { 1094 vxlanID, err := n.driver.vxlanIdm.GetID(true) 1095 if err != nil { 1096 return fmt.Errorf("failed to allocate vxlan id: %v", err) 1097 } 1098 1099 n.setVxlanID(s, uint32(vxlanID)) 1100 if err := n.writeToStore(); err != nil { 1101 n.driver.vxlanIdm.Release(uint64(n.vxlanID(s))) 1102 n.setVxlanID(s, 0) 1103 if err == datastore.ErrKeyModified { 1104 continue 1105 } 1106 return fmt.Errorf("network %q failed to update data store: %v", n.id, err) 1107 } 1108 return nil 1109 } 1110 return nil 1111 } 1112 } 1113 1114 // contains return true if the passed ip belongs to one the network's 1115 // subnets 1116 func (n *network) contains(ip net.IP) bool { 1117 for _, s := range n.subnets { 1118 if s.subnetIP.Contains(ip) { 1119 return true 1120 } 1121 } 1122 1123 return false 1124 } 1125 1126 // getSubnetforIP returns the subnet to which the given IP belongs 1127 func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { 1128 for _, s := range n.subnets { 1129 // first check if the mask lengths are the same 1130 i, _ := s.subnetIP.Mask.Size() 1131 j, _ := ip.Mask.Size() 1132 if i != j { 1133 continue 1134 } 1135 if s.subnetIP.Contains(ip.IP) { 1136 return s 1137 } 1138 } 1139 return nil 1140 } 1141 1142 // getMatchingSubnet return the network's subnet that matches the input 1143 func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet { 1144 if ip == nil { 1145 return nil 1146 } 1147 for _, s := range n.subnets { 1148 // first check if the mask lengths are the same 1149 i, _ := s.subnetIP.Mask.Size() 1150 j, _ := ip.Mask.Size() 1151 if i != j { 1152 continue 1153 } 1154 if s.subnetIP.IP.Equal(ip.IP) { 1155 return s 1156 } 1157 } 1158 return nil 1159 }