github.com/pwn-term/docker@v0.0.0-20210616085119-6e977cce2565/libnetwork/drivers/overlay/ov_network.go (about) 1 package overlay 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "net" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "runtime" 12 "strconv" 13 "strings" 14 "sync" 15 16 "github.com/docker/docker/pkg/reexec" 17 "github.com/docker/libnetwork/datastore" 18 "github.com/docker/libnetwork/driverapi" 19 "github.com/docker/libnetwork/netlabel" 20 "github.com/docker/libnetwork/netutils" 21 "github.com/docker/libnetwork/ns" 22 "github.com/docker/libnetwork/osl" 23 "github.com/docker/libnetwork/resolvconf" 24 "github.com/docker/libnetwork/types" 25 "github.com/sirupsen/logrus" 26 "github.com/vishvananda/netlink" 27 "github.com/vishvananda/netlink/nl" 28 "github.com/vishvananda/netns" 29 "golang.org/x/sys/unix" 30 ) 31 32 var ( 33 hostMode bool 34 networkOnce sync.Once 35 networkMu sync.Mutex 36 vniTbl = make(map[uint32]string) 37 ) 38 39 type networkTable map[string]*network 40 41 type subnet struct { 42 sboxInit bool 43 vxlanName string 44 brName string 45 vni uint32 46 initErr error 47 subnetIP *net.IPNet 48 gwIP *net.IPNet 49 } 50 51 type subnetJSON struct { 52 SubnetIP string 53 GwIP string 54 Vni uint32 55 } 56 57 type network struct { 58 id string 59 dbIndex uint64 60 dbExists bool 61 sbox osl.Sandbox 62 nlSocket *nl.NetlinkSocket 63 endpoints endpointTable 64 driver *driver 65 joinCnt int 66 sboxInit bool 67 initEpoch int 68 initErr error 69 subnets []*subnet 70 secure bool 71 mtu int 72 sync.Mutex 73 } 74 75 func init() { 76 reexec.Register("set-default-vlan", setDefaultVlan) 77 } 78 79 func setDefaultVlan() { 80 if len(os.Args) < 3 { 81 logrus.Error("insufficient number of arguments") 82 os.Exit(1) 83 } 84 85 runtime.LockOSThread() 86 defer runtime.UnlockOSThread() 87 88 nsPath := os.Args[1] 89 ns, err := netns.GetFromPath(nsPath) 90 if err != nil { 91 logrus.Errorf("overlay namespace get failed, %v", err) 92 os.Exit(1) 93 } 94 if err = netns.Set(ns); err != nil { 95 logrus.Errorf("setting into overlay namespace failed, %v", err) 96 os.Exit(1) 97 } 98 99 // make sure the sysfs mount doesn't propagate back 100 if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { 101 logrus.Errorf("unshare failed, %v", err) 102 os.Exit(1) 103 } 104 105 flag := unix.MS_PRIVATE | unix.MS_REC 106 if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil { 107 logrus.Errorf("root mount failed, %v", err) 108 os.Exit(1) 109 } 110 111 if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil { 112 logrus.Errorf("mounting sysfs failed, %v", err) 113 os.Exit(1) 114 } 115 116 brName := os.Args[2] 117 path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid") 118 data := []byte{'0', '\n'} 119 120 if err = ioutil.WriteFile(path, data, 0644); err != nil { 121 logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err) 122 os.Exit(1) 123 } 124 os.Exit(0) 125 } 126 127 func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { 128 return nil, types.NotImplementedErrorf("not implemented") 129 } 130 131 func (d *driver) NetworkFree(id string) error { 132 return types.NotImplementedErrorf("not implemented") 133 } 134 135 func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { 136 if id == "" { 137 return fmt.Errorf("invalid network id") 138 } 139 if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" { 140 return types.BadRequestErrorf("ipv4 pool is empty") 141 } 142 143 // Since we perform lazy configuration make sure we try 144 // configuring the driver when we enter CreateNetwork 145 if err := d.configure(); err != nil { 146 return err 147 } 148 149 n := &network{ 150 id: id, 151 driver: d, 152 endpoints: endpointTable{}, 153 subnets: []*subnet{}, 154 } 155 156 vnis := make([]uint32, 0, len(ipV4Data)) 157 if gval, ok := option[netlabel.GenericData]; ok { 158 optMap := gval.(map[string]string) 159 if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok { 160 logrus.Debugf("overlay: Received vxlan IDs: %s", val) 161 vniStrings := strings.Split(val, ",") 162 for _, vniStr := range vniStrings { 163 vni, err := strconv.Atoi(vniStr) 164 if err != nil { 165 return fmt.Errorf("invalid vxlan id value %q passed", vniStr) 166 } 167 168 vnis = append(vnis, uint32(vni)) 169 } 170 } 171 if _, ok := optMap[secureOption]; ok { 172 n.secure = true 173 } 174 if val, ok := optMap[netlabel.DriverMTU]; ok { 175 var err error 176 if n.mtu, err = strconv.Atoi(val); err != nil { 177 return fmt.Errorf("failed to parse %v: %v", val, err) 178 } 179 if n.mtu < 0 { 180 return fmt.Errorf("invalid MTU value: %v", n.mtu) 181 } 182 } 183 } 184 185 // If we are getting vnis from libnetwork, either we get for 186 // all subnets or none. 187 if len(vnis) != 0 && len(vnis) < len(ipV4Data) { 188 return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis)) 189 } 190 191 for i, ipd := range ipV4Data { 192 s := &subnet{ 193 subnetIP: ipd.Pool, 194 gwIP: ipd.Gateway, 195 } 196 197 if len(vnis) != 0 { 198 s.vni = vnis[i] 199 } 200 201 n.subnets = append(n.subnets, s) 202 } 203 204 d.Lock() 205 defer d.Unlock() 206 if d.networks[n.id] != nil { 207 return fmt.Errorf("attempt to create overlay network %v that already exists", n.id) 208 } 209 210 if err := n.writeToStore(); err != nil { 211 return fmt.Errorf("failed to update data store for network %v: %v", n.id, err) 212 } 213 214 // Make sure no rule is on the way from any stale secure network 215 if !n.secure { 216 for _, vni := range vnis { 217 programMangle(vni, false) 218 programInput(vni, false) 219 } 220 } 221 222 if nInfo != nil { 223 if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil { 224 // XXX Undo writeToStore? No method to so. Why? 225 return err 226 } 227 } 228 229 d.networks[id] = n 230 231 return nil 232 } 233 234 func (d *driver) DeleteNetwork(nid string) error { 235 if nid == "" { 236 return fmt.Errorf("invalid network id") 237 } 238 239 // Make sure driver resources are initialized before proceeding 240 if err := d.configure(); err != nil { 241 return err 242 } 243 244 d.Lock() 245 // Only perform a peer flush operation (if required) AFTER unlocking 246 // the driver lock to avoid deadlocking w/ the peerDB. 247 var doPeerFlush bool 248 defer func() { 249 d.Unlock() 250 if doPeerFlush { 251 d.peerFlush(nid) 252 } 253 }() 254 255 // This is similar to d.network(), but we need to keep holding the lock 256 // until we are done removing this network. 257 n, ok := d.networks[nid] 258 if !ok { 259 n = d.restoreNetworkFromStore(nid) 260 } 261 if n == nil { 262 return fmt.Errorf("could not find network with id %s", nid) 263 } 264 265 for _, ep := range n.endpoints { 266 if ep.ifName != "" { 267 if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil { 268 if err := ns.NlHandle().LinkDel(link); err != nil { 269 logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id) 270 } 271 } 272 } 273 274 if err := d.deleteEndpointFromStore(ep); err != nil { 275 logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err) 276 } 277 } 278 279 doPeerFlush = true 280 delete(d.networks, nid) 281 282 vnis, err := n.releaseVxlanID() 283 if err != nil { 284 return err 285 } 286 287 if n.secure { 288 for _, vni := range vnis { 289 programMangle(vni, false) 290 programInput(vni, false) 291 } 292 } 293 294 return nil 295 } 296 297 func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error { 298 return nil 299 } 300 301 func (d *driver) RevokeExternalConnectivity(nid, eid string) error { 302 return nil 303 } 304 305 func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error { 306 // If there is a race between two go routines here only one will win 307 // the other will wait. 308 networkOnce.Do(networkOnceInit) 309 310 n.Lock() 311 // If non-restore initialization occurred and was successful then 312 // tell the peerDB to initialize the sandbox with all the peers 313 // previously received from networkdb. But only do this after 314 // unlocking the network. Otherwise we could deadlock with 315 // on the peerDB channel while peerDB is waiting for the network lock. 316 var doInitPeerDB bool 317 defer func() { 318 n.Unlock() 319 if doInitPeerDB { 320 n.driver.initSandboxPeerDB(n.id) 321 } 322 }() 323 324 if !n.sboxInit { 325 n.initErr = n.initSandbox(restore) 326 doInitPeerDB = n.initErr == nil && !restore 327 // If there was an error, we cannot recover it 328 n.sboxInit = true 329 } 330 331 if n.initErr != nil { 332 return fmt.Errorf("network sandbox join failed: %v", n.initErr) 333 } 334 335 subnetErr := s.initErr 336 if !s.sboxInit { 337 subnetErr = n.initSubnetSandbox(s, restore) 338 // We can recover from these errors, but not on restore 339 if restore || subnetErr == nil { 340 s.initErr = subnetErr 341 s.sboxInit = true 342 } 343 } 344 if subnetErr != nil { 345 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr) 346 } 347 348 if incJoinCount { 349 n.joinCnt++ 350 } 351 352 return nil 353 } 354 355 func (n *network) leaveSandbox() { 356 n.Lock() 357 defer n.Unlock() 358 n.joinCnt-- 359 if n.joinCnt != 0 { 360 return 361 } 362 363 n.destroySandbox() 364 365 n.sboxInit = false 366 n.initErr = nil 367 for _, s := range n.subnets { 368 s.sboxInit = false 369 s.initErr = nil 370 } 371 } 372 373 // to be called while holding network lock 374 func (n *network) destroySandbox() { 375 if n.sbox != nil { 376 for _, iface := range n.sbox.Info().Interfaces() { 377 if err := iface.Remove(); err != nil { 378 logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err) 379 } 380 } 381 382 for _, s := range n.subnets { 383 if hostMode { 384 if err := removeFilters(n.id[:12], s.brName); err != nil { 385 logrus.Warnf("Could not remove overlay filters: %v", err) 386 } 387 } 388 389 if s.vxlanName != "" { 390 err := deleteInterface(s.vxlanName) 391 if err != nil { 392 logrus.Warnf("could not cleanup sandbox properly: %v", err) 393 } 394 } 395 } 396 397 if hostMode { 398 if err := removeNetworkChain(n.id[:12]); err != nil { 399 logrus.Warnf("could not remove network chain: %v", err) 400 } 401 } 402 403 // Close the netlink socket, this will also release the watchMiss goroutine that is using it 404 if n.nlSocket != nil { 405 n.nlSocket.Close() 406 n.nlSocket = nil 407 } 408 409 n.sbox.Destroy() 410 n.sbox = nil 411 } 412 } 413 414 func populateVNITbl() { 415 filepath.Walk(filepath.Dir(osl.GenerateKey("walk")), 416 func(path string, info os.FileInfo, err error) error { 417 _, fname := filepath.Split(path) 418 419 if len(strings.Split(fname, "-")) <= 1 { 420 return nil 421 } 422 423 ns, err := netns.GetFromPath(path) 424 if err != nil { 425 logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err) 426 return nil 427 } 428 defer ns.Close() 429 430 nlh, err := netlink.NewHandleAt(ns, unix.NETLINK_ROUTE) 431 if err != nil { 432 logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err) 433 return nil 434 } 435 defer nlh.Delete() 436 437 err = nlh.SetSocketTimeout(soTimeout) 438 if err != nil { 439 logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err) 440 } 441 442 links, err := nlh.LinkList() 443 if err != nil { 444 logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err) 445 return nil 446 } 447 448 for _, l := range links { 449 if l.Type() == "vxlan" { 450 vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path 451 } 452 } 453 454 return nil 455 }) 456 } 457 458 func networkOnceInit() { 459 populateVNITbl() 460 461 if os.Getenv("_OVERLAY_HOST_MODE") != "" { 462 hostMode = true 463 return 464 } 465 466 err := createVxlan("testvxlan", 1, 0) 467 if err != nil { 468 logrus.Errorf("Failed to create testvxlan interface: %v", err) 469 return 470 } 471 472 defer deleteInterface("testvxlan") 473 474 path := "/proc/self/ns/net" 475 hNs, err := netns.GetFromPath(path) 476 if err != nil { 477 logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err) 478 return 479 } 480 defer hNs.Close() 481 482 nlh := ns.NlHandle() 483 484 iface, err := nlh.LinkByName("testvxlan") 485 if err != nil { 486 logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err) 487 return 488 } 489 490 // If we are not able to move the vxlan interface to a namespace 491 // then fallback to host mode 492 if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil { 493 hostMode = true 494 } 495 } 496 497 func (n *network) generateVxlanName(s *subnet) string { 498 id := n.id 499 if len(n.id) > 5 { 500 id = n.id[:5] 501 } 502 503 return fmt.Sprintf("vx-%06x-%v", s.vni, id) 504 } 505 506 func (n *network) generateBridgeName(s *subnet) string { 507 id := n.id 508 if len(n.id) > 5 { 509 id = n.id[:5] 510 } 511 512 return n.getBridgeNamePrefix(s) + "-" + id 513 } 514 515 func (n *network) getBridgeNamePrefix(s *subnet) string { 516 return fmt.Sprintf("ov-%06x", s.vni) 517 } 518 519 func checkOverlap(nw *net.IPNet) error { 520 var nameservers []string 521 522 if rc, err := resolvconf.Get(); err == nil { 523 nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) 524 } 525 526 if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil { 527 return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err) 528 } 529 530 if err := netutils.CheckRouteOverlaps(nw); err != nil { 531 return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err) 532 } 533 534 return nil 535 } 536 537 func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error { 538 sbox := n.sbox 539 540 // restore overlay osl sandbox 541 Ifaces := make(map[string][]osl.IfaceOption) 542 brIfaceOption := make([]osl.IfaceOption, 2) 543 brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP)) 544 brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true)) 545 Ifaces[brName+"+br"] = brIfaceOption 546 547 err := sbox.Restore(Ifaces, nil, nil, nil) 548 if err != nil { 549 return err 550 } 551 552 Ifaces = make(map[string][]osl.IfaceOption) 553 vxlanIfaceOption := make([]osl.IfaceOption, 1) 554 vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName)) 555 Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption 556 return sbox.Restore(Ifaces, nil, nil, nil) 557 } 558 559 func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error { 560 561 if hostMode { 562 // Try to delete stale bridge interface if it exists 563 if err := deleteInterface(brName); err != nil { 564 deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s) 565 } 566 // Try to delete the vxlan interface by vni if already present 567 deleteVxlanByVNI("", s.vni) 568 569 if err := checkOverlap(s.subnetIP); err != nil { 570 return err 571 } 572 } 573 574 if !hostMode { 575 // Try to find this subnet's vni is being used in some 576 // other namespace by looking at vniTbl that we just 577 // populated in the once init. If a hit is found then 578 // it must a stale namespace from previous 579 // life. Destroy it completely and reclaim resourced. 580 networkMu.Lock() 581 path, ok := vniTbl[s.vni] 582 networkMu.Unlock() 583 584 if ok { 585 deleteVxlanByVNI(path, s.vni) 586 if err := unix.Unmount(path, unix.MNT_FORCE); err != nil { 587 logrus.Errorf("unmount of %s failed: %v", path, err) 588 } 589 os.Remove(path) 590 591 networkMu.Lock() 592 delete(vniTbl, s.vni) 593 networkMu.Unlock() 594 } 595 } 596 597 // create a bridge and vxlan device for this subnet and move it to the sandbox 598 sbox := n.sbox 599 600 if err := sbox.AddInterface(brName, "br", 601 sbox.InterfaceOptions().Address(s.gwIP), 602 sbox.InterfaceOptions().Bridge(true)); err != nil { 603 return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) 604 } 605 606 err := createVxlan(vxlanName, s.vni, n.maxMTU()) 607 if err != nil { 608 return err 609 } 610 611 if err := sbox.AddInterface(vxlanName, "vxlan", 612 sbox.InterfaceOptions().Master(brName)); err != nil { 613 // If adding vxlan device to the overlay namespace fails, remove the bridge interface we 614 // already added to the namespace. This allows the caller to try the setup again. 615 for _, iface := range sbox.Info().Interfaces() { 616 if iface.SrcName() == brName { 617 if ierr := iface.Remove(); ierr != nil { 618 logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr) 619 } 620 } 621 } 622 623 // Also, delete the vxlan interface. Since a global vni id is associated 624 // with the vxlan interface, an orphaned vxlan interface will result in 625 // failure of vxlan device creation if the vni is assigned to some other 626 // network. 627 if deleteErr := deleteInterface(vxlanName); deleteErr != nil { 628 logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err) 629 } 630 return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err) 631 } 632 633 if !hostMode { 634 var name string 635 for _, i := range sbox.Info().Interfaces() { 636 if i.Bridge() { 637 name = i.DstName() 638 } 639 } 640 cmd := &exec.Cmd{ 641 Path: reexec.Self(), 642 Args: []string{"set-default-vlan", sbox.Key(), name}, 643 Stdout: os.Stdout, 644 Stderr: os.Stderr, 645 } 646 if err := cmd.Run(); err != nil { 647 // not a fatal error 648 logrus.Errorf("reexec to set bridge default vlan failed %v", err) 649 } 650 } 651 652 if hostMode { 653 if err := addFilters(n.id[:12], brName); err != nil { 654 return err 655 } 656 } 657 658 return nil 659 } 660 661 // Must be called with the network lock 662 func (n *network) initSubnetSandbox(s *subnet, restore bool) error { 663 brName := n.generateBridgeName(s) 664 vxlanName := n.generateVxlanName(s) 665 666 if restore { 667 if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil { 668 return err 669 } 670 } else { 671 if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil { 672 return err 673 } 674 } 675 676 s.vxlanName = vxlanName 677 s.brName = brName 678 679 return nil 680 } 681 682 func (n *network) cleanupStaleSandboxes() { 683 filepath.Walk(filepath.Dir(osl.GenerateKey("walk")), 684 func(path string, info os.FileInfo, err error) error { 685 _, fname := filepath.Split(path) 686 687 pList := strings.Split(fname, "-") 688 if len(pList) <= 1 { 689 return nil 690 } 691 692 pattern := pList[1] 693 if strings.Contains(n.id, pattern) { 694 // Delete all vnis 695 deleteVxlanByVNI(path, 0) 696 unix.Unmount(path, unix.MNT_DETACH) 697 os.Remove(path) 698 699 // Now that we have destroyed this 700 // sandbox, remove all references to 701 // it in vniTbl so that we don't 702 // inadvertently destroy the sandbox 703 // created in this life. 704 networkMu.Lock() 705 for vni, tblPath := range vniTbl { 706 if tblPath == path { 707 delete(vniTbl, vni) 708 } 709 } 710 networkMu.Unlock() 711 } 712 713 return nil 714 }) 715 } 716 717 func (n *network) initSandbox(restore bool) error { 718 n.initEpoch++ 719 720 if !restore { 721 if hostMode { 722 if err := addNetworkChain(n.id[:12]); err != nil { 723 return err 724 } 725 } 726 727 // If there are any stale sandboxes related to this network 728 // from previous daemon life clean it up here 729 n.cleanupStaleSandboxes() 730 } 731 732 // In the restore case network sandbox already exist; but we don't know 733 // what epoch number it was created with. It has to be retrieved by 734 // searching the net namespaces. 735 var key string 736 if restore { 737 key = osl.GenerateKey("-" + n.id) 738 } else { 739 key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id) 740 } 741 742 sbox, err := osl.NewSandbox(key, !hostMode, restore) 743 if err != nil { 744 return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err) 745 } 746 747 // this is needed to let the peerAdd configure the sandbox 748 n.sbox = sbox 749 750 // If we are in swarm mode, we don't need anymore the watchMiss routine. 751 // This will save 1 thread and 1 netlink socket per network 752 if !n.driver.isSerfAlive() { 753 return nil 754 } 755 756 var nlSock *nl.NetlinkSocket 757 sbox.InvokeFunc(func() { 758 nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) 759 if err != nil { 760 return 761 } 762 // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed 763 tv := unix.NsecToTimeval(soTimeout.Nanoseconds()) 764 err = nlSock.SetReceiveTimeout(&tv) 765 }) 766 n.nlSocket = nlSock 767 768 if err == nil { 769 go n.watchMiss(nlSock, key) 770 } else { 771 logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v", 772 n.id, sbox.Key(), err) 773 } 774 775 return nil 776 } 777 778 func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) { 779 // With the new version of the netlink library the deserialize function makes 780 // requests about the interface of the netlink message. This can succeed only 781 // if this go routine is in the target namespace. For this reason following we 782 // lock the thread on that namespace 783 runtime.LockOSThread() 784 defer runtime.UnlockOSThread() 785 newNs, err := netns.GetFromPath(nsPath) 786 if err != nil { 787 logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath) 788 return 789 } 790 defer newNs.Close() 791 if err = netns.Set(newNs); err != nil { 792 logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath) 793 return 794 } 795 for { 796 msgs, _, err := nlSock.Receive() 797 if err != nil { 798 n.Lock() 799 nlFd := nlSock.GetFd() 800 n.Unlock() 801 if nlFd == -1 { 802 // The netlink socket got closed, simply exit to not leak this goroutine 803 return 804 } 805 // When the receive timeout expires the receive will return EAGAIN 806 if err == unix.EAGAIN { 807 // we continue here to avoid spam for timeouts 808 continue 809 } 810 logrus.Errorf("Failed to receive from netlink: %v ", err) 811 continue 812 } 813 814 for _, msg := range msgs { 815 if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH { 816 continue 817 } 818 819 neigh, err := netlink.NeighDeserialize(msg.Data) 820 if err != nil { 821 logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err) 822 continue 823 } 824 825 var ( 826 ip net.IP 827 mac net.HardwareAddr 828 l2Miss, l3Miss bool 829 ) 830 if neigh.IP.To4() != nil { 831 ip = neigh.IP 832 l3Miss = true 833 } else if neigh.HardwareAddr != nil { 834 mac = []byte(neigh.HardwareAddr) 835 ip = net.IP(mac[2:]) 836 l2Miss = true 837 } else { 838 continue 839 } 840 841 // Not any of the network's subnets. Ignore. 842 if !n.contains(ip) { 843 continue 844 } 845 846 if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { 847 continue 848 } 849 850 logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) 851 mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) 852 if err != nil { 853 logrus.Errorf("could not resolve peer %q: %v", ip, err) 854 continue 855 } 856 n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) 857 } 858 } 859 } 860 861 // Restore a network from the store to the driver if it is present. 862 // Must be called with the driver locked! 863 func (d *driver) restoreNetworkFromStore(nid string) *network { 864 n := d.getNetworkFromStore(nid) 865 if n != nil { 866 n.driver = d 867 n.endpoints = endpointTable{} 868 d.networks[nid] = n 869 } 870 return n 871 } 872 873 func (d *driver) network(nid string) *network { 874 d.Lock() 875 n, ok := d.networks[nid] 876 if !ok { 877 n = d.restoreNetworkFromStore(nid) 878 } 879 d.Unlock() 880 881 return n 882 } 883 884 func (d *driver) getNetworkFromStore(nid string) *network { 885 if d.store == nil { 886 return nil 887 } 888 889 n := &network{id: nid} 890 if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 891 return nil 892 } 893 894 return n 895 } 896 897 func (n *network) sandbox() osl.Sandbox { 898 n.Lock() 899 defer n.Unlock() 900 return n.sbox 901 } 902 903 func (n *network) vxlanID(s *subnet) uint32 { 904 n.Lock() 905 defer n.Unlock() 906 return s.vni 907 } 908 909 func (n *network) setVxlanID(s *subnet, vni uint32) { 910 n.Lock() 911 s.vni = vni 912 n.Unlock() 913 } 914 915 func (n *network) Key() []string { 916 return []string{"overlay", "network", n.id} 917 } 918 919 func (n *network) KeyPrefix() []string { 920 return []string{"overlay", "network"} 921 } 922 923 func (n *network) Value() []byte { 924 m := map[string]interface{}{} 925 926 netJSON := []*subnetJSON{} 927 928 for _, s := range n.subnets { 929 sj := &subnetJSON{ 930 SubnetIP: s.subnetIP.String(), 931 GwIP: s.gwIP.String(), 932 Vni: s.vni, 933 } 934 netJSON = append(netJSON, sj) 935 } 936 937 m["secure"] = n.secure 938 m["subnets"] = netJSON 939 m["mtu"] = n.mtu 940 b, err := json.Marshal(m) 941 if err != nil { 942 return []byte{} 943 } 944 945 return b 946 } 947 948 func (n *network) Index() uint64 { 949 return n.dbIndex 950 } 951 952 func (n *network) SetIndex(index uint64) { 953 n.dbIndex = index 954 n.dbExists = true 955 } 956 957 func (n *network) Exists() bool { 958 return n.dbExists 959 } 960 961 func (n *network) Skip() bool { 962 return false 963 } 964 965 func (n *network) SetValue(value []byte) error { 966 var ( 967 m map[string]interface{} 968 newNet bool 969 isMap = true 970 netJSON = []*subnetJSON{} 971 ) 972 973 if err := json.Unmarshal(value, &m); err != nil { 974 err := json.Unmarshal(value, &netJSON) 975 if err != nil { 976 return err 977 } 978 isMap = false 979 } 980 981 if len(n.subnets) == 0 { 982 newNet = true 983 } 984 985 if isMap { 986 if val, ok := m["secure"]; ok { 987 n.secure = val.(bool) 988 } 989 if val, ok := m["mtu"]; ok { 990 n.mtu = int(val.(float64)) 991 } 992 bytes, err := json.Marshal(m["subnets"]) 993 if err != nil { 994 return err 995 } 996 if err := json.Unmarshal(bytes, &netJSON); err != nil { 997 return err 998 } 999 } 1000 1001 for _, sj := range netJSON { 1002 subnetIPstr := sj.SubnetIP 1003 gwIPstr := sj.GwIP 1004 vni := sj.Vni 1005 1006 subnetIP, _ := types.ParseCIDR(subnetIPstr) 1007 gwIP, _ := types.ParseCIDR(gwIPstr) 1008 1009 if newNet { 1010 s := &subnet{ 1011 subnetIP: subnetIP, 1012 gwIP: gwIP, 1013 vni: vni, 1014 } 1015 n.subnets = append(n.subnets, s) 1016 } else { 1017 sNet := n.getMatchingSubnet(subnetIP) 1018 if sNet != nil { 1019 sNet.vni = vni 1020 } 1021 } 1022 } 1023 return nil 1024 } 1025 1026 func (n *network) DataScope() string { 1027 return datastore.GlobalScope 1028 } 1029 1030 func (n *network) writeToStore() error { 1031 if n.driver.store == nil { 1032 return nil 1033 } 1034 1035 return n.driver.store.PutObjectAtomic(n) 1036 } 1037 1038 func (n *network) releaseVxlanID() ([]uint32, error) { 1039 n.Lock() 1040 nSubnets := len(n.subnets) 1041 n.Unlock() 1042 if nSubnets == 0 { 1043 return nil, nil 1044 } 1045 1046 if n.driver.store != nil { 1047 if err := n.driver.store.DeleteObjectAtomic(n); err != nil { 1048 if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { 1049 // In both the above cases we can safely assume that the key has been removed by some other 1050 // instance and so simply get out of here 1051 return nil, nil 1052 } 1053 1054 return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err) 1055 } 1056 } 1057 var vnis []uint32 1058 n.Lock() 1059 for _, s := range n.subnets { 1060 if n.driver.vxlanIdm != nil { 1061 vnis = append(vnis, s.vni) 1062 } 1063 s.vni = 0 1064 } 1065 n.Unlock() 1066 1067 for _, vni := range vnis { 1068 n.driver.vxlanIdm.Release(uint64(vni)) 1069 } 1070 1071 return vnis, nil 1072 } 1073 1074 func (n *network) obtainVxlanID(s *subnet) error { 1075 //return if the subnet already has a vxlan id assigned 1076 if n.vxlanID(s) != 0 { 1077 return nil 1078 } 1079 1080 if n.driver.store == nil { 1081 return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id") 1082 } 1083 1084 for { 1085 if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil { 1086 return fmt.Errorf("getting network %q from datastore failed %v", n.id, err) 1087 } 1088 1089 if n.vxlanID(s) == 0 { 1090 vxlanID, err := n.driver.vxlanIdm.GetID(true) 1091 if err != nil { 1092 return fmt.Errorf("failed to allocate vxlan id: %v", err) 1093 } 1094 1095 n.setVxlanID(s, uint32(vxlanID)) 1096 if err := n.writeToStore(); err != nil { 1097 n.driver.vxlanIdm.Release(uint64(n.vxlanID(s))) 1098 n.setVxlanID(s, 0) 1099 if err == datastore.ErrKeyModified { 1100 continue 1101 } 1102 return fmt.Errorf("network %q failed to update data store: %v", n.id, err) 1103 } 1104 return nil 1105 } 1106 return nil 1107 } 1108 } 1109 1110 // contains return true if the passed ip belongs to one the network's 1111 // subnets 1112 func (n *network) contains(ip net.IP) bool { 1113 for _, s := range n.subnets { 1114 if s.subnetIP.Contains(ip) { 1115 return true 1116 } 1117 } 1118 1119 return false 1120 } 1121 1122 // getSubnetforIP returns the subnet to which the given IP belongs 1123 func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { 1124 for _, s := range n.subnets { 1125 // first check if the mask lengths are the same 1126 i, _ := s.subnetIP.Mask.Size() 1127 j, _ := ip.Mask.Size() 1128 if i != j { 1129 continue 1130 } 1131 if s.subnetIP.Contains(ip.IP) { 1132 return s 1133 } 1134 } 1135 return nil 1136 } 1137 1138 // getMatchingSubnet return the network's subnet that matches the input 1139 func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet { 1140 if ip == nil { 1141 return nil 1142 } 1143 for _, s := range n.subnets { 1144 // first check if the mask lengths are the same 1145 i, _ := s.subnetIP.Mask.Size() 1146 j, _ := ip.Mask.Size() 1147 if i != j { 1148 continue 1149 } 1150 if s.subnetIP.IP.Equal(ip.IP) { 1151 return s 1152 } 1153 } 1154 return nil 1155 }