github.com/blixtra/rkt@v0.8.1-0.20160204105720-ab0d1add1a43/networking/kvm.go (about) 1 // Copyright 2015 The rkt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // kvm.go file provides networking supporting functions for kvm flavor 16 package networking 17 18 import ( 19 "bufio" 20 "crypto/sha512" 21 "encoding/json" 22 "errors" 23 "fmt" 24 "net" 25 "os" 26 "strconv" 27 "strings" 28 "syscall" 29 30 "github.com/appc/cni/pkg/ip" 31 cnitypes "github.com/appc/cni/pkg/types" 32 "github.com/appc/spec/schema/types" 33 "github.com/hashicorp/errwrap" 34 "github.com/vishvananda/netlink" 35 36 "github.com/coreos/rkt/common" 37 "github.com/coreos/rkt/networking/netinfo" 38 "github.com/coreos/rkt/networking/tuntap" 39 ) 40 41 const ( 42 defaultBrName = "kvm-cni0" 43 defaultSubnetFile = "/run/flannel/subnet.env" 44 defaultMTU = 1500 45 ) 46 47 type BridgeNetConf struct { 48 NetConf 49 BrName string `json:"bridge"` 50 IsGw bool `json:"isGateway"` 51 } 52 53 // setupTapDevice creates persistent tap device 54 // and returns a newly created netlink.Link structure 55 func setupTapDevice(podID types.UUID) (netlink.Link, error) { 56 // network device names are limited to 16 characters 57 // the suffix %d will be replaced by the kernel with a suitable number 58 nameTemplate := fmt.Sprintf("rkt-%s-tap%%d", podID.String()[0:4]) 59 ifName, err := tuntap.CreatePersistentIface(nameTemplate, tuntap.Tap) 60 if err != nil { 61 return nil, errwrap.Wrap(errors.New("tuntap persist"), err) 62 } 63 64 link, err := netlink.LinkByName(ifName) 65 if err != nil { 66 return nil, errwrap.Wrap(fmt.Errorf("cannot find link %q", ifName), err) 67 } 68 69 if err := netlink.LinkSetUp(link); err != nil { 70 return nil, errwrap.Wrap(fmt.Errorf("cannot set link up %q", ifName), err) 71 } 72 return link, nil 73 } 74 75 type MacVTapNetConf struct { 76 NetConf 77 Master string `json:"master"` 78 Mode string `json:"mode"` 79 } 80 81 // setupTapDevice creates persistent macvtap device 82 // and returns a newly created netlink.Link structure 83 func setupMacVTapDevice(podID types.UUID, config MacVTapNetConf) (netlink.Link, error) { 84 master, err := netlink.LinkByName(config.Master) 85 if err != nil { 86 return nil, errwrap.Wrap(fmt.Errorf("cannot find master device '%v'", config.Master), err) 87 } 88 var mode netlink.MacvlanMode 89 switch config.Mode { 90 // if not set - defaults to bridge mode as in: 91 // https://github.com/coreos/rkt/blob/master/Documentation/networking.md#macvlan 92 case "", "bridge": 93 mode = netlink.MACVLAN_MODE_BRIDGE 94 case "private": 95 mode = netlink.MACVLAN_MODE_PRIVATE 96 case "vepa": 97 mode = netlink.MACVLAN_MODE_VEPA 98 case "passthru": 99 mode = netlink.MACVLAN_MODE_PASSTHRU 100 default: 101 return nil, fmt.Errorf("unsupported macvtap mode: %v", config.Mode) 102 } 103 mtu := master.Attrs().MTU 104 if config.MTU != 0 { 105 mtu = config.MTU 106 } 107 nameTemplate := fmt.Sprintf("rkt-%s-vtap%%d", podID.String()[0:4]) 108 link := &netlink.Macvtap{ 109 Macvlan: netlink.Macvlan{ 110 LinkAttrs: netlink.LinkAttrs{ 111 Name: nameTemplate, 112 MTU: mtu, 113 ParentIndex: master.Attrs().Index, 114 }, 115 Mode: mode, 116 }, 117 } 118 119 if err := netlink.LinkAdd(link); err != nil { 120 return nil, errwrap.Wrap(errors.New("cannot create macvtap interface"), err) 121 } 122 return link, nil 123 } 124 125 // kvmSetupNetAddressing calls IPAM plugin (with a hack) to reserve an IP to be 126 // used by newly create tuntap pair 127 // in result it updates activeNet.runtime configuration 128 func kvmSetupNetAddressing(network *Networking, n activeNet, ifName string) error { 129 // TODO: very ugly hack, that go through upper plugin, down to ipam plugin 130 if err := ip.EnableIP4Forward(); err != nil { 131 return errwrap.Wrap(errors.New("failed to enable forwarding"), err) 132 } 133 n.conf.Type = n.conf.IPAM.Type 134 output, err := network.execNetPlugin("ADD", &n, ifName) 135 if err != nil { 136 return errwrap.Wrap(fmt.Errorf("problem executing network plugin %q (%q)", n.conf.Type, ifName), err) 137 } 138 139 result := cnitypes.Result{} 140 if err = json.Unmarshal(output, &result); err != nil { 141 return errwrap.Wrap(fmt.Errorf("error parsing %q result", n.conf.Name), err) 142 } 143 144 if result.IP4 == nil { 145 return fmt.Errorf("net-plugin returned no IPv4 configuration") 146 } 147 148 n.runtime.IP, n.runtime.Mask, n.runtime.HostIP, n.runtime.IP4 = result.IP4.IP.IP, net.IP(result.IP4.IP.Mask), result.IP4.Gateway, result.IP4 149 150 return nil 151 } 152 153 func ensureHasAddr(link netlink.Link, ipn *net.IPNet) error { 154 addrs, err := netlink.AddrList(link, syscall.AF_INET) 155 if err != nil && err != syscall.ENOENT { 156 return errwrap.Wrap(errors.New("could not get list of IP addresses"), err) 157 } 158 159 // if there're no addresses on the interface, it's ok -- we'll add one 160 if len(addrs) > 0 { 161 ipnStr := ipn.String() 162 for _, a := range addrs { 163 // string comp is actually easiest for doing IPNet comps 164 if a.IPNet.String() == ipnStr { 165 return nil 166 } 167 } 168 return fmt.Errorf("%q already has an IP address different from %v", link.Attrs().Name, ipn.String()) 169 } 170 171 addr := &netlink.Addr{IPNet: ipn, Label: link.Attrs().Name} 172 if err := netlink.AddrAdd(link, addr); err != nil { 173 return errwrap.Wrap(fmt.Errorf("could not add IP address to %q", link.Attrs().Name), err) 174 } 175 return nil 176 } 177 178 func bridgeByName(name string) (*netlink.Bridge, error) { 179 l, err := netlink.LinkByName(name) 180 if err != nil { 181 return nil, errwrap.Wrap(fmt.Errorf("could not lookup %q", name), err) 182 } 183 br, ok := l.(*netlink.Bridge) 184 if !ok { 185 return nil, fmt.Errorf("%q already exists but is not a bridge", name) 186 } 187 return br, nil 188 } 189 190 func ensureBridgeIsUp(brName string, mtu int) (*netlink.Bridge, error) { 191 br := &netlink.Bridge{ 192 LinkAttrs: netlink.LinkAttrs{ 193 Name: brName, 194 MTU: mtu, 195 }, 196 } 197 198 if err := netlink.LinkAdd(br); err != nil { 199 if err != syscall.EEXIST { 200 return nil, errwrap.Wrap(fmt.Errorf("could not add %q", brName), err) 201 } 202 203 // it's ok if the device already exists as long as config is similar 204 br, err = bridgeByName(brName) 205 if err != nil { 206 return nil, err 207 } 208 } 209 210 if err := netlink.LinkSetUp(br); err != nil { 211 return nil, err 212 } 213 214 return br, nil 215 } 216 217 func addRoute(link netlink.Link, podIP net.IP) error { 218 route := netlink.Route{ 219 LinkIndex: link.Attrs().Index, 220 Scope: netlink.SCOPE_LINK, 221 Dst: &net.IPNet{ 222 IP: podIP, 223 Mask: net.IPv4Mask(0xff, 0xff, 0xff, 0xff), 224 }, 225 } 226 return netlink.RouteAdd(&route) 227 } 228 229 func removeAllRoutesOnLink(link netlink.Link) error { 230 routes, err := netlink.RouteList(link, netlink.FAMILY_V4) 231 if err != nil { 232 return errwrap.Wrap(fmt.Errorf("cannot list routes on link %q", link.Attrs().Name), err) 233 } 234 235 for _, route := range routes { 236 if err := netlink.RouteDel(&route); err != nil { 237 return errwrap.Wrap(fmt.Errorf("error in time of route removal for route %q", route), err) 238 } 239 } 240 241 return nil 242 } 243 244 func getChainName(podUUIDString, confName string) string { 245 h := sha512.Sum512([]byte(podUUIDString)) 246 return fmt.Sprintf("CNI-%s-%x", confName, h[:8]) 247 } 248 249 type FlannelNetConf struct { 250 NetConf 251 252 SubnetFile string `json:"subnetFile"` 253 Delegate map[string]interface{} `json:"delegate"` 254 } 255 256 func loadFlannelNetConf(bytes []byte) (*FlannelNetConf, error) { 257 n := &FlannelNetConf{ 258 SubnetFile: defaultSubnetFile, 259 } 260 if err := json.Unmarshal(bytes, n); err != nil { 261 return nil, errwrap.Wrap(errors.New("failed to load netconf"), err) 262 } 263 return n, nil 264 } 265 266 type subnetEnv struct { 267 nw *net.IPNet 268 sn *net.IPNet 269 mtu int 270 ipmasq bool 271 } 272 273 func loadFlannelSubnetEnv(fn string) (*subnetEnv, error) { 274 f, err := os.Open(fn) 275 if err != nil { 276 return nil, err 277 } 278 defer f.Close() 279 280 se := &subnetEnv{} 281 282 s := bufio.NewScanner(f) 283 for s.Scan() { 284 parts := strings.SplitN(s.Text(), "=", 2) 285 switch parts[0] { 286 case "FLANNEL_NETWORK": 287 _, se.nw, err = net.ParseCIDR(parts[1]) 288 if err != nil { 289 return nil, err 290 } 291 292 case "FLANNEL_SUBNET": 293 _, se.sn, err = net.ParseCIDR(parts[1]) 294 if err != nil { 295 return nil, err 296 } 297 298 case "FLANNEL_MTU": 299 mtu, err := strconv.ParseUint(parts[1], 10, 32) 300 if err != nil { 301 return nil, err 302 } 303 se.mtu = int(mtu) 304 305 case "FLANNEL_IPMASQ": 306 se.ipmasq = parts[1] == "true" 307 } 308 } 309 if err := s.Err(); err != nil { 310 return nil, err 311 } 312 313 return se, nil 314 } 315 316 func hasKey(m map[string]interface{}, k string) bool { 317 _, ok := m[k] 318 return ok 319 } 320 321 func isString(i interface{}) bool { 322 _, ok := i.(string) 323 return ok 324 } 325 326 func kvmTransformFlannelNetwork(net *activeNet) error { 327 n, err := loadFlannelNetConf(net.confBytes) 328 if err != nil { 329 return err 330 } 331 332 fenv, err := loadFlannelSubnetEnv(n.SubnetFile) 333 if err != nil { 334 return err 335 } 336 337 if n.Delegate == nil { 338 n.Delegate = make(map[string]interface{}) 339 } else { 340 if hasKey(n.Delegate, "type") && !isString(n.Delegate["type"]) { 341 return fmt.Errorf("'delegate' dictionary, if present, must have (string) 'type' field") 342 } 343 if hasKey(n.Delegate, "name") { 344 return fmt.Errorf("'delegate' dictionary must not have 'name' field, it'll be set by flannel") 345 } 346 if hasKey(n.Delegate, "ipam") { 347 return fmt.Errorf("'delegate' dictionary must not have 'ipam' field, it'll be set by flannel") 348 } 349 } 350 351 n.Delegate["name"] = n.Name 352 353 if !hasKey(n.Delegate, "type") { 354 n.Delegate["type"] = "bridge" 355 } 356 357 if !hasKey(n.Delegate, "ipMasq") { 358 // if flannel is not doing ipmasq, we should 359 ipmasq := !fenv.ipmasq 360 n.Delegate["ipMasq"] = ipmasq 361 } 362 363 if !hasKey(n.Delegate, "mtu") { 364 mtu := fenv.mtu 365 n.Delegate["mtu"] = mtu 366 } 367 368 if n.Delegate["type"].(string) == "bridge" { 369 if !hasKey(n.Delegate, "isGateway") { 370 n.Delegate["isGateway"] = true 371 } 372 } 373 374 n.Delegate["ipam"] = map[string]interface{}{ 375 "type": "host-local", 376 "subnet": fenv.sn.String(), 377 "routes": []cnitypes.Route{ 378 cnitypes.Route{ 379 Dst: *fenv.nw, 380 }, 381 }, 382 } 383 384 bytes, err := json.Marshal(n.Delegate) 385 if err != nil { 386 return errwrap.Wrap(errors.New("error in marshaling generated network settings"), err) 387 } 388 389 *net = activeNet{ 390 confBytes: bytes, 391 conf: &NetConf{}, 392 runtime: &netinfo.NetInfo{ 393 IP4: &cnitypes.IPConfig{}, 394 }, 395 } 396 net.conf.Name = n.Name 397 net.conf.Type = n.Delegate["type"].(string) 398 net.conf.IPMasq = n.Delegate["ipMasq"].(bool) 399 net.conf.MTU = n.Delegate["mtu"].(int) 400 net.conf.IPAM.Type = "host-local" 401 return nil 402 } 403 404 // kvmSetup prepare new Networking to be used in kvm environment based on tuntap pair interfaces 405 // to allow communication with virtual machine created by lkvm tool 406 func kvmSetup(podRoot string, podID types.UUID, fps []ForwardedPort, netList common.NetList, localConfig string) (*Networking, error) { 407 network := Networking{ 408 podEnv: podEnv{ 409 podRoot: podRoot, 410 podID: podID, 411 netsLoadList: netList, 412 localConfig: localConfig, 413 }, 414 } 415 var e error 416 network.nets, e = network.loadNets() 417 if e != nil { 418 return nil, errwrap.Wrap(errors.New("error loading network definitions"), e) 419 } 420 421 for i, n := range network.nets { 422 if n.conf.Type == "flannel" { 423 if err := kvmTransformFlannelNetwork(&n); err != nil { 424 return nil, errwrap.Wrap(errors.New("cannot transform flannel network into basic network"), err) 425 } 426 } 427 switch n.conf.Type { 428 case "ptp": 429 link, err := setupTapDevice(podID) 430 if err != nil { 431 return nil, err 432 } 433 ifName := link.Attrs().Name 434 n.runtime.IfName = ifName 435 436 err = kvmSetupNetAddressing(&network, n, ifName) 437 if err != nil { 438 return nil, err 439 } 440 441 // add address to host tap device 442 err = ensureHasAddr( 443 link, 444 &net.IPNet{ 445 IP: n.runtime.IP4.Gateway, 446 Mask: net.IPMask(n.runtime.Mask), 447 }, 448 ) 449 if err != nil { 450 return nil, errwrap.Wrap(fmt.Errorf("cannot add address to host tap device %q", ifName), err) 451 } 452 453 if err := removeAllRoutesOnLink(link); err != nil { 454 return nil, errwrap.Wrap(fmt.Errorf("cannot remove route on host tap device %q", ifName), err) 455 } 456 457 if err := addRoute(link, n.runtime.IP); err != nil { 458 return nil, errwrap.Wrap(errors.New("cannot add on host direct route to pod"), err) 459 } 460 461 case "bridge": 462 config := BridgeNetConf{ 463 NetConf: NetConf{ 464 MTU: defaultMTU, 465 }, 466 BrName: defaultBrName, 467 } 468 if err := json.Unmarshal(n.confBytes, &config); err != nil { 469 return nil, errwrap.Wrap(fmt.Errorf("error parsing %q result", n.conf.Name), err) 470 } 471 472 br, err := ensureBridgeIsUp(config.BrName, config.MTU) 473 if err != nil { 474 return nil, errwrap.Wrap(errors.New("error in time of bridge setup"), err) 475 } 476 link, err := setupTapDevice(podID) 477 if err != nil { 478 return nil, errwrap.Wrap(errors.New("can not setup tap device"), err) 479 } 480 err = netlink.LinkSetMaster(link, br) 481 if err != nil { 482 rErr := tuntap.RemovePersistentIface(n.runtime.IfName, tuntap.Tap) 483 if rErr != nil { 484 stderr.PrintE("warning: could not cleanup tap interface", rErr) 485 } 486 return nil, errwrap.Wrap(errors.New("can not add tap interface to bridge"), err) 487 } 488 489 ifName := link.Attrs().Name 490 n.runtime.IfName = ifName 491 492 err = kvmSetupNetAddressing(&network, n, ifName) 493 if err != nil { 494 return nil, err 495 } 496 497 if config.IsGw { 498 err = ensureHasAddr( 499 br, 500 &net.IPNet{ 501 IP: n.runtime.IP4.Gateway, 502 Mask: net.IPMask(n.runtime.Mask), 503 }, 504 ) 505 506 if err != nil { 507 return nil, errwrap.Wrap(fmt.Errorf("cannot add address to host bridge device %q", br.Name), err) 508 } 509 } 510 511 case "macvlan": 512 config := MacVTapNetConf{} 513 if err := json.Unmarshal(n.confBytes, &config); err != nil { 514 return nil, errwrap.Wrap(fmt.Errorf("error parsing %q result", n.conf.Name), err) 515 } 516 link, err := setupMacVTapDevice(podID, config) 517 if err != nil { 518 return nil, err 519 } 520 ifName := link.Attrs().Name 521 n.runtime.IfName = ifName 522 523 err = kvmSetupNetAddressing(&network, n, ifName) 524 if err != nil { 525 return nil, err 526 } 527 528 default: 529 return nil, fmt.Errorf("network %q have unsupported type: %q", n.conf.Name, n.conf.Type) 530 } 531 532 if n.conf.IPMasq { 533 chain := getChainName(podID.String(), n.conf.Name) 534 if err := ip.SetupIPMasq(&net.IPNet{ 535 IP: n.runtime.IP, 536 Mask: net.IPMask(n.runtime.Mask), 537 }, chain); err != nil { 538 return nil, err 539 } 540 } 541 network.nets[i] = n 542 } 543 err := network.forwardPorts(fps, network.GetDefaultIP()) 544 if err != nil { 545 return nil, err 546 } 547 548 return &network, nil 549 } 550 551 /* 552 extend Networking struct with methods to clean up kvm specific network configurations 553 */ 554 555 // teardownKvmNets teardown every active networking from networking by 556 // removing tuntap interface and releasing its ip from IPAM plugin 557 func (n *Networking) teardownKvmNets() { 558 for _, an := range n.nets { 559 switch an.conf.Type { 560 case "ptp", "bridge": 561 // remove tuntap interface 562 tuntap.RemovePersistentIface(an.runtime.IfName, tuntap.Tap) 563 564 case "macvlan": 565 link, err := netlink.LinkByName(an.runtime.IfName) 566 if err != nil { 567 stderr.PrintE(fmt.Sprintf("cannot find link `%v`", an.runtime.IfName), err) 568 continue 569 } else { 570 err := netlink.LinkDel(link) 571 if err != nil { 572 stderr.PrintE(fmt.Sprintf("cannot remove link `%v`", an.runtime.IfName), err) 573 continue 574 } 575 } 576 577 default: 578 stderr.Printf("unsupported network type: %q", an.conf.Type) 579 continue 580 } 581 // ugly hack again to directly call IPAM plugin to release IP 582 an.conf.Type = an.conf.IPAM.Type 583 584 _, err := n.execNetPlugin("DEL", &an, an.runtime.IfName) 585 if err != nil { 586 stderr.PrintE("error executing network plugin", err) 587 } 588 // remove masquerading if it was prepared 589 if an.conf.IPMasq { 590 chain := getChainName(n.podID.String(), an.conf.Name) 591 err := ip.TeardownIPMasq(&net.IPNet{ 592 IP: an.runtime.IP, 593 Mask: net.IPMask(an.runtime.Mask), 594 }, chain) 595 if err != nil { 596 stderr.PrintE("error on removing masquerading", err) 597 } 598 } 599 } 600 } 601 602 // kvmTeardown network teardown for kvm flavor based pods 603 // similar to Networking.Teardown but without host namespaces 604 func (n *Networking) kvmTeardown() { 605 606 if err := n.unforwardPorts(); err != nil { 607 stderr.PrintE("error removing forwarded ports (kvm)", err) 608 } 609 n.teardownKvmNets() 610 } 611 612 // Following methods implements behavior of netDescriber by activeNet 613 // (behavior required by stage1/init/kvm package and its kernel parameters configuration) 614 615 func (an activeNet) HostIP() net.IP { 616 return an.runtime.HostIP 617 } 618 func (an activeNet) GuestIP() net.IP { 619 return an.runtime.IP 620 } 621 func (an activeNet) IfName() string { 622 if an.conf.Type == "macvlan" { 623 // macvtap device passed as parameter to lkvm binary have different 624 // kind of name, path to /dev/tapN made with N as link index 625 link, err := netlink.LinkByName(an.runtime.IfName) 626 if err != nil { 627 stderr.PrintE(fmt.Sprintf("cannot get interface '%v'", an.runtime.IfName), err) 628 return "" 629 } 630 return fmt.Sprintf("/dev/tap%d", link.Attrs().Index) 631 } 632 return an.runtime.IfName 633 } 634 func (an activeNet) Mask() net.IP { 635 return an.runtime.Mask 636 } 637 func (an activeNet) Name() string { 638 return an.conf.Name 639 } 640 func (an activeNet) IPMasq() bool { 641 return an.conf.IPMasq 642 } 643 func (an activeNet) Gateway() net.IP { 644 return an.runtime.IP4.Gateway 645 } 646 func (an activeNet) Routes() []cnitypes.Route { 647 return an.runtime.IP4.Routes 648 } 649 650 // GetActiveNetworks returns activeNets to be used as NetDescriptors 651 // by plugins, which are required for stage1 executor to run (only for KVM) 652 func (e *Networking) GetActiveNetworks() []activeNet { 653 return e.nets 654 }