// Source: github.com/mirantis/virtlet@v1.5.2-0.20191204181327-1659b8a48e9b/pkg/tapmanager/tapfdsource.go

/*
Copyright 2017 Mirantis

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tapmanager

import (
	"encoding/json"
	"fmt"
	"net"
	"strings"
	"sync"
	"time"

	"github.com/containernetworking/cni/pkg/ns"
	cnitypes "github.com/containernetworking/cni/pkg/types"
	cnicurrent "github.com/containernetworking/cni/pkg/types/current"
	"github.com/davecgh/go-spew/spew"
	"github.com/golang/glog"
	"github.com/vishvananda/netlink"

	"github.com/Mirantis/virtlet/pkg/cni"
	"github.com/Mirantis/virtlet/pkg/dhcp"
	"github.com/Mirantis/virtlet/pkg/nettools"
	"github.com/Mirantis/virtlet/pkg/network"
	"github.com/Mirantis/virtlet/pkg/utils"
)

// NOTE(review): neither constant is referenced in this file; presumably
// they are the default Calico subnet prefix length and the name of the
// environment variable overriding it, consumed elsewhere in the
// package -- TODO confirm.
const (
	calicoDefaultSubnet = 24
	calicoSubnetVar     = "VIRTLET_CALICO_SUBNET"
)

// InterfaceDescription contains interface type with additional data
// needed to identify it. A JSON-encoded list of these is what
// TapFDSource.GetInfo returns.
type InterfaceDescription struct {
	// Type is the interface type copied from the container-side
	// network interface.
	Type network.InterfaceType `json:"type"`
	// HardwareAddr is the MAC address of the interface (JSON key "mac").
	HardwareAddr net.HardwareAddr `json:"mac"`
	// FdIndex is the position of the interface in the container-side
	// network's interface list; GetFDs returns the file descriptors in
	// that same order.
	FdIndex int `json:"fdIndex"`
	// PCIAddress is the PCI address copied from the container-side
	// network interface.
	PCIAddress string `json:"pciAddress"`
}

// PodNetworkDesc contains the data that are required by TapFDSource
// to set up a tap device for a VM
type PodNetworkDesc struct {
	// PodID specifies the id of the pod. It is also used to derive
	// the pod's network namespace (cni.CreateNetNS / cni.PodNetNSPath).
	PodID string `json:"podId"`
	// PodNs specifies the namespace of the pod
	PodNs string `json:"podNs"`
	// PodName specifies the name of the pod
	PodName string `json:"podName"`
	// DNS specifies DNS settings for the pod. When non-nil, GetFDs
	// overrides the nameservers, search domains and options of the CNI
	// result with these values. The field has no JSON tag, so
	// encoding/json uses the Go field name as the key.
	DNS *cnitypes.DNS
}

// GetFDPayload contains the data that are required by TapFDSource
// to prepare container side network configuration.
// It is the JSON payload decoded by TapFDSource.GetFDs.
type GetFDPayload struct {
	// Description contains the pod information and DNS settings for the pod
	Description *PodNetworkDesc `json:"podNetworkDesc"`
}

// RecoverPayload contains the data that are required by TapFDSource
// to recover a network configuration in a pod.
// It is the JSON payload decoded by TapFDSource.Recover.
type RecoverPayload struct {
	// Description contains the pod information and DNS settings for the pod
	Description *PodNetworkDesc `json:"podNetworkDesc"`
	// ContainerSideNetwork specifies configuration used to configure retaken
	// environment. Recover rejects the payload if it is nil.
	ContainerSideNetwork *network.ContainerSideNetwork `json:"csn"`
	// HaveRunningContainers is true if any domains are currently running
	// for this pod. VF reconfiguration is to be skipped if that's the case.
	// The field has no JSON tag, so encoding/json uses the Go field name
	// as the key.
	HaveRunningContainers bool
}

// podNetwork is the per-pod state that TapFDSource keeps in its fdMap.
type podNetwork struct {
	// pnd is a copy of the pod description the network was set up for.
	pnd PodNetworkDesc
	// csn is the container-side network produced by the init callback
	// given to setupNetNS.
	csn *network.ContainerSideNetwork
	// dhcpServer is the DHCP server started for the pod by setupNetNS.
	dhcpServer *dhcp.Server
	// doneCh receives the result of the goroutine that runs
	// dhcpServer.Serve inside the pod's network namespace.
	doneCh chan error
}

// TapFDSource sets up and tears down Virtlet VM network.
96 // It implements FDSource interface 97 type TapFDSource struct { 98 sync.Mutex 99 100 cniClient cni.Client 101 dummyNetwork *cnicurrent.Result 102 dummyNetworkNsPath string 103 fdMap map[string]*podNetwork 104 enableSriov bool 105 calicoSubnetSize int 106 } 107 108 var _ FDSource = &TapFDSource{} 109 110 // NewTapFDSource returns a TapFDSource for the specified CNI plugin & 111 // config dir 112 func NewTapFDSource(cniClient cni.Client, enableSriov bool, calicoSubnetSize int) (*TapFDSource, error) { 113 s := &TapFDSource{ 114 cniClient: cniClient, 115 fdMap: make(map[string]*podNetwork), 116 calicoSubnetSize: calicoSubnetSize, 117 enableSriov: enableSriov, 118 } 119 120 return s, nil 121 } 122 123 func (s *TapFDSource) getDummyNetwork() (*cnicurrent.Result, string, error) { 124 if s.dummyNetwork == nil { 125 var err error 126 s.dummyNetwork, s.dummyNetworkNsPath, err = s.cniClient.GetDummyNetwork() 127 if err != nil { 128 return nil, "", err 129 } 130 // s.dummyGateway = dummyResult.IPs[0].Address.IP 131 132 } 133 return s.dummyNetwork, s.dummyNetworkNsPath, nil 134 } 135 136 // GetFDs implements GetFDs method of FDSource interface 137 func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { 138 var payload GetFDPayload 139 if err := json.Unmarshal(data, &payload); err != nil { 140 return nil, nil, fmt.Errorf("error unmarshalling GetFD payload: %v", err) 141 } 142 pnd := payload.Description 143 if err := cni.CreateNetNS(pnd.PodID); err != nil { 144 return nil, nil, fmt.Errorf("error creating new netns for pod %s (%s): %v", pnd.PodName, pnd.PodID, err) 145 } 146 147 gotError := false 148 podAddedToNetwork := false 149 defer func() { 150 if gotError { 151 if podAddedToNetwork { 152 if err := s.cniClient.RemoveSandboxFromNetwork(pnd.PodID, pnd.PodName, pnd.PodNs); err != nil { 153 glog.Errorf("Error removing a pod from the pod network after failed network setup: %v", err) 154 } 155 } 156 if err := cni.DestroyNetNS(pnd.PodID); err != nil { 157 
glog.Errorf("Error removing netns after failed network setup: %v", err) 158 } 159 } 160 }() 161 162 netConfig, err := s.cniClient.AddSandboxToNetwork(pnd.PodID, pnd.PodName, pnd.PodNs) 163 if err != nil { 164 gotError = true 165 return nil, nil, fmt.Errorf("error adding pod %s (%s) to CNI network: %v", pnd.PodName, pnd.PodID, err) 166 } 167 podAddedToNetwork = true 168 glog.V(3).Infof("CNI configuration for pod %s (%s): %s", pnd.PodName, pnd.PodID, spew.Sdump(netConfig)) 169 170 if netConfig == nil { 171 netConfig = &cnicurrent.Result{} 172 } 173 174 if payload.Description.DNS != nil { 175 netConfig.DNS.Nameservers = pnd.DNS.Nameservers 176 netConfig.DNS.Search = pnd.DNS.Search 177 netConfig.DNS.Options = pnd.DNS.Options 178 } 179 180 var fds []int 181 var respData []byte 182 var csn *network.ContainerSideNetwork 183 if err := s.setupNetNS(key, pnd, func(netNSPath string, allLinks []netlink.Link, hostNS ns.NetNS) (*network.ContainerSideNetwork, error) { 184 if netConfig, err = nettools.ValidateAndFixCNIResult(netConfig, netNSPath, allLinks); err != nil { 185 gotError = true 186 return nil, fmt.Errorf("error fixing cni configuration: %v", err) 187 } 188 if err := nettools.FixCalicoNetworking(netConfig, s.calicoSubnetSize, s.getDummyNetwork); err != nil { 189 // don't fail in this case because there may be even no Calico 190 glog.Warningf("Calico detection/fix didn't work: %v", err) 191 } 192 glog.V(3).Infof("CNI Result after fix:\n%s", spew.Sdump(netConfig)) 193 194 var err error 195 if csn, err = nettools.SetupContainerSideNetwork(netConfig, netNSPath, allLinks, s.enableSriov, hostNS); err != nil { 196 return nil, err 197 } 198 199 if respData, err = json.Marshal(csn); err != nil { 200 return nil, fmt.Errorf("error marshalling net config: %v", err) 201 } 202 203 for _, i := range csn.Interfaces { 204 fds = append(fds, int(i.Fo.Fd())) 205 } 206 return csn, nil 207 }); err != nil { 208 gotError = true 209 return nil, nil, err 210 } 211 212 return fds, respData, nil 
213 } 214 215 // Release implements Release method of FDSource interface 216 func (s *TapFDSource) Release(key string) error { 217 s.Lock() 218 defer s.Unlock() 219 pn, found := s.fdMap[key] 220 if !found { 221 return fmt.Errorf("bad fd key: %q", key) 222 } 223 224 netNSPath := cni.PodNetNSPath(pn.pnd.PodID) 225 226 vmNS, err := ns.GetNS(netNSPath) 227 if err != nil { 228 return fmt.Errorf("failed to open network namespace at %q: %v", netNSPath, err) 229 } 230 231 // Try to keep this function idempotent even if there are errors during the following calls. 232 // This can cause some resource leaks in multiple CNI case but makes it possible 233 // to call `RunPodSandbox` again after a failed attempt. Failing to do so would cause 234 // the next `RunPodSandbox` call to fail due to the netns already being present. 235 defer func() { 236 if err := cni.DestroyNetNS(pn.pnd.PodID); err != nil { 237 glog.Errorf("Error when removing network namespace for pod sandbox %q: %v", pn.pnd.PodID, err) 238 } 239 }() 240 241 if err := nettools.ReconstructVFs(pn.csn, vmNS, false); err != nil { 242 return fmt.Errorf("failed to reconstruct SR-IOV devices: %v", err) 243 } 244 245 if err := vmNS.Do(func(ns.NetNS) error { 246 if err := pn.dhcpServer.Close(); err != nil { 247 return fmt.Errorf("failed to stop dhcp server: %v", err) 248 } 249 <-pn.doneCh 250 return nettools.Teardown(pn.csn) 251 }); err != nil { 252 return err 253 } 254 255 if err := s.cniClient.RemoveSandboxFromNetwork(pn.pnd.PodID, pn.pnd.PodName, pn.pnd.PodNs); err != nil { 256 return fmt.Errorf("error removing pod sandbox %q from CNI network: %v", pn.pnd.PodID, err) 257 } 258 259 delete(s.fdMap, key) 260 return nil 261 } 262 263 // GetInfo implements GetInfo method of FDSource interface 264 func (s *TapFDSource) GetInfo(key string) ([]byte, error) { 265 s.Lock() 266 defer s.Unlock() 267 pn, found := s.fdMap[key] 268 if !found { 269 return nil, fmt.Errorf("bad fd key: %q", key) 270 } 271 var descriptions 
[]InterfaceDescription 272 for i, iface := range pn.csn.Interfaces { 273 descriptions = append(descriptions, InterfaceDescription{ 274 FdIndex: i, 275 HardwareAddr: iface.HardwareAddr, 276 Type: iface.Type, 277 PCIAddress: iface.PCIAddress, 278 }) 279 } 280 data, err := json.Marshal(descriptions) 281 if err != nil { 282 return nil, fmt.Errorf("interface descriptions marshaling error: %v", err) 283 } 284 return data, nil 285 } 286 287 // Stop stops any running DHCP servers associated with TapFDSource 288 // and closes tap fds without releasing any other resources. 289 func (s *TapFDSource) Stop() error { 290 s.Lock() 291 defer s.Unlock() 292 var errors []string 293 for _, pn := range s.fdMap { 294 if err := pn.dhcpServer.Close(); err != nil { 295 errors = append(errors, fmt.Sprintf("error stopping dhcp server: %v", err.Error())) 296 } else { 297 <-pn.doneCh 298 } 299 for _, i := range pn.csn.Interfaces { 300 if err := i.Fo.Close(); err != nil { 301 errors = append(errors, fmt.Sprintf("error closing tap fd: %v", err)) 302 } 303 } 304 } 305 s.fdMap = make(map[string]*podNetwork) 306 if errors != nil { 307 return fmt.Errorf("Errors while stopping TapFDSource:\n%s", strings.Join(errors, "\n")) 308 } 309 return nil 310 } 311 312 // Recover recovers the state for the netns after Virtlet restart 313 func (s *TapFDSource) Recover(key string, data []byte) error { 314 var payload RecoverPayload 315 if err := json.Unmarshal(data, &payload); err != nil { 316 return fmt.Errorf("error unmarshalling GetFD payload: %v", err) 317 } 318 pnd := payload.Description 319 csn := payload.ContainerSideNetwork 320 if csn == nil { 321 return fmt.Errorf("ContainerSideNetwork not passed to Recover()") 322 } 323 if csn.Result == nil { 324 csn.Result = &cnicurrent.Result{} 325 } 326 netNSPath := cni.PodNetNSPath(pnd.PodID) 327 vmNS, err := ns.GetNS(netNSPath) 328 if err != nil { 329 return fmt.Errorf("failed to open network namespace at %q: %v", netNSPath, err) 330 } 331 if 
!payload.HaveRunningContainers { 332 if err := nettools.ReconstructVFs(csn, vmNS, true); err != nil { 333 return err 334 } 335 } 336 return s.setupNetNS(key, pnd, func(netNSPath string, allLinks []netlink.Link, hostNS ns.NetNS) (*network.ContainerSideNetwork, error) { 337 if err := nettools.RecoverContainerSideNetwork(csn, netNSPath, allLinks, hostNS); err != nil { 338 return nil, err 339 } 340 return csn, nil 341 }) 342 } 343 344 // RetrieveFDs retrieves the FDs. 345 // It's only used in case if VM exited but Recover() didn't populate the FDs 346 func (s *TapFDSource) RetrieveFDs(key string) ([]int, error) { 347 var podNet *podNetwork 348 var fds []int 349 func() { 350 s.Lock() 351 defer s.Unlock() 352 podNet = s.fdMap[key] 353 }() 354 if podNet == nil { 355 return nil, fmt.Errorf("bad key %q to retrieve FDs", key) 356 } 357 358 netNSPath := cni.PodNetNSPath(podNet.pnd.PodID) 359 vmNS, err := ns.GetNS(netNSPath) 360 if err != nil { 361 return nil, fmt.Errorf("failed to open network namespace at %q: %v", netNSPath, err) 362 } 363 364 if err := utils.CallInNetNSWithSysfsRemounted(vmNS, func(hostNS ns.NetNS) error { 365 allLinks, err := netlink.LinkList() 366 if err != nil { 367 return fmt.Errorf("error listing the links: %v", err) 368 } 369 370 return nettools.RecoverContainerSideNetwork(podNet.csn, netNSPath, allLinks, hostNS) 371 }); err != nil { 372 return nil, err 373 } 374 375 for _, ifDesc := range podNet.csn.Interfaces { 376 // Fail if not all succeeded 377 if ifDesc.Fo == nil { 378 return nil, fmt.Errorf("failed to open tap interface %q", ifDesc.Name) 379 } 380 fds = append(fds, int(ifDesc.Fo.Fd())) 381 } 382 return fds, nil 383 } 384 385 func (s *TapFDSource) setupNetNS(key string, pnd *PodNetworkDesc, initNet func(netNSPath string, allLinks []netlink.Link, hostNS ns.NetNS) (*network.ContainerSideNetwork, error)) error { 386 netNSPath := cni.PodNetNSPath(pnd.PodID) 387 vmNS, err := ns.GetNS(netNSPath) 388 if err != nil { 389 return fmt.Errorf("failed to 
open network namespace at %q: %v", netNSPath, err) 390 } 391 392 var csn *network.ContainerSideNetwork 393 var dhcpServer *dhcp.Server 394 doneCh := make(chan error) 395 if err := utils.CallInNetNSWithSysfsRemounted(vmNS, func(hostNS ns.NetNS) error { 396 allLinks, err := netlink.LinkList() 397 if err != nil { 398 return fmt.Errorf("error listing the links: %v", err) 399 } 400 401 if csn, err = initNet(netNSPath, allLinks, hostNS); err != nil { 402 return err 403 } 404 405 dhcpServer = dhcp.NewServer(csn) 406 if err := dhcpServer.SetupListener("0.0.0.0"); err != nil { 407 return fmt.Errorf("Failed to set up dhcp listener: %v", err) 408 } 409 go func() { 410 doneCh <- vmNS.Do(func(ns.NetNS) error { 411 err := dhcpServer.Serve() 412 if err != nil { 413 glog.Errorf("dhcp server error: %v", err) 414 } 415 return err 416 }) 417 }() 418 419 // FIXME: there's some very small possibility for a race here 420 // (happens if the VM makes DHCP request before DHCP server is ready) 421 // For now, let's make the probability of such problem even smaller 422 time.Sleep(500 * time.Millisecond) 423 return nil 424 }); err != nil { 425 return err 426 } 427 428 s.Lock() 429 defer s.Unlock() 430 s.fdMap[key] = &podNetwork{ 431 pnd: *pnd, 432 csn: csn, 433 dhcpServer: dhcpServer, 434 doneCh: doneCh, 435 } 436 return nil 437 }