github.com/mirantis/virtlet@v1.5.2-0.20191204181327-1659b8a48e9b/pkg/libvirttools/virtualization.go

/*
Copyright 2016-2017 Mirantis

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package libvirttools

import (
	"fmt"
	"path/filepath"
	"strings"
	"time"

	"github.com/golang/glog"
	"github.com/jonboulle/clockwork"
	libvirtxml "github.com/libvirt/libvirt-go-xml"
	uuid "github.com/nu7hatch/gouuid"
	"k8s.io/apimachinery/pkg/fields"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"

	vconfig "github.com/Mirantis/virtlet/pkg/config"
	"github.com/Mirantis/virtlet/pkg/fs"
	"github.com/Mirantis/virtlet/pkg/metadata"
	"github.com/Mirantis/virtlet/pkg/metadata/types"
	"github.com/Mirantis/virtlet/pkg/network"
	"github.com/Mirantis/virtlet/pkg/utils"
	"github.com/Mirantis/virtlet/pkg/virt"
)

const (
	defaultMemory     = 1024
	defaultMemoryUnit = "MiB"
	defaultDomainType = "kvm"
	defaultEmulator   = "/usr/bin/kvm"
	noKvmDomainType   = "qemu"
	noKvmEmulator     = "/usr/bin/qemu-system-x86_64"

	domainStartCheckInterval      = 250 * time.Millisecond
	domainStartTimeout            = 10 * time.Second
	domainShutdownRetryInterval   = 5 * time.Second
	domainShutdownOnRemoveTimeout = 60 * time.Second
	domainDestroyCheckInterval    = 500 * time.Millisecond
	domainDestroyTimeout          = 5 * time.Second

	// ContainerNsUUID is the namespace UUID used to generate container (domain) UUIDs.
	ContainerNsUUID = "67b7fb47-7735-4b64-86d2-6d062d121966"

	// KubernetesPodNameLabel is the pod name container label (copied from kubetypes).
	KubernetesPodNameLabel = "io.kubernetes.pod.name"
	// KubernetesPodNamespaceLabel is the pod namespace container label (copied from kubetypes).
	KubernetesPodNamespaceLabel = "io.kubernetes.pod.namespace"
	// KubernetesPodUIDLabel is the pod uid container label (copied from kubetypes).
	KubernetesPodUIDLabel = "io.kubernetes.pod.uid"
	// KubernetesContainerNameLabel is the container name label (copied from kubetypes).
	KubernetesContainerNameLabel = "io.kubernetes.container.name"
)
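
// The UUID of a VM domain is derived deterministically from the pod sandbox ID
// using UUID v5 with ContainerNsUUID as the namespace (see CreateContainer
// below, which calls utils.NewUUID5 for this). A minimal sketch of the same
// kind of derivation done directly with the gouuid package imported above;
// the pod sandbox ID value is made up for illustration:
//
//	ns, err := uuid.ParseHex(ContainerNsUUID)
//	if err != nil {
//		panic(err)
//	}
//	u5, err := uuid.NewV5(ns, []byte("8ab49d66-2e0f-4c5b-9f63-d67a28755b32"))
//	if err != nil {
//		panic(err)
//	}
//	domainUUID := u5.String() // stable for a given sandbox ID
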
type domainSettings struct {
	useKvm           bool
	domainName       string
	domainUUID       string
	memory           int
	memoryUnit       string
	vcpuNum          int
	cpuShares        uint
	cpuPeriod        uint64
	cpuQuota         int64
	rootDiskFilepath string
	netFdKey         string
	enableSriov      bool
	cpuModel         string
	systemUUID       *uuid.UUID
}

func (ds *domainSettings) createDomain(config *types.VMConfig) *libvirtxml.Domain {
	domainType := defaultDomainType
	emulator := defaultEmulator
	if !ds.useKvm {
		domainType = noKvmDomainType
		emulator = noKvmEmulator
	}

	scsiControllerIndex := uint(0)
	domain := &libvirtxml.Domain{
		Devices: &libvirtxml.DomainDeviceList{
			Emulator: "/vmwrapper",
			Inputs: []libvirtxml.DomainInput{
				{Type: "tablet", Bus: "usb"},
			},
			Graphics: []libvirtxml.DomainGraphic{
				{VNC: &libvirtxml.DomainGraphicVNC{Port: -1}},
			},
			Videos: []libvirtxml.DomainVideo{
				{Model: libvirtxml.DomainVideoModel{Type: "cirrus"}},
			},
			Controllers: []libvirtxml.DomainController{
				{Type: "scsi", Index: &scsiControllerIndex, Model: "virtio-scsi"},
			},
		},

		OS: &libvirtxml.DomainOS{
			Type: &libvirtxml.DomainOSType{Type: "hvm"},
			BootDevices: []libvirtxml.DomainBootDevice{
				{Dev: "hd"},
			},
		},

		Features: &libvirtxml.DomainFeatureList{ACPI: &libvirtxml.DomainFeature{}},

		OnPoweroff: "destroy",
		OnReboot:   "restart",
		OnCrash:    "restart",

		Type: domainType,

		Name:   ds.domainName,
		UUID:   ds.domainUUID,
		Memory: &libvirtxml.DomainMemory{Value: uint(ds.memory), Unit: ds.memoryUnit},
		VCPU:   &libvirtxml.DomainVCPU{Value: ds.vcpuNum},
		CPUTune: &libvirtxml.DomainCPUTune{
			Shares: &libvirtxml.DomainCPUTuneShares{Value: ds.cpuShares},
			Period: &libvirtxml.DomainCPUTunePeriod{Value: ds.cpuPeriod},
			Quota:  &libvirtxml.DomainCPUTuneQuota{Value: ds.cpuQuota},
		},
		// Locked memory backing (the commented-out MemoryBacking below) causes
		// "qemu: qemu_thread_create: Resource temporarily unavailable" QEMU errors
		// when Virtlet is run as a non-privileged user.
		// Under strace, it looks like a bunch of mmap()s failing with EAGAIN,
		// which happens due to the mlockall() call somewhere above that.
		// This could be worked around using setrlimit(), but really
		// swap handling is not needed here because it's incorrect
		// to have swap enabled on the nodes of a real Kubernetes cluster.

		// MemoryBacking: &libvirtxml.DomainMemoryBacking{Locked: &libvirtxml.DomainMemoryBackingLocked{}},

		QEMUCommandline: &libvirtxml.DomainQEMUCommandline{
			Envs: []libvirtxml.DomainQEMUCommandlineEnv{
				{Name: vconfig.EmulatorEnvVarName, Value: emulator},
				{Name: vconfig.NetKeyEnvVarName, Value: ds.netFdKey},
				{Name: vconfig.ContainerIDEnvVarName, Value: config.DomainUUID},
				{Name: vconfig.LogPathEnvVarName,
					Value: filepath.Join(config.LogDirectory, config.LogPath)},
			},
		},
	}

	// Set the CPU model.
	// If the user provides a full libvirt CPU definition via the annotation,
	// they are assumed to know what they are doing, so that definition takes
	// priority.
	if config.ParsedAnnotations.CPUSetting != nil {
		domain.CPU = config.ParsedAnnotations.CPUSetting
	} else {
		switch ds.cpuModel {
		case types.CPUModelHostModel:
			// The following enables nested virtualization.
			// In case of Intel processors it requires the nested=1 option
			// for the kvm_intel module. That can be passed like this:
			// modprobe kvm_intel nested=1
			domain.CPU = &libvirtxml.DomainCPU{
				Mode: types.CPUModelHostModel,
				Model: &libvirtxml.DomainCPUModel{
					Fallback: "forbid",
				},
				Features: []libvirtxml.DomainCPUFeature{
					{
						Policy: "require",
						Name:   "vmx",
					},
				},
			}
		case "":
			// leave it empty
		default:
			glog.Warningf("Unknown value set in VIRTLET_CPU_MODEL: %q", ds.cpuModel)
		}
	}

	if ds.systemUUID != nil {
		domain.SysInfo = &libvirtxml.DomainSysInfo{
			Type: "smbios",
			System: &libvirtxml.DomainSysInfoSystem{
				Entry: []libvirtxml.DomainSysInfoEntry{
					{
						Name:  "uuid",
						Value: ds.systemUUID.String(),
					},
				},
			},
		}
	}

	if ds.enableSriov {
		domain.QEMUCommandline.Envs = append(domain.QEMUCommandline.Envs,
			libvirtxml.DomainQEMUCommandlineEnv{Name: "VMWRAPPER_KEEP_PRIVS", Value: "1"})
	}

	return domain
}
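
// For reference, the domain definition built above can be rendered to the
// libvirt XML that eventually reaches libvirtd. A minimal sketch with made-up
// settings; cfg stands for a *types.VMConfig whose annotations have already
// been loaded, and libvirtxml.Domain provides Marshal():
//
//	ds := &domainSettings{
//		useKvm:     true,
//		domainName: "virtlet-0a1b2c3d4e5f6-foo",
//		domainUUID: "0a1b2c3d-4e5f-6789-abcd-ef0123456789",
//		memory:     defaultMemory,
//		memoryUnit: defaultMemoryUnit,
//		vcpuNum:    1,
//	}
//	domainXML, err := ds.createDomain(cfg).Marshal()
//	if err != nil {
//		// handle the error
//	}
//	fmt.Println(domainXML)
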
// VirtualizationConfig specifies configuration options for VirtualizationTool.
type VirtualizationConfig struct {
	// True if KVM should be disabled
	DisableKVM bool
	// True if SR-IOV support needs to be enabled
	EnableSriov bool
	// List of raw devices that can be accessed by the VM.
	RawDevices []string
	// Kubelet's root dir
	// FIXME: kubelet's --root-dir may be something other than /var/lib/kubelet
	// Need to remove it from daemonset mounts (both dev and non-dev)
	// Use 'nsenter -t 1 -m -- tar ...' or something to grab the path
	// from the root namespace
	KubeletRootDir string
	// The path of the streamer socket used for logging. By default, the path
	// is empty. When the path is empty, logging is disabled for the VMs.
	StreamerSocketPath string
	// The name of the libvirt volume pool to use for the VMs.
	VolumePoolName string
	// CPUModel contains the type of CPU model to be passed in the libvirt
	// domain definition (it can be overridden by a pod annotation).
	// An empty value means that the libvirt default is used.
	CPUModel string
	// Path to the directory used for shared filesystems
	SharedFilesystemPath string
}

// VirtualizationTool provides methods to operate on libvirt.
type VirtualizationTool struct {
	domainConn    virt.DomainConnection
	storageConn   virt.StorageConnection
	imageManager  ImageManager
	metadataStore metadata.Store
	clock         clockwork.Clock
	volumeSource  VMVolumeSource
	config        VirtualizationConfig
	fsys          fs.FileSystem
	commander     utils.Commander
}

var _ volumeOwner = &VirtualizationTool{}
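
// A sketch of how a VirtualizationTool is typically put together; the
// connection, image manager, metadata store, volume source, filesystem and
// commander values are placeholders for whatever implementations the caller
// already has, and the field values below are illustrative, not defaults:
//
//	cfg := VirtualizationConfig{
//		DisableKVM:           false,
//		RawDevices:           []string{"loop*"},
//		KubeletRootDir:       "/var/lib/kubelet",
//		VolumePoolName:       "volumes",
//		CPUModel:             types.CPUModelHostModel,
//		SharedFilesystemPath: "/var/lib/virtlet/fs",
//	}
//	tool := NewVirtualizationTool(domainConn, storageConn, imageManager,
//		metadataStore, volumeSource, cfg, fsys, commander)
//	_ = tool
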
// NewVirtualizationTool returns an initialized VirtualizationTool.
func NewVirtualizationTool(domainConn virt.DomainConnection,
	storageConn virt.StorageConnection, imageManager ImageManager,
	metadataStore metadata.Store, volumeSource VMVolumeSource,
	config VirtualizationConfig, fsys fs.FileSystem,
	commander utils.Commander) *VirtualizationTool {
	return &VirtualizationTool{
		domainConn:    domainConn,
		storageConn:   storageConn,
		imageManager:  imageManager,
		metadataStore: metadataStore,
		clock:         clockwork.NewRealClock(),
		volumeSource:  volumeSource,
		config:        config,
		fsys:          fsys,
		commander:     commander,
	}
}

// SetClock sets the clock to use (used in tests).
func (v *VirtualizationTool) SetClock(clock clockwork.Clock) {
	v.clock = clock
}

func (v *VirtualizationTool) addSerialDevicesToDomain(domain *libvirtxml.Domain) error {
	port := uint(0)
	timeout := uint(1)
	if v.config.StreamerSocketPath != "" {
		domain.Devices.Serials = []libvirtxml.DomainSerial{
			{
				Source: &libvirtxml.DomainChardevSource{
					UNIX: &libvirtxml.DomainChardevSourceUNIX{
						Mode: "connect",
						Path: v.config.StreamerSocketPath,
						Reconnect: &libvirtxml.DomainChardevSourceReconnect{
							Enabled: "yes",
							Timeout: &timeout,
						},
					},
				},
				Target: &libvirtxml.DomainSerialTarget{Port: &port},
			},
		}
	} else {
		domain.Devices.Serials = []libvirtxml.DomainSerial{
			{
				Target: &libvirtxml.DomainSerialTarget{Port: &port},
			},
		}
		domain.Devices.Consoles = []libvirtxml.DomainConsole{
			{
				Target: &libvirtxml.DomainConsoleTarget{Type: "serial", Port: &port},
			},
		}
	}
	return nil
}
// CreateContainer defines a libvirt domain for the VM, prepares its disks and
// stores all the info in the metadata store. It returns the domain UUID
// generated based on the pod sandbox ID.
func (v *VirtualizationTool) CreateContainer(config *types.VMConfig, netFdKey string) (string, error) {
	if err := config.LoadAnnotations(); err != nil {
		return "", err
	}

	var domainUUID string
	if config.ParsedAnnotations.SystemUUID != nil {
		domainUUID = config.ParsedAnnotations.SystemUUID.String()
	} else {
		domainUUID = utils.NewUUID5(ContainerNsUUID, config.PodSandboxID)
	}
	// FIXME: this field should be moved to VMStatus struct (to be added)
	config.DomainUUID = domainUUID
	cpuModel := v.config.CPUModel
	if config.ParsedAnnotations.CPUModel != "" {
		cpuModel = string(config.ParsedAnnotations.CPUModel)
	}
	settings := domainSettings{
		domainUUID: domainUUID,
		// Note: only the first 13 characters are used because libvirt has an issue
		// with handling long path names for the qemu monitor socket
		domainName:  "virtlet-" + domainUUID[:13] + "-" + config.Name,
		netFdKey:    netFdKey,
		vcpuNum:     config.ParsedAnnotations.VCPUCount,
		memory:      int(config.MemoryLimitInBytes),
		cpuShares:   uint(config.CPUShares),
		cpuPeriod:   uint64(config.CPUPeriod),
		enableSriov: v.config.EnableSriov,
		// CPU bandwidth limits for domains are actually applied by libvirt
		// per vCPU thread. Thus, to limit the overall CPU consumption of the
		// VM to the value from the pod definition, we need to divide the quota
		// by the number of vCPUs (see the worked example after this function).
		cpuQuota:   config.CPUQuota / int64(config.ParsedAnnotations.VCPUCount),
		memoryUnit: "b",
		useKvm:     !v.config.DisableKVM,
		cpuModel:   cpuModel,
		systemUUID: config.ParsedAnnotations.SystemUUID,
	}
	if settings.memory == 0 {
		settings.memory = defaultMemory
		settings.memoryUnit = defaultMemoryUnit
	}

	domainDef := settings.createDomain(config)
	diskList, err := newDiskList(config, v.volumeSource, v)
	if err != nil {
		return "", err
	}
	domainDef.Devices.Disks, domainDef.Devices.Filesystems, err = diskList.setup()
	if err != nil {
		return "", err
	}

	ok := false
	defer func() {
		if ok {
			return
		}
		if err := v.removeDomain(settings.domainUUID, config, types.ContainerState_CONTAINER_UNKNOWN, true); err != nil {
			glog.Warningf("Failed to remove domain %q: %v", settings.domainUUID, err)
		}
		if err := diskList.teardown(); err != nil {
			glog.Warningf("error tearing down volumes after an error: %v", err)
		}
	}()

	if err := v.addSerialDevicesToDomain(domainDef); err != nil {
		return "", err
	}

	if config.ContainerLabels == nil {
		config.ContainerLabels = map[string]string{}
	}
	config.ContainerLabels[kubetypes.KubernetesPodNameLabel] = config.PodName
	config.ContainerLabels[kubetypes.KubernetesPodNamespaceLabel] = config.PodNamespace
	config.ContainerLabels[kubetypes.KubernetesPodUIDLabel] = config.PodSandboxID
	config.ContainerLabels[kubetypes.KubernetesContainerNameLabel] = config.Name

	domain, err := v.domainConn.DefineDomain(domainDef)
	if err == nil {
		err = diskList.writeImages(domain)
	}
	if err == nil {
		err = v.metadataStore.Container(settings.domainUUID).Save(
			func(_ *types.ContainerInfo) (*types.ContainerInfo, error) {
				return &types.ContainerInfo{
					Name:      config.Name,
					CreatedAt: v.clock.Now().UnixNano(),
					Config:    *config,
					State:     types.ContainerState_CONTAINER_CREATED,
				}, nil
			})
	}
	if err != nil {
		return "", err
	}

	ok = true
	return settings.domainUUID, nil
}
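
// A worked example of the per-vCPU quota division referenced in
// CreateContainer above. The figures are illustrative: a pod CPU limit of
// 500m with the usual 100000µs period yields CPUQuota = 50000.
//
//	const (
//		cpuQuotaFromPod = int64(50000) // 0.5 CPU over a 100000µs period
//		vcpuCount       = int64(2)
//	)
//	perVCPUQuota := cpuQuotaFromPod / vcpuCount // 25000µs per vCPU
//	// libvirt applies the quota to each vCPU thread separately, so the two
//	// threads together may use 2 * 25000µs = 50000µs per period, which keeps
//	// the whole VM at the 0.5 CPU limit requested by the pod.
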
func (v *VirtualizationTool) updateDiskImages(containerID string) error {
	domain, err := v.domainConn.LookupDomainByUUIDString(containerID)
	if err != nil {
		return fmt.Errorf("failed to look up domain %q: %v", containerID, err)
	}

	config, _, err := v.getVMConfigFromMetadata(containerID)
	if err != nil {
		return err
	}

	if config == nil {
		glog.Warningf("No info found for domain %q in the metadata store. Not updating disk images", containerID)
		return nil
	}

	diskList, err := newDiskList(config, v.volumeSource, v)
	if err != nil {
		return err
	}

	return diskList.writeImages(domain)
}

// UpdateContainerNetwork updates the network info for the container.
func (v *VirtualizationTool) UpdateContainerNetwork(containerID string, csn *network.ContainerSideNetwork) error {
	if err := v.metadataStore.Container(containerID).Save(
		func(c *types.ContainerInfo) (*types.ContainerInfo, error) {
			// make sure the container is not removed during the call
			if c != nil {
				c.Config.ContainerSideNetwork = csn
			}
			return c, nil
		}); err != nil {
		return fmt.Errorf("error updating container info: %v", err)
	}

	// propagate the network config to cloud-init
	if err := v.updateDiskImages(containerID); err != nil {
		return fmt.Errorf("domain %q: error updating disk images: %v", containerID, err)
	}

	return nil
}

func (v *VirtualizationTool) startContainer(containerID string) error {
	domain, err := v.domainConn.LookupDomainByUUIDString(containerID)
	if err != nil {
		return fmt.Errorf("failed to look up domain %q: %v", containerID, err)
	}

	state, err := domain.State()
	if err != nil {
		return fmt.Errorf("failed to get state of the domain %q: %v", containerID, err)
	}
	if state != virt.DomainStateShutoff {
		return fmt.Errorf("domain %q: bad state %v upon StartContainer()", containerID, state)
	}

	if err = domain.Create(); err != nil {
		return fmt.Errorf("failed to create domain %q: %v", containerID, err)
	}

	// XXX: maybe we don't really have to wait here, but I couldn't
	// find a definitive answer in the libvirt docs.
	if err = utils.WaitLoop(func() (bool, error) {
		state, err := domain.State()
		if err != nil {
			return false, fmt.Errorf("failed to get state of the domain %q: %v", containerID, err)
		}
		switch state {
		case virt.DomainStateRunning:
			return true, nil
		case virt.DomainStateShutdown:
			return false, fmt.Errorf("unexpected shutdown for new domain %q", containerID)
		case virt.DomainStateCrashed:
			return false, fmt.Errorf("domain %q crashed on start", containerID)
		default:
			return false, nil
		}
	}, domainStartCheckInterval, domainStartTimeout, v.clock); err != nil {
		return err
	}

	return v.metadataStore.Container(containerID).Save(
		func(c *types.ContainerInfo) (*types.ContainerInfo, error) {
			// make sure the container is not removed during the call
			if c != nil {
				c.State = types.ContainerState_CONTAINER_RUNNING
				c.StartedAt = v.clock.Now().UnixNano()
			}
			return c, nil
		})
}

// StartContainer calls libvirt to start the domain, waits up to 10 seconds for
// the DOMAIN_RUNNING state, then updates its state in the metadata store.
// Any error encountered along the way is returned to the caller.
func (v *VirtualizationTool) StartContainer(containerID string) error {
	return v.startContainer(containerID)
}
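
// A sketch of the container lifecycle as seen by a caller of this type; the
// vmConfig and netFdKey values are assumed to come from the CRI layer and are
// placeholders here:
//
//	containerID, err := tool.CreateContainer(vmConfig, netFdKey)
//	if err == nil {
//		err = tool.StartContainer(containerID)
//	}
//	// ... the VM runs ...
//	if err == nil {
//		err = tool.StopContainer(containerID, 30*time.Second)
//	}
//	if err == nil {
//		err = tool.RemoveContainer(containerID)
//	}
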
// StopContainer requests a graceful shutdown of the domain and, if that does
// not succeed within the given timeout, calls libvirt to destroy the domain.
// A successful shutdown or destroy is followed by updating the VM state in the
// metadata store, which in turn is followed by volume cleanup.
func (v *VirtualizationTool) StopContainer(containerID string, timeout time.Duration) error {
	domain, err := v.domainConn.LookupDomainByUUIDString(containerID)
	if err != nil {
		return err
	}

	// We try to shut down the VM gracefully first. This may take several attempts
	// because shutdown requests may be ignored, e.g. while the VM is booting.
	// If this fails, we just destroy the domain (i.e. power off the VM).
	err = utils.WaitLoop(func() (bool, error) {
		_, err := v.domainConn.LookupDomainByUUIDString(containerID)
		if err == virt.ErrDomainNotFound {
			return true, nil
		}
		if err != nil {
			return false, fmt.Errorf("failed to look up the domain %q: %v", containerID, err)
		}

		// domain.Shutdown() may return an 'invalid operation' error if the domain is
		// already shut down. But checking the state beforehand will not make the
		// situation any simpler because we'd still have a race, thus we need
		// multiple attempts.
		domainShutdownErr := domain.Shutdown()

		state, err := domain.State()
		if err != nil {
			return false, fmt.Errorf("failed to get state of the domain %q: %v", containerID, err)
		}

		if state == virt.DomainStateShutoff {
			return true, nil
		}

		if domainShutdownErr != nil {
			// The domain is not in 'DOMAIN_SHUTOFF' state and domain.Shutdown() failed,
			// so we need to return the error that happened during Shutdown()
			return false, fmt.Errorf("failed to shut down domain %q: %v", containerID, domainShutdownErr)
		}

		return false, nil
	}, domainShutdownRetryInterval, timeout, v.clock)

	if err != nil {
		glog.Warningf("Failed to shut down VM %q: %v -- trying to destroy the domain", containerID, err)
		// if the domain is destroyed successfully we return no error
		if err = domain.Destroy(); err != nil {
			return fmt.Errorf("failed to destroy the domain: %v", err)
		}
	}

	if err == nil {
		err = v.metadataStore.Container(containerID).Save(
			func(c *types.ContainerInfo) (*types.ContainerInfo, error) {
				// make sure the container is not removed during the call
				if c != nil {
					c.State = types.ContainerState_CONTAINER_EXITED
				}
				return c, nil
			})
	}

	if err == nil {
		// Note: volume cleanup is done right after the domain has been stopped
		// because by the time the ContainerRemove request arrives, all the
		// flexvolume data has already been removed by kubelet's VolumeManager.
		return v.cleanupVolumes(containerID)
	}

	return err
}

func (v *VirtualizationTool) getVMConfigFromMetadata(containerID string) (*types.VMConfig, types.ContainerState, error) {
	containerInfo, err := v.metadataStore.Container(containerID).Retrieve()
	if err != nil {
		glog.Errorf("Error when retrieving domain %q info from the metadata store: %v", containerID, err)
		return nil, types.ContainerState_CONTAINER_UNKNOWN, err
	}
	if containerInfo == nil {
		// the VM is already removed
		return nil, types.ContainerState_CONTAINER_UNKNOWN, nil
	}

	return &containerInfo.Config, containerInfo.State, nil
}
func (v *VirtualizationTool) cleanupVolumes(containerID string) error {
	config, _, err := v.getVMConfigFromMetadata(containerID)
	if err != nil {
		return err
	}

	if config == nil {
		glog.Warningf("No info found for domain %q in the metadata store. Volume cleanup skipped.", containerID)
		return nil
	}

	diskList, err := newDiskList(config, v.volumeSource, v)
	if err == nil {
		err = diskList.teardown()
	}

	if err != nil {
		// volume teardown failures are logged but not propagated here
		glog.Errorf("Volume teardown failed for domain %q: %v", containerID, err)
	}

	return nil
}

func (v *VirtualizationTool) removeDomain(containerID string, config *types.VMConfig, state types.ContainerState, failUponVolumeTeardownFailure bool) error {
	// Give the domain a chance to be stopped gracefully.
	// TODO: handle errors - there could be e.g. a lost connection error
	domain, err := v.domainConn.LookupDomainByUUIDString(containerID)
	if err != nil && err != virt.ErrDomainNotFound {
		return err
	}

	if domain != nil {
		if state == types.ContainerState_CONTAINER_RUNNING {
			if err := domain.Destroy(); err != nil {
				return fmt.Errorf("failed to destroy the domain: %v", err)
			}
		}

		if err := domain.Undefine(); err != nil {
			return fmt.Errorf("error undefining the domain %q: %v", containerID, err)
		}

		// Wait until the domain is really removed, or time out after 5 seconds.
		if err := utils.WaitLoop(func() (bool, error) {
			if _, err := v.domainConn.LookupDomainByUUIDString(containerID); err == virt.ErrDomainNotFound {
				return true, nil
			} else if err != nil {
				// Unexpected error occurred
				return false, fmt.Errorf("error looking up domain %q: %v", containerID, err)
			}
			return false, nil
		}, domainDestroyCheckInterval, domainDestroyTimeout, v.clock); err != nil {
			return err
		}
	}

	diskList, err := newDiskList(config, v.volumeSource, v)
	if err == nil {
		err = diskList.teardown()
	}

	switch {
	case err == nil:
		return nil
	case failUponVolumeTeardownFailure:
		return err
	default:
		glog.Warningf("Error during volume teardown for container %s: %v", containerID, err)
		return nil
	}
}
// RemoveContainer tries to gracefully stop the domain, then forcibly removes
// it even if it's still running.
// It waits up to 5 seconds for libvirt to finish the removal.
func (v *VirtualizationTool) RemoveContainer(containerID string) error {
	config, state, err := v.getVMConfigFromMetadata(containerID)

	if err != nil {
		return err
	}

	if config == nil {
		glog.Warningf("No info found for domain %q in the metadata store. Domain cleanup skipped", containerID)
		return nil
	}

	if err := v.removeDomain(containerID, config, state, state == types.ContainerState_CONTAINER_CREATED ||
		state == types.ContainerState_CONTAINER_RUNNING); err != nil {
		return err
	}

	if err := v.metadataStore.Container(containerID).Save(
		func(_ *types.ContainerInfo) (*types.ContainerInfo, error) {
			return nil, nil // delete the container record
		},
	); err != nil {
		glog.Errorf("Error when removing container '%s' from the metadata store: %v", containerID, err)
		return err
	}

	return nil
}

func virtToKubeState(domainState virt.DomainState, lastState types.ContainerState) types.ContainerState {
	var containerState types.ContainerState

	switch domainState {
	case virt.DomainStateShutdown:
		// the domain is being shut down, but is still running
		fallthrough
	case virt.DomainStateRunning:
		containerState = types.ContainerState_CONTAINER_RUNNING
	case virt.DomainStatePaused:
		if lastState == types.ContainerState_CONTAINER_CREATED {
			containerState = types.ContainerState_CONTAINER_CREATED
		} else {
			containerState = types.ContainerState_CONTAINER_EXITED
		}
	case virt.DomainStateShutoff:
		if lastState == types.ContainerState_CONTAINER_CREATED {
			containerState = types.ContainerState_CONTAINER_CREATED
		} else {
			containerState = types.ContainerState_CONTAINER_EXITED
		}
	case virt.DomainStateCrashed:
		containerState = types.ContainerState_CONTAINER_EXITED
	case virt.DomainStatePMSuspended:
		containerState = types.ContainerState_CONTAINER_EXITED
	default:
		containerState = types.ContainerState_CONTAINER_UNKNOWN
	}

	return containerState
}

func (v *VirtualizationTool) getPodContainer(podSandboxID string) (*types.ContainerInfo, error) {
	// FIXME: is it possible for multiple containers to exist?
	domainContainers, err := v.metadataStore.ListPodContainers(podSandboxID)
	if err != nil {
		// There's no such sandbox. Looks like it's already removed, so return nil.
		return nil, nil
	}
	for _, containerMeta := range domainContainers {
		// TODO: Distinguish lack of domain from other errors
		_, err := v.domainConn.LookupDomainByUUIDString(containerMeta.GetID())
		if err != nil {
			// There's no such domain. Looks like it's already removed, so return nil.
			return nil, nil
		}

		// Verify that there is container metadata.
		containerInfo, err := containerMeta.Retrieve()
		if err != nil {
			return nil, err
		}
		if containerInfo == nil {
			// There's no such container - looks like it's already removed, but it's still mentioned in the sandbox
			return nil, fmt.Errorf("container metadata not found, but it's still mentioned in sandbox %s", podSandboxID)
		}

		return containerInfo, nil
	}
	return nil, nil
}
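
// A sketch of filtered listing with the method defined below; the sandbox ID
// and label values are illustrative, and the label selector is shown as a
// plain label map, matching how filterContainer treats it:
//
//	running := types.ContainerState_CONTAINER_RUNNING
//	infos, err := tool.ListContainers(&types.ContainerFilter{
//		PodSandboxID:  "8ab49d66-2e0f-4c5b-9f63-d67a28755b32",
//		State:         &running,
//		LabelSelector: map[string]string{KubernetesPodNameLabel: "my-vm-pod"},
//	})
//	if err == nil {
//		for _, info := range infos {
//			fmt.Println(info.Id, info.Name, info.State)
//		}
//	}
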
// ListContainers queries libvirt for the domains denoted by a container ID or
// a pod sandbox ID, or for all domains, gathers their descriptions from the
// metadata store, converts their libvirt state to the kubeapi-compatible one,
// and returns them as a list of ContainerInfo structs.
func (v *VirtualizationTool) ListContainers(filter *types.ContainerFilter) ([]*types.ContainerInfo, error) {
	var containers []*types.ContainerInfo
	switch {
	case filter != nil && filter.Id != "":
		containerInfo, err := v.ContainerInfo(filter.Id)
		if err != nil || containerInfo == nil {
			return nil, err
		}
		containers = append(containers, containerInfo)
	case filter != nil && filter.PodSandboxID != "":
		containerInfo, err := v.getPodContainer(filter.PodSandboxID)
		if err != nil || containerInfo == nil {
			return nil, err
		}
		containers = append(containers, containerInfo)
	default:
		// Get the list of all the defined domains from libvirt
		// and check each container against the remaining
		// filter settings
		domains, err := v.domainConn.ListDomains()
		if err != nil {
			return nil, err
		}
		for _, domain := range domains {
			containerID, err := domain.UUIDString()
			if err != nil {
				return nil, err
			}
			containerInfo, err := v.ContainerInfo(containerID)
			if err != nil {
				return nil, err
			}

			if containerInfo == nil {
				glog.V(1).Infof("Failed to find info for domain with id %q in the virtlet db, considering it a non-virtlet libvirt domain.", containerID)
				continue
			}
			containers = append(containers, containerInfo)
		}
	}

	if filter == nil {
		return containers, nil
	}

	var r []*types.ContainerInfo
	for _, c := range containers {
		if filterContainer(c, *filter) {
			r = append(r, c)
		}
	}

	return r, nil
}

// ContainerInfo returns info for the specified container, making sure it's also
// present among the libvirt domains. If it isn't, the function returns nil.
func (v *VirtualizationTool) ContainerInfo(containerID string) (*types.ContainerInfo, error) {
	domain, err := v.domainConn.LookupDomainByUUIDString(containerID)
	if err != nil {
		return nil, err
	}

	containerInfo, err := v.metadataStore.Container(containerID).Retrieve()
	if err != nil {
		return nil, err
	}
	if containerInfo == nil {
		return nil, nil
	}

	state, err := domain.State()
	if err != nil {
		return nil, err
	}

	containerState := virtToKubeState(state, containerInfo.State)
	if containerInfo.State != containerState {
		if err := v.metadataStore.Container(containerID).Save(
			func(c *types.ContainerInfo) (*types.ContainerInfo, error) {
				// make sure the container is not removed during the call
				if c != nil {
					c.State = containerState
				}
				return c, nil
			},
		); err != nil {
			return nil, err
		}
		containerInfo.State = containerState
	}
	return containerInfo, nil
}
// VMStats returns the current cpu/memory/disk usage for the VM.
func (v *VirtualizationTool) VMStats(containerID string, name string) (*types.VMStats, error) {
	domain, err := v.domainConn.LookupDomainByUUIDString(containerID)
	if err != nil {
		return nil, err
	}
	vs := types.VMStats{
		Timestamp:   v.clock.Now().UnixNano(),
		ContainerID: containerID,
		Name:        name,
	}

	rss, err := domain.GetRSS()
	if err != nil {
		return nil, err
	}
	vs.MemoryUsage = rss

	cpuTime, err := domain.GetCPUTime()
	if err != nil {
		return nil, err
	}
	vs.CpuUsage = cpuTime

	domainxml, err := domain.XML()
	if err != nil {
		return nil, err
	}

	rootDiskLocation := ""
	for _, disk := range domainxml.Devices.Disks {
		if disk.Source == nil || disk.Source.File == nil {
			continue
		}
		fname := disk.Source.File.File
		// TODO: split the file name and use HasPrefix on the last part
		// instead of Contains
		if strings.Contains(fname, "virtlet_root_") {
			rootDiskLocation = fname
		}
	}
	if rootDiskLocation == "" {
		return nil, fmt.Errorf("cannot locate root disk in domain definition")
	}

	rootDiskSize, err := v.ImageManager().BytesUsedBy(rootDiskLocation)
	if err != nil {
		return nil, err
	}
	vs.FsBytes = rootDiskSize

	glog.V(4).Infof("VMStats - cpu: %d, mem: %d, disk: %d, timestamp: %d", vs.CpuUsage, vs.MemoryUsage, vs.FsBytes, vs.Timestamp)

	return &vs, nil
}

// ListVMStats returns statistics (same as VMStats) for all containers matching
// the provided filter (id AND pod sandbox ID AND labels).
func (v *VirtualizationTool) ListVMStats(filter *types.VMStatsFilter) ([]types.VMStats, error) {
	var containersFilter *types.ContainerFilter
	if filter != nil {
		containersFilter = &types.ContainerFilter{}
		if filter.Id != "" {
			containersFilter.Id = filter.Id
		}
		if filter.PodSandboxID != "" {
			containersFilter.PodSandboxID = filter.PodSandboxID
		}
		if filter.LabelSelector != nil {
			containersFilter.LabelSelector = filter.LabelSelector
		}
	}

	infos, err := v.ListContainers(containersFilter)
	if err != nil {
		return nil, err
	}

	var statsList []types.VMStats
	for _, info := range infos {
		stats, err := v.VMStats(info.Id, info.Name)
		if err != nil {
			return nil, err
		}
		statsList = append(statsList, *stats)
	}
	return statsList, nil
}
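
// A sketch of collecting stats for all VMs of a single pod via the method
// above; the sandbox ID is illustrative:
//
//	statsList, err := tool.ListVMStats(&types.VMStatsFilter{
//		PodSandboxID: "8ab49d66-2e0f-4c5b-9f63-d67a28755b32",
//	})
//	if err == nil {
//		for _, s := range statsList {
//			fmt.Printf("%s: cpu=%d mem=%d fs=%d\n", s.Name, s.CpuUsage, s.MemoryUsage, s.FsBytes)
//		}
//	}
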
// volumeOwner implementation follows

// StoragePool implements volumeOwner StoragePool method
func (v *VirtualizationTool) StoragePool() (virt.StoragePool, error) {
	return ensureStoragePool(v.storageConn, v.config.VolumePoolName)
}

// DomainConnection implements volumeOwner DomainConnection method
func (v *VirtualizationTool) DomainConnection() virt.DomainConnection { return v.domainConn }

// StorageConnection implements volumeOwner StorageConnection method
func (v *VirtualizationTool) StorageConnection() virt.StorageConnection { return v.storageConn }

// ImageManager implements volumeOwner ImageManager method
func (v *VirtualizationTool) ImageManager() ImageManager { return v.imageManager }

// RawDevices implements volumeOwner RawDevices method
func (v *VirtualizationTool) RawDevices() []string { return v.config.RawDevices }

// KubeletRootDir implements volumeOwner KubeletRootDir method
func (v *VirtualizationTool) KubeletRootDir() string { return v.config.KubeletRootDir }

// VolumePoolName implements volumeOwner VolumePoolName method
func (v *VirtualizationTool) VolumePoolName() string { return v.config.VolumePoolName }

// FileSystem implements volumeOwner FileSystem method
func (v *VirtualizationTool) FileSystem() fs.FileSystem { return v.fsys }

// SharedFilesystemPath implements volumeOwner SharedFilesystemPath method
func (v *VirtualizationTool) SharedFilesystemPath() string { return v.config.SharedFilesystemPath }

// Commander implements volumeOwner Commander method
func (v *VirtualizationTool) Commander() utils.Commander { return v.commander }

func filterContainer(containerInfo *types.ContainerInfo, filter types.ContainerFilter) bool {
	if filter.Id != "" && containerInfo.Id != filter.Id {
		return false
	}

	if filter.PodSandboxID != "" && containerInfo.Config.PodSandboxID != filter.PodSandboxID {
		return false
	}

	if filter.State != nil && containerInfo.State != *filter.State {
		return false
	}
	if filter.LabelSelector != nil {
		sel := fields.SelectorFromSet(filter.LabelSelector)
		if !sel.Matches(fields.Set(containerInfo.Config.ContainerLabels)) {
			return false
		}
	}

	return true
}
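
// A small illustration of how the label selector above matches the container
// labels that CreateContainer attaches to each domain; the values are made up:
//
//	info := &types.ContainerInfo{
//		Id: "0a1b2c3d-4e5f-6789-abcd-ef0123456789",
//		Config: types.VMConfig{
//			PodSandboxID: "8ab49d66-2e0f-4c5b-9f63-d67a28755b32",
//			ContainerLabels: map[string]string{
//				KubernetesPodNameLabel:       "my-vm-pod",
//				KubernetesPodNamespaceLabel:  "default",
//				KubernetesContainerNameLabel: "vm",
//			},
//		},
//	}
//	match := filterContainer(info, types.ContainerFilter{
//		LabelSelector: map[string]string{KubernetesPodNameLabel: "my-vm-pod"},
//	})
//	// match == true: every key/value pair in the selector is present in the labels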